From 4bce1a19fcfac0c9f22c11278daa510546ccd3f2 Mon Sep 17 00:00:00 2001
From: Michael Tuexen <tuexen@FreeBSD.org>
Date: Sat, 4 Jan 2025 21:08:43 +0100
Subject: [PATCH 001/143] TCP BBR: remove code which is not needed

rc_bbr_substate is a 3-bit unsigned int, so it can't be larger than
or equal to 8. The wrap around already happens.
No functional change intended.

Reviewed by:		rrs
CID:			1523795
MFC after:		1 week
Sponsored by:		Netflix, Inc.
Differential Revision:	https://reviews.freebsd.org/D48320
---
 sys/netinet/tcp_stacks/bbr.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index ed7e07861ebdef..535766a0a1b004 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -10313,10 +10313,6 @@ bbr_substate_change(struct tcp_bbr *bbr, uint32_t cts, int32_t line, int dolog)
 			bbr->r_ctl.bbr_smallest_srtt_state2 = bbr->r_ctl.bbr_smallest_srtt_this_state;
 	}
 	bbr->rc_bbr_substate++;
-	if (bbr->rc_bbr_substate >= BBR_SUBSTATE_COUNT) {
-		/* Cycle back to first state-> gain */
-		bbr->rc_bbr_substate = 0;
-	}
 	if (bbr_state_val(bbr) == BBR_SUB_GAIN) {
 		/*
 		 * We enter the gain(5/4) cycle (possibly less if

From 84e894ce1309b426aa5d1a20ec194401f35dc478 Mon Sep 17 00:00:00 2001
From: Michael Tuexen <tuexen@FreeBSD.org>
Date: Sat, 4 Jan 2025 21:11:26 +0100
Subject: [PATCH 002/143] TCP RACK: remove variable which is only initialized
 and not changed

minslot is initialized to 0 and never changed. It is not clear to me
under which condition minslot should be set to which value.
Therefore, remove it and the code checking that it is not zero.
No functional change intended.

Reviewed by:		rrs
CID:			1523812
MFC after:		1 week
Sponsored by:		Netflix, Inc.
Differential Revision:	https://reviews.freebsd.org/D48321
---
 sys/netinet/tcp_stacks/rack.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 6774acb9d5e62f..cc07253247609c 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -17456,7 +17456,6 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str
 {
 	uint64_t srtt;
 	int32_t slot = 0;
-	int32_t minslot = 0;
 	int can_start_hw_pacing = 1;
 	int err;
 	int pace_one;
@@ -17788,11 +17787,6 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str
 				}
 			}
 		}
-		if (minslot && (minslot > slot)) {
-			rack_log_pacing_delay_calc(rack, minslot, slot, rack->r_ctl.crte->rate, bw_est, lentim,
-						   98, __LINE__, NULL, 0);
-			slot = minslot;
-		}
 	done_w_hdwr:
 		if (rack_limit_time_with_srtt &&
 		    (rack->use_fixed_rate == 0) &&

From 0f7d8b71b45b0a86b25e1005e83140ee6cbdff45 Mon Sep 17 00:00:00 2001
From: Ed Maste <emaste@FreeBSD.org>
Date: Thu, 2 Jan 2025 02:26:00 +0000
Subject: [PATCH 003/143] Makefile.inc1: Set DISTDIR in stagekernel target

The distributekernel target expects DESTDIR and DISTDIR to be set.  The
stagekernel target invokes `make distributekernel`, and previously left
DISTDIR unset, resulting in a path with a "//" component.  Instead, set
DISTDIR to . to make the way we're (ab)using the distributekernel target
more explicit.

Reviewed by:	bapt
Sponsored by:	The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D48288
---
 Makefile.inc1 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile.inc1 b/Makefile.inc1
index 5c3d190e4c3ede..bad747a0e551e9 100644
--- a/Makefile.inc1
+++ b/Makefile.inc1
@@ -1991,7 +1991,7 @@ packagekernel: .PHONY
 .endif
 
 stagekernel: .PHONY
-	${_+_}${MAKE} -C ${.CURDIR} ${.MAKEFLAGS} distributekernel
+	${_+_}${MAKE} -C ${.CURDIR} ${.MAKEFLAGS} DISTDIR=. distributekernel
 
 PORTSDIR?=	/usr/ports
 WSTAGEDIR?=	${OBJTOP}/worldstage

From 48b9d78a0a9d795cfdeb56895a27309aadd50c77 Mon Sep 17 00:00:00 2001
From: Daniel Schaefer <dhs@frame.work>
Date: Sat, 4 Jan 2025 23:53:37 +0800
Subject: [PATCH 004/143] hda: Add patch for Framework Laptop Intel 13th gen

It uses the same audio codec as 12th gen (PCI ID 0x0002).
Actually everything is the same, except the CPU.

Signed-off-by: Daniel Schaefer <dhs@frame.work>
---
 sys/dev/sound/pci/hda/hdaa_patches.c | 3 ++-
 sys/dev/sound/pci/hda/hdac.h         | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/sys/dev/sound/pci/hda/hdaa_patches.c b/sys/dev/sound/pci/hda/hdaa_patches.c
index d425e3c0080b13..3c063deb0822d5 100644
--- a/sys/dev/sound/pci/hda/hdaa_patches.c
+++ b/sys/dev/sound/pci/hda/hdaa_patches.c
@@ -330,7 +330,8 @@ hdac_pin_patch(struct hdaa_widget *w)
 		}
 	} else if (id == HDA_CODEC_IDT92HD95B &&
 	    (subid == FRAMEWORK_LAPTOP_0001_SUBVENDOR ||
-	    subid == FRAMEWORK_LAPTOP_0002_SUBVENDOR)) {
+	    subid == FRAMEWORK_LAPTOP_0002_SUBVENDOR ||
+	    subid == FRAMEWORK_LAPTOP_0003_SUBVENDOR)) {
 		switch (nid) {
 		case 10:
 			patch_str = "as=1 seq=15 color=Black loc=Left";
diff --git a/sys/dev/sound/pci/hda/hdac.h b/sys/dev/sound/pci/hda/hdac.h
index d00ad4e4705c59..f0e72f091a85c2 100644
--- a/sys/dev/sound/pci/hda/hdac.h
+++ b/sys/dev/sound/pci/hda/hdac.h
@@ -528,6 +528,7 @@
 #define FRAMEWORK_VENDORID	0xf111
 #define FRAMEWORK_LAPTOP_0001_SUBVENDOR HDA_MODEL_CONSTRUCT(FRAMEWORK, 0x0001)
 #define FRAMEWORK_LAPTOP_0002_SUBVENDOR HDA_MODEL_CONSTRUCT(FRAMEWORK, 0x0002)
+#define FRAMEWORK_LAPTOP_0003_SUBVENDOR HDA_MODEL_CONSTRUCT(FRAMEWORK, 0x0003)
 
 /* All codecs you can eat... */
 #define HDA_CODEC_CONSTRUCT(vendor, id) \

From 93411b39fff24ab4c9bf2b0395c7789b1a1c7a42 Mon Sep 17 00:00:00 2001
From: Adrian Chadd <adrian@FreeBSD.org>
Date: Sun, 15 Dec 2024 18:05:13 -0800
Subject: [PATCH 005/143] rtwn: calculate control rate for VHT rate frames

If the passed in rate is a VHT rate, use rtwn_ctl_vhtrate() to
find a suitable rate for RTS/CTS.

Differential Revision:	 https://reviews.freebsd.org/D48295
Reviewed by:	bz, cy, emaste
---
 sys/dev/rtwn/rtl8812a/r12a_tx.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/sys/dev/rtwn/rtl8812a/r12a_tx.c b/sys/dev/rtwn/rtl8812a/r12a_tx.c
index cc686668e4a221..582e6e0ddaf464 100644
--- a/sys/dev/rtwn/rtl8812a/r12a_tx.c
+++ b/sys/dev/rtwn/rtl8812a/r12a_tx.c
@@ -103,11 +103,17 @@ r12a_tx_protection(struct rtwn_softc *sc, struct r12a_tx_desc *txd,
 
 	if (mode == IEEE80211_PROT_CTSONLY ||
 	    mode == IEEE80211_PROT_RTSCTS) {
-		/* TODO: VHT */
-		if (RTWN_RATE_IS_HT(ridx))
+		/*
+		 * Note: this code assumes basic rates for protection for
+		 * both 802.11abg and 802.11n rates.
+		 */
+		if (RTWN_RATE_IS_VHT(ridx))
+			rate = rtwn_ctl_vhtrate(ic->ic_rt, ridx);
+		else if (RTWN_RATE_IS_HT(ridx))
 			rate = rtwn_ctl_mcsrate(ic->ic_rt, ridx);
 		else
 			rate = ieee80211_ctl_rate(ic->ic_rt, ridx2rate[ridx]);
+		/* Map basic rate back to ridx */
 		ridx = rate2ridx(IEEE80211_RV(rate));
 
 		txd->txdw4 |= htole32(SM(R12A_TXDW4_RTSRATE, ridx));

From 50c1e179b584f43ba82e9afc91b25ec4831b58ef Mon Sep 17 00:00:00 2001
From: Kristof Provost <kp@FreeBSD.org>
Date: Sun, 5 Jan 2025 17:09:08 +0100
Subject: [PATCH 006/143] umtx: handle allocation failure in umtx_pi_alloc()

Don't assume that this allocation will succeed. We may have been passed
M_NOWAIT.

The calling code already handles allocation failures, but the function
itself did not.

PR:		283807
MFC after:	1 week
---
 sys/kern/kern_umtx.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sys/kern/kern_umtx.c b/sys/kern/kern_umtx.c
index c4a820f41bc367..a9294c324cb422 100644
--- a/sys/kern/kern_umtx.c
+++ b/sys/kern/kern_umtx.c
@@ -1740,6 +1740,9 @@ umtx_pi_alloc(int flags)
 	struct umtx_pi *pi;
 
 	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
+	if (pi == NULL)
+		return (NULL);
+
 	TAILQ_INIT(&pi->pi_blocked);
 	atomic_add_int(&umtx_pi_allocated, 1);
 	return (pi);

From d830cac9fed1882a192b0ec5c96fb3ac9bfbc2ee Mon Sep 17 00:00:00 2001
From: Poul-Henning Kamp <phk@FreeBSD.org>
Date: Sun, 5 Jan 2025 20:14:37 +0000
Subject: [PATCH 007/143] recoverdisk: Fix comparison between stripesize and
 sectorsize

Discovered trying to read a 360KB floppy disk :-)
---
 sbin/recoverdisk/recoverdisk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sbin/recoverdisk/recoverdisk.c b/sbin/recoverdisk/recoverdisk.c
index 91f42c904c52b3..446266c36d5055 100644
--- a/sbin/recoverdisk/recoverdisk.c
+++ b/sbin/recoverdisk/recoverdisk.c
@@ -482,7 +482,7 @@ main(int argc, char * const argv[])
 			err(1, "DIOCGSECTORSIZE failed");
 
 		error = ioctl(fdr, DIOCGSTRIPESIZE, &stripesize);
-		if (error == 0 && stripesize > sectorsize)
+		if (error == 0 && stripesize < sectorsize)
 			sectorsize = stripesize;
 
 		minsize = sectorsize;

From a0f06dfb0d188966bee7265ec7d9f20093186bb6 Mon Sep 17 00:00:00 2001
From: Emmanuel Vadot <manu@FreeBSD.org>
Date: Mon, 6 Jan 2025 08:34:02 +0100
Subject: [PATCH 008/143] loader: Add a list of firmware name mapping

Since we started to ship raw firmware for iwm(4), users who load
the driver from the loader are having problems, as the loader doesn't know
that the firmware files are now raw files and not kernel modules anymore.
Start a list of default entries for iwm(4) firmware name mappings so that it
will still work when loaded from the loader.

Differential Revision:	https://reviews.freebsd.org/D48211
Reviewed by:	bz, imp, kevans
Sponsored by: Beckhoff Automation GmbH & Co. KG
---
 stand/defaults/loader.conf | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/stand/defaults/loader.conf b/stand/defaults/loader.conf
index d266c240955afa..b1e87520a2d41e 100644
--- a/stand/defaults/loader.conf
+++ b/stand/defaults/loader.conf
@@ -109,7 +109,7 @@ kernels_autodetect="YES"	# Auto-detect kernel directories in /boot
 #comconsole_speed="115200"	# Set the current serial console speed
 #console="vidconsole"		# A comma separated list of console(s)
 #currdev="disk1s1a"		# Set the current device
-module_path="/boot/modules;/boot/dtb;/boot/dtb/overlays"	# Set the module search path
+module_path="/boot/modules;/boot/firmware;/boot/dtb;/boot/dtb/overlays"	# Set the module search path
 module_blacklist="drm drm2 radeonkms i915kms amdgpu"	# Loader module blacklist
 #prompt="\\${interpret}"	# Set the command prompt
 #root_disk_unit="0"		# Force the root disk unit number
@@ -182,3 +182,14 @@ module_blacklist="drm drm2 radeonkms i915kms amdgpu"	# Loader module blacklist
 #module_before="cmd"		# executes "cmd" before loading the module
 #module_after="cmd"		# executes "cmd" after loading the module
 #module_error="cmd"		# executes "cmd" if load fails
+
+### Firmware names mapping list
+iwm3160fw_type="firmware"
+iwm7260fw_type="firmware"
+iwm7265fw_type="firmware"
+iwm8265fw_type="firmware"
+iwm9260fw_type="firmware"
+iwm3168fw_type="firmware"
+iwm7265Dfw_type="firmware"
+iwm8000C_type="firmware"
+iwm9000fw_type="firmware"

From 48db612d8939fab6081fc2bf16f0d42aef3c682d Mon Sep 17 00:00:00 2001
From: Emmanuel Vadot <manu@FreeBSD.org>
Date: Thu, 2 Jan 2025 13:46:30 +0100
Subject: [PATCH 009/143] arm64: Add a new SOC_ROCKCHIP option

A lot of drivers are shared between all Rockchip SoCs; each time we add
support for a new SoC we need to add the options in the files.arm64 lines.
Add a new option SOC_ROCKCHIP that will help simplify this file.

Reviewed by:	andrew
Differential Revision:	https://reviews.freebsd.org/D48286
---
 sys/arm64/conf/std.rockchip |  1 +
 sys/conf/files.arm64        | 30 +++++++++++++++---------------
 sys/conf/options.arm64      |  7 ++++---
 3 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/sys/arm64/conf/std.rockchip b/sys/arm64/conf/std.rockchip
index 3733ddc4eeae4a..d32de4e4fe7950 100644
--- a/sys/arm64/conf/std.rockchip
+++ b/sys/arm64/conf/std.rockchip
@@ -3,6 +3,7 @@
 #
 
 # SoC support
+options 	SOC_ROCKCHIP
 options 	SOC_ROCKCHIP_RK3328
 options 	SOC_ROCKCHIP_RK3399
 options 	SOC_ROCKCHIP_RK3568
diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64
index 3335dfe6cab176..b1f3aefadbe140 100644
--- a/sys/conf/files.arm64
+++ b/sys/conf/files.arm64
@@ -720,18 +720,18 @@ arm64/rockchip/rk3568_pciephy.c			optional fdt pci soc_rockchip_rk3568
 arm64/rockchip/rk_i2s.c				optional fdt sound soc_rockchip_rk3328 | fdt sound soc_rockchip_rk3399
 arm64/rockchip/rk_otp.c				optional fdt soc_rockchip_rk3568
 arm64/rockchip/rk_otp_if.m			optional fdt soc_rockchip_rk3568
-dev/iicbus/pmic/rockchip/rk8xx.c		optional fdt rk805 soc_rockchip_rk3328 | fdt rk805 soc_rockchip_rk3399 | fdt soc_rockchip_rk3568
-dev/iicbus/pmic/rockchip/rk8xx_clocks.c		optional fdt rk805 soc_rockchip_rk3328 | fdt rk805 soc_rockchip_rk3399 | fdt soc_rockchip_rk3568
-dev/iicbus/pmic/rockchip/rk8xx_regulators.c	optional fdt rk805 soc_rockchip_rk3328 | fdt rk805 soc_rockchip_rk3399 | fdt soc_rockchip_rk3568
-dev/iicbus/pmic/rockchip/rk8xx_rtc.c		optional fdt rk805 soc_rockchip_rk3328 | fdt rk805 soc_rockchip_rk3399 | fdt soc_rockchip_rk3568
+dev/iicbus/pmic/rockchip/rk8xx.c		optional fdt rk805 soc_rockchip
+dev/iicbus/pmic/rockchip/rk8xx_clocks.c		optional fdt rk805 soc_rockchip
+dev/iicbus/pmic/rockchip/rk8xx_regulators.c	optional fdt rk805 soc_rockchip
+dev/iicbus/pmic/rockchip/rk8xx_rtc.c		optional fdt rk805 soc_rockchip
 dev/iicbus/pmic/rockchip/rk805.c		optional fdt rk805 soc_rockchip_rk3328
 dev/iicbus/pmic/rockchip/rk808.c		optional fdt rk805 soc_rockchip_rk3399
 dev/iicbus/pmic/rockchip/rk817.c		optional fdt rk817 soc_rockchip_rk3568
-arm64/rockchip/rk_grf.c				optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 | fdt soc_rockchip_rk3568
-arm64/rockchip/rk_pinctrl.c			optional fdt rk_pinctrl soc_rockchip_rk3328 | fdt rk_pinctrl soc_rockchip_rk3399 | fdt soc_rockchip_rk3568
-arm64/rockchip/rk_gpio.c			optional fdt rk_gpio soc_rockchip_rk3328 | fdt rk_gpio soc_rockchip_rk3399 | fdt soc_rockchip_rk3568
+arm64/rockchip/rk_grf.c				optional fdt soc_rockchip
+arm64/rockchip/rk_pinctrl.c			optional fdt rk_pinctrl soc_rockchip
+arm64/rockchip/rk_gpio.c			optional fdt rk_gpio soc_rockchip
 arm64/rockchip/rk_iodomain.c			optional fdt rk_iodomain
-arm64/rockchip/rk_usb2phy.c			optional fdt rk_usb2phy soc_rockchip_rk3328 | fdt rk_usb2phy soc_rockchip_rk3399 | fdt rk_usb2phy soc_rockchip_rk3568
+arm64/rockchip/rk_usb2phy.c			optional fdt rk_usb2phy soc_rockchip
 arm64/rockchip/rk_typec_phy.c			optional fdt rk_typec_phy soc_rockchip_rk3399
 arm64/rockchip/rk_tsadc_if.m			optional fdt soc_rockchip_rk3399 | fdt soc_rockchip_rk3568
 arm64/rockchip/rk_tsadc.c			optional fdt soc_rockchip_rk3399 | fdt soc_rockchip_rk3568
@@ -739,13 +739,13 @@ arm64/rockchip/rk_pcie.c			optional fdt pci soc_rockchip_rk3399
 arm64/rockchip/rk_pcie_phy.c			optional fdt pci soc_rockchip_rk3399
 
 # RockChip Clock support
-dev/clk/rockchip/rk_cru.c			optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 | fdt soc_rockchip_rk3568
-dev/clk/rockchip/rk_clk_armclk.c		optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 | fdt soc_rockchip_rk3568
-dev/clk/rockchip/rk_clk_composite.c		optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 | fdt soc_rockchip_rk3568
-dev/clk/rockchip/rk_clk_fract.c			optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 | fdt soc_rockchip_rk3568
-dev/clk/rockchip/rk_clk_gate.c			optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 | fdt soc_rockchip_rk3568
-dev/clk/rockchip/rk_clk_mux.c			optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 | fdt soc_rockchip_rk3568
-dev/clk/rockchip/rk_clk_pll.c			optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 | fdt soc_rockchip_rk3568
+dev/clk/rockchip/rk_cru.c			optional fdt soc_rockchip
+dev/clk/rockchip/rk_clk_armclk.c		optional fdt soc_rockchip
+dev/clk/rockchip/rk_clk_composite.c		optional fdt soc_rockchip
+dev/clk/rockchip/rk_clk_fract.c			optional fdt soc_rockchip
+dev/clk/rockchip/rk_clk_gate.c			optional fdt soc_rockchip
+dev/clk/rockchip/rk_clk_mux.c			optional fdt soc_rockchip
+dev/clk/rockchip/rk_clk_pll.c			optional fdt soc_rockchip
 dev/clk/rockchip/rk3328_cru.c			optional fdt soc_rockchip_rk3328
 dev/clk/rockchip/rk3399_cru.c			optional fdt soc_rockchip_rk3399
 dev/clk/rockchip/rk3399_pmucru.c		optional fdt soc_rockchip_rk3399
diff --git a/sys/conf/options.arm64 b/sys/conf/options.arm64
index e36f856ecb04ad..4bdd408f46512d 100644
--- a/sys/conf/options.arm64
+++ b/sys/conf/options.arm64
@@ -37,7 +37,8 @@ SOC_INTEL_STRATIX10		opt_soc.h
 SOC_MARVELL_8K			opt_soc.h
 SOC_NVIDIA_TEGRA210		opt_soc.h
 SOC_NXP_LS			opt_soc.h
-SOC_ROCKCHIP_RK3328		opt_soc.h
-SOC_ROCKCHIP_RK3399		opt_soc.h
-SOC_ROCKCHIP_RK3568		opt_soc.h
+SOC_ROCKCHIP			opt_soc.h
+SOC_ROCKCHIP_RK3328		opt_soc.h	# Depends on SOC_ROCKCHIP
+SOC_ROCKCHIP_RK3399		opt_soc.h	# Depends on SOC_ROCKCHIP
+SOC_ROCKCHIP_RK3568		opt_soc.h	# Depends on SOC_ROCKCHIP
 SOC_XILINX_ZYNQ			opt_soc.h

From ad1bf74705e4bbf116bf9e285088c4dfb31247e2 Mon Sep 17 00:00:00 2001
From: Emmanuel Vadot <manu@FreeBSD.org>
Date: Thu, 2 Jan 2025 13:51:09 +0100
Subject: [PATCH 010/143] arm64: rockchip: Add a new rk8xx device

This device will select the base driver for Rockchip PMIC.
While here also add a new rk808 device which selects the PMIC used for RK3399

Reviewed by:	andrew
Differential Revision:	https://reviews.freebsd.org/D48287
---
 sys/arm64/conf/std.rockchip |  2 ++
 sys/conf/files.arm64        | 10 +++++-----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/sys/arm64/conf/std.rockchip b/sys/arm64/conf/std.rockchip
index d32de4e4fe7950..dc4dfadfde3999 100644
--- a/sys/arm64/conf/std.rockchip
+++ b/sys/arm64/conf/std.rockchip
@@ -21,7 +21,9 @@ device		rk_i2c			# RockChip I2C controller
 device		fan53555		# Fairchild Semi FAN53555/SYR82x Regulator
 
 # Power management controllers
+device		rk8xx			# RockChip RK8XX base support
 device		rk805			# RockChip RK805 PMIC
+device		rk808			# RockChip RK808 PMIC
 device		rk817			# RockChip RK817 PMIC
 device		syr827			# Silergy SYR827 PMIC
 device		tcs4525			# TCS 4525 PMIC
diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64
index b1f3aefadbe140..ed251b7ae0d72c 100644
--- a/sys/conf/files.arm64
+++ b/sys/conf/files.arm64
@@ -720,12 +720,12 @@ arm64/rockchip/rk3568_pciephy.c			optional fdt pci soc_rockchip_rk3568
 arm64/rockchip/rk_i2s.c				optional fdt sound soc_rockchip_rk3328 | fdt sound soc_rockchip_rk3399
 arm64/rockchip/rk_otp.c				optional fdt soc_rockchip_rk3568
 arm64/rockchip/rk_otp_if.m			optional fdt soc_rockchip_rk3568
-dev/iicbus/pmic/rockchip/rk8xx.c		optional fdt rk805 soc_rockchip
-dev/iicbus/pmic/rockchip/rk8xx_clocks.c		optional fdt rk805 soc_rockchip
-dev/iicbus/pmic/rockchip/rk8xx_regulators.c	optional fdt rk805 soc_rockchip
-dev/iicbus/pmic/rockchip/rk8xx_rtc.c		optional fdt rk805 soc_rockchip
+dev/iicbus/pmic/rockchip/rk8xx.c		optional fdt rk8xx soc_rockchip
+dev/iicbus/pmic/rockchip/rk8xx_clocks.c		optional fdt rk8xx soc_rockchip
+dev/iicbus/pmic/rockchip/rk8xx_regulators.c	optional fdt rk8xx soc_rockchip
+dev/iicbus/pmic/rockchip/rk8xx_rtc.c		optional fdt rk8xx soc_rockchip
 dev/iicbus/pmic/rockchip/rk805.c		optional fdt rk805 soc_rockchip_rk3328
-dev/iicbus/pmic/rockchip/rk808.c		optional fdt rk805 soc_rockchip_rk3399
+dev/iicbus/pmic/rockchip/rk808.c		optional fdt rk808 soc_rockchip_rk3399
 dev/iicbus/pmic/rockchip/rk817.c		optional fdt rk817 soc_rockchip_rk3568
 arm64/rockchip/rk_grf.c				optional fdt soc_rockchip
 arm64/rockchip/rk_pinctrl.c			optional fdt rk_pinctrl soc_rockchip

From a071c76746f6f10ac2254ae1d9d2c34beb21a981 Mon Sep 17 00:00:00 2001
From: Emmanuel Vadot <manu@FreeBSD.org>
Date: Mon, 6 Jan 2025 08:39:22 +0100
Subject: [PATCH 011/143] UPDATING: Document recent Rockchip options and device

---
 UPDATING | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/UPDATING b/UPDATING
index 4fef13073431ba..412eb446f4fd3e 100644
--- a/UPDATING
+++ b/UPDATING
@@ -27,6 +27,12 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 15.x IS SLOW:
 	world, or to merely disable the most expensive debugging functionality
 	at runtime, run "ln -s 'abort:false,junk:false' /etc/malloc.conf".)
 
+20250106:
+	A new SOC_ROCKCHIP option appeared, so if you have a custom kernel configuration
+	targeting a Rockchip SoC you need to add it so that shared and mandatory drivers
+	for this SoC family will be selected.
+	Also, a new rk8xx device was added; this selects the base driver for Rockchip PMICs.
+
 20241216:
 	The iwm(4) firmwares are no longer compiled as kernel modules but instead
 	shipped as raw files. For pkgbase users if you use iwm(4) you will need

From 73ad5af7d90894cdf9f829cecb45af506bbe2dba Mon Sep 17 00:00:00 2001
From: Emmanuel Vadot <manu@FreeBSD.org>
Date: Mon, 6 Jan 2025 12:50:51 +0100
Subject: [PATCH 012/143] arm64: Unbreak LINT build

Add the recently added options SOC_ROCKCHIP and device rk8xx.
While here add options SOC_ROCKCHIP_RK3568 and device rk808/rk817
which were never added.

Fixes:	48db612d8939 ("arm64: Add a new SOC_ROCKCHIP option")
Fixes:	ad1bf74705e4 ("arm64: rockchip: Add a new rk8xx device")
---
 sys/arm64/conf/NOTES | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sys/arm64/conf/NOTES b/sys/arm64/conf/NOTES
index b3bf2fedd5f7ca..54bc7dcf1f0926 100644
--- a/sys/arm64/conf/NOTES
+++ b/sys/arm64/conf/NOTES
@@ -43,8 +43,10 @@ options 	SOC_INTEL_STRATIX10
 options 	SOC_MARVELL_8K
 options 	SOC_NVIDIA_TEGRA210
 options 	SOC_NXP_LS
+options 	SOC_ROCKCHIP
 options 	SOC_ROCKCHIP_RK3328
 options 	SOC_ROCKCHIP_RK3399
+options 	SOC_ROCKCHIP_RK3568
 options 	SOC_XILINX_ZYNQ
 
 # Timer drivers
@@ -161,7 +163,10 @@ device		aw_wdog		# Allwinner Watchdog
 
 # Power management controllers
 device		axp81x		# X-Powers AXP81x PMIC
+device		rk8xx		# RockChip RK8XX base support
 device		rk805		# RockChip RK805 PMIC
+device		rk808		# RockChip RK808 PMIC
+device		rk817		# RockChip RK817 PMIC
 
 # EFUSE
 device		aw_sid		# Allwinner Secure ID EFUSE

From 7f39f03c4d9a138f84a08931b2a6c016521cacf5 Mon Sep 17 00:00:00 2001
From: Gleb Smirnoff <glebius@FreeBSD.org>
Date: Mon, 6 Jan 2025 08:22:14 -0800
Subject: [PATCH 013/143] libc/xdr: remove bogus lseek(2) for xdr streams

Doing some debugging I noticed that applications using rpc(3) would often
make lseek(2) on a totally bogus file descriptor, that looks more like a
pointer.  So, what happens here is that xdrrec type xdr doesn't keep
track of how many bytes were sent/received on the stream and tries to
obtain this number via lseek(2).  Then it adds/subtracts the offset in the
internal buffer from the obtained number.  This code originates from the
original Sun RPC import in 1994.  However, it was not a working code even
if Solaris would support lseek(2) on a socket, because it was passing not
the file descriptor, but a pointer to opaque data from upper RPC layer.
It could be that previously (before import to FreeBSD) code was correct,
but the Solaris 8 documentation says that lseek(2) on socket isn't
supported [1].  Maybe supported on older Solaris?

Anyway, this lseek(2) never worked and xdr_getpos() would always fail on
xdrrec object, until 8f55a568f69c5 in 2008 it was slightly fixed to
tolerate failure of lseek(2) and return a correct value within the small
internal buffer for XDR_ENCODE mode and an incorrect (negative to
unsigned) result for XDR_DECODE.  It seems no consumer ever calls
xdr_getpos()/xdr_setpos() on this kind of descriptor when in XDR_DECODE
mode.

So, remove this lseek(2) and preserve operation within the small buffer
only. Supposedly fix the operation for XDR_DECODE mode.  Note that there
is no use and no test coverage for the XDR_DECODE.

Note that xdr(3) manual page already documents limitations for
xdr_getpos() and xdr_setpos() for the stream type objects.

[1] https://docs.oracle.com/cd/E19109-01/tsolaris8/835-8003/6ruu1b0or/index.html

Reviewed by:		asomers, markj
Differential Revision:	https://reviews.freebsd.org/D48205
---
 lib/libc/xdr/xdr_rec.c | 69 +++++++++++++++++++++---------------------
 1 file changed, 35 insertions(+), 34 deletions(-)

diff --git a/lib/libc/xdr/xdr_rec.c b/lib/libc/xdr/xdr_rec.c
index f1167fdeaa65d6..7dc9bbb31ec393 100644
--- a/lib/libc/xdr/xdr_rec.c
+++ b/lib/libc/xdr/xdr_rec.c
@@ -318,27 +318,30 @@ xdrrec_putbytes(XDR *xdrs, const char *addr, u_int len)
 	return (TRUE);
 }
 
+/*
+ * XXX: xdrrec operates on a TCP stream and doesn't keep record of how many
+ * bytes were sent/received overall.  Thus, the XDR_GETPOS() and XDR_SETPOS()
+ * can operate only within small internal buffer.  So far, the limited set of
+ * consumers of this xdr are fine with that.  It also seems that methods are
+ * never called in the XDR_DECODE mode.
+ */
 static u_int
 xdrrec_getpos(XDR *xdrs)
 {
 	RECSTREAM *rstrm = (RECSTREAM *)xdrs->x_private;
-	off_t pos;
+	ptrdiff_t pos;
 
-	pos = lseek((int)(u_long)rstrm->tcp_handle, (off_t)0, 1);
-	if (pos == -1)
-		pos = 0;
 	switch (xdrs->x_op) {
-
 	case XDR_ENCODE:
-		pos += rstrm->out_finger - rstrm->out_base;
+		pos = rstrm->out_finger - rstrm->out_base;
 		break;
 
 	case XDR_DECODE:
-		pos -= rstrm->in_boundry - rstrm->in_finger;
+		pos = rstrm->in_finger - rstrm->in_base;
 		break;
 
-	default:
-		pos = (off_t) -1;
+	case XDR_FREE:
+		pos = -1;
 		break;
 	}
 	return ((u_int) pos);
@@ -352,32 +355,30 @@ xdrrec_setpos(XDR *xdrs, u_int pos)
 	int delta = currpos - pos;
 	char *newpos;
 
-	if ((int)currpos != -1)
-		switch (xdrs->x_op) {
-
-		case XDR_ENCODE:
-			newpos = rstrm->out_finger - delta;
-			if ((newpos > (char *)(void *)(rstrm->frag_header)) &&
-				(newpos < rstrm->out_boundry)) {
-				rstrm->out_finger = newpos;
-				return (TRUE);
-			}
-			break;
-
-		case XDR_DECODE:
-			newpos = rstrm->in_finger - delta;
-			if ((delta < (int)(rstrm->fbtbc)) &&
-				(newpos <= rstrm->in_boundry) &&
-				(newpos >= rstrm->in_base)) {
-				rstrm->in_finger = newpos;
-				rstrm->fbtbc -= delta;
-				return (TRUE);
-			}
-			break;
-
-		case XDR_FREE:
-			break;
+	switch (xdrs->x_op) {
+	case XDR_ENCODE:
+		newpos = rstrm->out_finger - delta;
+		if ((newpos > (char *)(void *)(rstrm->frag_header)) &&
+			(newpos < rstrm->out_boundry)) {
+			rstrm->out_finger = newpos;
+			return (TRUE);
 		}
+		break;
+
+	case XDR_DECODE:
+		newpos = rstrm->in_finger - delta;
+		if ((delta < (int)(rstrm->fbtbc)) &&
+			(newpos <= rstrm->in_boundry) &&
+			(newpos >= rstrm->in_base)) {
+			rstrm->in_finger = newpos;
+			rstrm->fbtbc -= delta;
+			return (TRUE);
+		}
+		break;
+
+	case XDR_FREE:
+		break;
+	}
 	return (FALSE);
 }
 

From b5c1f7d479de02f2c886b7453adbea312418ee7a Mon Sep 17 00:00:00 2001
From: Gleb Smirnoff <glebius@FreeBSD.org>
Date: Mon, 6 Jan 2025 08:22:15 -0800
Subject: [PATCH 014/143] xdr: use C99 initializers for xdr_ops

No functional change.
---
 sys/xdr/xdr_mbuf.c | 16 ++++++++--------
 sys/xdr/xdr_mem.c  | 36 ++++++++++++++++++------------------
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/sys/xdr/xdr_mbuf.c b/sys/xdr/xdr_mbuf.c
index 0ed807de903e76..896e317f552629 100644
--- a/sys/xdr/xdr_mbuf.c
+++ b/sys/xdr/xdr_mbuf.c
@@ -46,14 +46,14 @@ static bool_t xdrmbuf_setpos(XDR *, u_int);
 static int32_t *xdrmbuf_inline(XDR *, u_int);
 
 static const struct	xdr_ops xdrmbuf_ops = {
-	xdrmbuf_getlong,
-	xdrmbuf_putlong,
-	xdrmbuf_getbytes,
-	xdrmbuf_putbytes,
-	xdrmbuf_getpos,
-	xdrmbuf_setpos,
-	xdrmbuf_inline,
-	xdrmbuf_destroy
+	.x_getlong =	xdrmbuf_getlong,
+	.x_putlong =	xdrmbuf_putlong,
+	.x_getbytes =	xdrmbuf_getbytes,
+	.x_putbytes =	xdrmbuf_putbytes,
+	.x_getpostn =	xdrmbuf_getpos,
+	.x_setpostn =	xdrmbuf_setpos,
+	.x_inline =	xdrmbuf_inline,
+	.x_destroy =	xdrmbuf_destroy,
 };
 
 /*
diff --git a/sys/xdr/xdr_mem.c b/sys/xdr/xdr_mem.c
index 1489aadf53a0f5..65a74836b7b301 100644
--- a/sys/xdr/xdr_mem.c
+++ b/sys/xdr/xdr_mem.c
@@ -63,27 +63,27 @@ static int32_t *xdrmem_inline_unaligned(XDR *, u_int);
 static bool_t xdrmem_control(XDR *xdrs, int request, void *info);
 
 static const struct	xdr_ops xdrmem_ops_aligned = {
-	xdrmem_getlong_aligned,
-	xdrmem_putlong_aligned,
-	xdrmem_getbytes,
-	xdrmem_putbytes,
-	xdrmem_getpos,
-	xdrmem_setpos,
-	xdrmem_inline_aligned,
-	xdrmem_destroy,
-	xdrmem_control
+	.x_getlong =	xdrmem_getlong_aligned,
+	.x_putlong =	xdrmem_putlong_aligned,
+	.x_getbytes =	xdrmem_getbytes,
+	.x_putbytes =	xdrmem_putbytes,
+	.x_getpostn =	xdrmem_getpos,
+	.x_setpostn =	xdrmem_setpos,
+	.x_inline =	xdrmem_inline_aligned,
+	.x_destroy = 	xdrmem_destroy,
+	.x_control =	xdrmem_control,
 };
 
 static const struct	xdr_ops xdrmem_ops_unaligned = {
-	xdrmem_getlong_unaligned,
-	xdrmem_putlong_unaligned,
-	xdrmem_getbytes,
-	xdrmem_putbytes,
-	xdrmem_getpos,
-	xdrmem_setpos,
-	xdrmem_inline_unaligned,
-	xdrmem_destroy,
-	xdrmem_control
+	.x_getlong =	xdrmem_getlong_unaligned,
+	.x_putlong =	xdrmem_putlong_unaligned,
+	.x_getbytes =	xdrmem_getbytes,
+	.x_putbytes =	xdrmem_putbytes,
+	.x_getpostn =	xdrmem_getpos,
+	.x_setpostn =	xdrmem_setpos,
+	.x_inline =	xdrmem_inline_unaligned,
+	.x_destroy =	xdrmem_destroy,
+	.x_control =	xdrmem_control
 };
 
 /*

From c2153a533ffb9691848a072c7628dcf56e0e6442 Mon Sep 17 00:00:00 2001
From: Alan Somers <asomers@FreeBSD.org>
Date: Fri, 27 Dec 2024 14:24:17 -0700
Subject: [PATCH 015/143] fusefs: minor cleanup in the tests

Delete some unused includes and member variables.

MFC after:	2 weeks
Sponsored by:	ConnectWise
---
 tests/sys/fs/fusefs/allow_other.cc | 3 ---
 tests/sys/fs/fusefs/forget.cc      | 1 -
 tests/sys/fs/fusefs/io.cc          | 1 -
 tests/sys/fs/fusefs/notify.cc      | 1 -
 4 files changed, 6 deletions(-)

diff --git a/tests/sys/fs/fusefs/allow_other.cc b/tests/sys/fs/fusefs/allow_other.cc
index dae6290ea8e5da..24a161166a909c 100644
--- a/tests/sys/fs/fusefs/allow_other.cc
+++ b/tests/sys/fs/fusefs/allow_other.cc
@@ -52,9 +52,6 @@ const static char RELPATH[] = "some_file.txt";
 class NoAllowOther: public FuseTest {
 
 public:
-/* Unprivileged user id */
-int m_uid;
-
 virtual void SetUp() {
 	if (geteuid() != 0) {
 		GTEST_SKIP() << "This test must be run as root";
diff --git a/tests/sys/fs/fusefs/forget.cc b/tests/sys/fs/fusefs/forget.cc
index 846198e7592577..1e7764ac478215 100644
--- a/tests/sys/fs/fusefs/forget.cc
+++ b/tests/sys/fs/fusefs/forget.cc
@@ -31,7 +31,6 @@
 extern "C" {
 #include <sys/types.h>
 #include <sys/mount.h>
-#include <sys/sysctl.h>
 
 #include <fcntl.h>
 #include <semaphore.h>
diff --git a/tests/sys/fs/fusefs/io.cc b/tests/sys/fs/fusefs/io.cc
index f8684ee02100e5..ced291836da046 100644
--- a/tests/sys/fs/fusefs/io.cc
+++ b/tests/sys/fs/fusefs/io.cc
@@ -31,7 +31,6 @@
 extern "C" {
 #include <sys/types.h>
 #include <sys/mman.h>
-#include <sys/sysctl.h>
 
 #include <fcntl.h>
 #include <stdlib.h>
diff --git a/tests/sys/fs/fusefs/notify.cc b/tests/sys/fs/fusefs/notify.cc
index e3f539f57599b2..1e22bde13db72f 100644
--- a/tests/sys/fs/fusefs/notify.cc
+++ b/tests/sys/fs/fusefs/notify.cc
@@ -30,7 +30,6 @@
 
 extern "C" {
 #include <sys/types.h>
-#include <sys/sysctl.h>
 
 #include <fcntl.h>
 #include <pthread.h>

From 58610d1e0fbbd1a49927559ee3970d8e4594cc86 Mon Sep 17 00:00:00 2001
From: Pat Maddox <pat@patmaddox.com>
Date: Fri, 13 Dec 2024 11:57:30 -0800
Subject: [PATCH 016/143] build: Sort distributekernel METALOG when using
 -DNO_ROOT

The metalog is produced by install -M, which is not inherently sorted.
This results in non-deterministic file ordering in kernel.txz. Order the
files in kernel.txz to support reproducible builds.

PR:		283214
Reviewed by:	emaste

Signed-off-by: Pat Maddox <pat@patmaddox.com>
---
 Makefile.inc1 | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/Makefile.inc1 b/Makefile.inc1
index bad747a0e551e9..fe799218ab0a40 100644
--- a/Makefile.inc1
+++ b/Makefile.inc1
@@ -1903,9 +1903,7 @@ distributekernel distributekernel.debug: .PHONY
 	false
 .endif
 	mkdir -p ${DESTDIR}/${DISTDIR}
-.if defined(NO_ROOT)
-	@echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.premeta
-.endif
+	rm -f ${DESTDIR}/${DISTDIR}/kernel.premeta
 	${_+_}cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \
 	    ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.premeta/} \
 	    ${IMAKE_MTREE} PATH=${TMPPATH:Q} ${MAKE} KERNEL=${INSTKERNNAME} \
@@ -1913,15 +1911,14 @@ distributekernel distributekernel.debug: .PHONY
 	    METALOG=${METALOG:S/METALOG/kernel.premeta/} \
 	    ${.TARGET:S/distributekernel/install/}
 .if defined(NO_ROOT)
-	@sed -e 's|^./kernel|.|' ${DESTDIR}/${DISTDIR}/kernel.premeta > \
-	    ${DESTDIR}/${DISTDIR}/kernel.meta
+	echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.meta
+	sed -e 's|^./kernel|.|' ${DESTDIR}/${DISTDIR}/kernel.premeta | \
+	    ${METALOG_SORT_CMD} >> ${DESTDIR}/${DISTDIR}/kernel.meta
 .endif
 .endif
 .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes"
 .for _kernel in ${BUILDKERNELS:[2..-1]}
-.if defined(NO_ROOT)
-	@echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta
-.endif
+	rm -f ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta
 	${_+_}cd ${KRNLOBJDIR}/${_kernel}; \
 	    ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.${_kernel}.premeta/} \
 	    ${IMAKE_MTREE} PATH=${TMPPATH:Q} ${MAKE} \
@@ -1930,9 +1927,10 @@ distributekernel distributekernel.debug: .PHONY
 	    METALOG=${METALOG:S/METALOG/kernel.${_kernel}.premeta/} \
 	    ${.TARGET:S/distributekernel/install/}
 .if defined(NO_ROOT)
-	@sed -e "s|^./kernel.${_kernel}|.|" \
-	    ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta > \
-	    ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta
+	echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta
+	sed -e "s|^./kernel.${_kernel}|.|" \
+	    ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta | \
+	    ${METALOG_SORT_CMD} >> ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta
 .endif
 .endfor
 .endif

From f415b2ef30f7bf0db753f09fbba7b0910475b0d2 Mon Sep 17 00:00:00 2001
From: Alan Somers <asomers@FreeBSD.org>
Date: Mon, 6 Jan 2025 12:21:29 -0700
Subject: [PATCH 017/143] fusefs: Coverity cleanup in the lseek tests

Always check the return value of open().

Reported by:	Coverity Scan
CID:		1471118 1471133 1471215 1471896 1471901 1472116 1473799
CID:		1473879 1473996 1555269 1558044
MFC after:	2 weeks
Sponsored by:	ConnectWise
---
 tests/sys/fs/fusefs/lseek.cc | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tests/sys/fs/fusefs/lseek.cc b/tests/sys/fs/fusefs/lseek.cc
index 2a1cb198bccee0..12d41f7af1b26f 100644
--- a/tests/sys/fs/fusefs/lseek.cc
+++ b/tests/sys/fs/fusefs/lseek.cc
@@ -71,6 +71,7 @@ TEST_F(LseekPathconf, already_enosys)
 	).WillOnce(Invoke(ReturnErrno(ENOSYS)));
 
 	fd = open(FULLPATH, O_RDONLY);
+	ASSERT_LE(0, fd);
 
 	EXPECT_EQ(offset_in, lseek(fd, offset_in, SEEK_DATA));
 	EXPECT_EQ(-1, fpathconf(fd, _PC_MIN_HOLE_SIZE));
@@ -105,6 +106,7 @@ TEST_F(LseekPathconf, already_seeked)
 		out.body.lseek.offset = i.body.lseek.offset;
 	})));
 	fd = open(FULLPATH, O_RDONLY);
+	ASSERT_LE(0, fd);
 	EXPECT_EQ(offset, lseek(fd, offset, SEEK_DATA));
 
 	EXPECT_EQ(1, fpathconf(fd, _PC_MIN_HOLE_SIZE));
@@ -171,6 +173,7 @@ TEST_F(LseekPathconf, eio)
 	.WillRepeatedly(Invoke(ReturnErrno(EIO)));
 
 	fd = open(FULLPATH, O_RDONLY);
+	ASSERT_LE(0, fd);
 
 	EXPECT_EQ(-1, fpathconf(fd, _PC_MIN_HOLE_SIZE));
 	EXPECT_EQ(EIO, errno);
@@ -203,6 +206,7 @@ TEST_F(LseekPathconf, enosys_now)
 	).WillOnce(Invoke(ReturnErrno(ENOSYS)));
 
 	fd = open(FULLPATH, O_RDONLY);
+	ASSERT_LE(0, fd);
 
 	EXPECT_EQ(-1, fpathconf(fd, _PC_MIN_HOLE_SIZE));
 	EXPECT_EQ(EINVAL, errno);
@@ -266,6 +270,7 @@ TEST_F(LseekPathconf, seek_now)
 	})));
 
 	fd = open(FULLPATH, O_RDONLY);
+	ASSERT_LE(0, fd);
 	EXPECT_EQ(offset_initial, lseek(fd, offset_initial, SEEK_SET));
 	EXPECT_EQ(1, fpathconf(fd, _PC_MIN_HOLE_SIZE));
 	/* And check that the file pointer hasn't changed */
@@ -299,6 +304,7 @@ TEST_F(LseekPathconf, zerolength)
 	).WillOnce(Invoke(ReturnErrno(ENXIO)));
 
 	fd = open(FULLPATH, O_RDONLY);
+	ASSERT_LE(0, fd);
 	EXPECT_EQ(1, fpathconf(fd, _PC_MIN_HOLE_SIZE));
 	/* Check again, to ensure that the kernel recorded the response */
 	EXPECT_EQ(1, fpathconf(fd, _PC_MIN_HOLE_SIZE));
@@ -327,6 +333,7 @@ TEST_F(LseekPathconf_7_23, already_enosys)
 	).Times(0);
 
 	fd = open(FULLPATH, O_RDONLY);
+	ASSERT_LE(0, fd);
 	EXPECT_EQ(-1, fpathconf(fd, _PC_MIN_HOLE_SIZE));
 	EXPECT_EQ(EINVAL, errno);
 
@@ -391,6 +398,7 @@ TEST_F(LseekSeekData, enosys)
 		_)
 	).WillOnce(Invoke(ReturnErrno(ENOSYS)));
 	fd = open(FULLPATH, O_RDONLY);
+	ASSERT_LE(0, fd);
 
 	/*
 	 * Default behavior: ENXIO if offset is < 0 or >= fsize, offset
@@ -431,6 +439,7 @@ TEST_F(LseekSeekHole, ok)
 		out.body.lseek.offset = offset_out;
 	})));
 	fd = open(FULLPATH, O_RDONLY);
+	ASSERT_LE(0, fd);
 	EXPECT_EQ(offset_out, lseek(fd, offset_in, SEEK_HOLE));
 	EXPECT_EQ(offset_out, lseek(fd, 0, SEEK_CUR));
 
@@ -463,6 +472,7 @@ TEST_F(LseekSeekHole, enosys)
 		_)
 	).WillOnce(Invoke(ReturnErrno(ENOSYS)));
 	fd = open(FULLPATH, O_RDONLY);
+	ASSERT_LE(0, fd);
 
 	/*
 	 * Default behavior: ENXIO if offset is < 0 or >= fsize, fsize
@@ -500,6 +510,7 @@ TEST_F(LseekSeekHole, enxio)
 		_)
 	).WillOnce(Invoke(ReturnErrno(ENXIO)));
 	fd = open(FULLPATH, O_RDONLY);
+	ASSERT_LE(0, fd);
 	EXPECT_EQ(-1, lseek(fd, offset_in, SEEK_HOLE));
 	EXPECT_EQ(ENXIO, errno);
 

From bb9525f30214e8b6c53c6cccd9e8f02e8f8e8c42 Mon Sep 17 00:00:00 2001
From: Michael Tuexen <tuexen@FreeBSD.org>
Date: Mon, 6 Jan 2025 20:35:11 +0100
Subject: [PATCH 018/143] TCP RACK: fix TCP fast open

Do not jump to a place in the code that requires several variables
(segsize, minseg, idle, len, sb_offset) to be set when they are not.
To avoid using these variables, start the HPTS timer explicitly.
This fix only applies to the client side using TCP fast open.

Approved by:		rrs
CID:			1523766
CID:			1523770
CID:			1523786
CID:			1523801
CID:			1523809
MFC after:		1 week
Sponsored by:		Netflix, Inc.
Differential Revision:	https://reviews.freebsd.org/D48322
---
 sys/netinet/tcp_stacks/rack.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index cc07253247609c..7baf1a6267875b 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -19943,10 +19943,11 @@ rack_output(struct tcpcb *tp)
 	     (tp->t_state == TCPS_SYN_SENT)) &&
 	    SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN or SYN|ACK sent */
 	    (tp->t_rxtshift == 0)) {              /* not a retransmit */
-		cwnd_to_use = rack->r_ctl.cwnd_to_use = tp->snd_cwnd;
-		so = inp->inp_socket;
-		sb = &so->so_snd;
-		goto just_return_nolock;
+		rack_start_hpts_timer(rack, tp, cts, 0, 0, 0);
+#ifdef TCP_ACCOUNTING
+		sched_unpin();
+#endif
+		return (0);
 	}
 	/*
 	 * Determine length of data that should be transmitted, and flags

From c28fefe1dc44b69743dd18d038440da38a2867a7 Mon Sep 17 00:00:00 2001
From: Michael Tuexen <tuexen@FreeBSD.org>
Date: Mon, 6 Jan 2025 20:40:33 +0100
Subject: [PATCH 019/143] TCP BBR: remove dead code

bw is unsigned and not zero. So it cannot be smaller than 1.
No functional change intended.

Reviewed by:		rrs, cc
CID:			1523791
MFC after:		1 week
Sponsored by:		Netflix, Inc.
Differential Revision:	https://reviews.freebsd.org/D48323
---
 sys/netinet/tcp_stacks/bbr.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index 535766a0a1b004..97ff46f0b96b8f 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -2993,9 +2993,6 @@ __bbr_get_bw(struct tcp_bbr *bbr)
 		/* We should not be at 0, go to the initial window then  */
 		goto use_initial_window;
 	}
-	if (bw < 1)
-		/* Probably should panic */
-		bw = 1;
 	if (bw < min_bw)
 		bw = min_bw;
 	return (bw);

From 061727efe1e355fb2fde1b05e92718543d05bfe7 Mon Sep 17 00:00:00 2001
From: Michael Tuexen <tuexen@FreeBSD.org>
Date: Mon, 6 Jan 2025 21:25:58 +0100
Subject: [PATCH 020/143] TCP BBR: remove dead code

No functional change intended.

Reviewed by:		rrs
CID:			1523808
MFC after:		1 week
Sponsored by:		Netflix, Inc.
Differential Revision:	https://reviews.freebsd.org/D48338
---
 sys/netinet/tcp_stacks/bbr.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index 97ff46f0b96b8f..4600088bd1a119 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -978,14 +978,6 @@ bbr_timer_audit(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts, struct sock
 				 * and we do
 				 */
 				return;
-		} else if (sbavail(&inp->inp_socket->so_snd) &&
-		    (tmr_up == PACE_TMR_RXT)) {
-			/*
-			 * if we hit enobufs then we would expect the
-			 * possibility of nothing outstanding and the RXT up
-			 * (and the hptsi timer).
-			 */
-			return;
 		} else if (((V_tcp_always_keepalive ||
 			    inp->inp_socket->so_options & SO_KEEPALIVE) &&
 			    (tp->t_state <= TCPS_CLOSING)) &&

From e8ec28047df5185582a95c5211ed75682fad5ec5 Mon Sep 17 00:00:00 2001
From: Michael Tuexen <tuexen@FreeBSD.org>
Date: Mon, 6 Jan 2025 21:38:34 +0100
Subject: [PATCH 021/143] TCP RACK: fix TCP_RACK_PACING_BETA socket option

Bring back the code that was accidentally removed. While there,
indent a comment correctly.

Reviewed by:		rrs
CID:			1540026
Fixes:			e18b97bd63a8 ("Update to bring the rack stack with all its fixes in.")
MFC after:		1 week
Sponsored by:		Netflix, Inc.
Differential Revision:	https://reviews.freebsd.org/D48340
---
 sys/netinet/tcp_stacks/rack.c | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 7baf1a6267875b..f590edd71d9d3c 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -24486,15 +24486,29 @@ rack_get_sockopt(struct tcpcb *tp, struct sockopt *sopt)
 	 * when you exit recovery.
 	 */
 	case TCP_RACK_PACING_BETA:
+		if (strcmp(tp->t_cc->name, CCALGONAME_NEWRENO) != 0)
+			error = EINVAL;
+		else if (rack->rc_pacing_cc_set == 0)
+			optval = rack->r_ctl.rc_saved_beta.beta;
+		else {
+			/*
+			 * Reach out into the CC data and report back what
+			 * I have previously set. Yeah it looks hackish but
+			 * we don't want to report the saved values.
+			 */
+			if (tp->t_ccv.cc_data)
+				optval = ((struct newreno *)tp->t_ccv.cc_data)->beta;
+			else
+				error = EINVAL;
+		}
 		break;
-		/*
-		 * Beta_ecn is the congestion control value for NewReno that influences how
-		 * much of a backoff happens when a ECN mark is detected. It is normally set
-		 * to 80 for 80% i.e. the cwnd is reduced by 20% of its previous value when
-		 * you exit recovery. Note that classic ECN has a beta of 50, it is only
-		 * ABE Ecn that uses this "less" value, but we do too with pacing :)
-		 */
-
+	/*
+	 * Beta_ecn is the congestion control value for NewReno that influences how
+	 * much of a backoff happens when a ECN mark is detected. It is normally set
+	 * to 80 for 80% i.e. the cwnd is reduced by 20% of its previous value when
+	 * you exit recovery. Note that classic ECN has a beta of 50, it is only
+	 * ABE Ecn that uses this "less" value, but we do too with pacing :)
+	 */
 	case TCP_RACK_PACING_BETA_ECN:
 		if (strcmp(tp->t_cc->name, CCALGONAME_NEWRENO) != 0)
 			error = EINVAL;

From 9743e9efdf5f0d2338d7cfeed8f09d89d889bac4 Mon Sep 17 00:00:00 2001
From: Mark Johnston <markj@FreeBSD.org>
Date: Mon, 6 Jan 2025 20:53:52 +0000
Subject: [PATCH 022/143] SO_SPLICE tests: Fix a comment typo

MFC after:	1 week
Sponsored by:	Klara, Inc.
---
 tests/sys/kern/socket_splice.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/sys/kern/socket_splice.c b/tests/sys/kern/socket_splice.c
index 3970f16b34dce1..3a85ae91ecc79d 100644
--- a/tests/sys/kern/socket_splice.c
+++ b/tests/sys/kern/socket_splice.c
@@ -330,7 +330,7 @@ ATF_TC_BODY(splice_capsicum, tc)
 	tcp4_socketpair(right);
 
 	/*
-	 * Make sure that we splice a socket that's missing recv rights.
+	 * Make sure that we can't splice a socket that's missing recv rights.
 	 */
 	remove_rights(left[1], cap_rights_init(&rights, CAP_RECV));
 	splice_init(&sp, right[0], 0, NULL);

From 8c75c15d43e4123bc51f24f5bf99319289c45a6c Mon Sep 17 00:00:00 2001
From: Mark Johnston <markj@FreeBSD.org>
Date: Mon, 6 Jan 2025 22:53:38 +0000
Subject: [PATCH 023/143] jail: Avoid a potential use-after-free when
 destroying jails

prison_deref() and prison_deref_kill() have to handle the case where
destruction of a jail will release the final reference on the jail's
parent, resulting in destruction of the parent jail.  They thus maintain
a list of jails whose references have gone away; the loop at the end of
prison_deref() then goes through the list and deallocates resources
associated with each jail.  In particular, if a jail's VNET is not the
same as that of its parent, this loop destroys the VNET.

Suppose prison_deref() removes the last reference on a jail, releasing a
reference to its parent and causing the jail to be placed in the
"freeprison" list.  Suppose then that the parent jail is destroyed
before the "freeprison" list is processed.  When destroying the
now-orphaned child jail, prison_deref() dereferences its parent to see
whether the child jail's VNET needs to be freed, but if this race
occurs, this is a use-after-free.

Fix the problem by using PR_VNET to decide whether the jail's VNET is to
be destroyed, rather than dereferencing the parent jail pointer.  Set it
earlier so that a subsequent failure in kern_jail_set() cleans up the
nascent VNET.

Reviewed by:	zlei (previous version), jamie
MFC after:	2 weeks
Differential Revision:	https://reviews.freebsd.org/D47992
---
 sys/kern/kern_jail.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index ad6483ed374d68..6ffeab59112b47 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -1687,9 +1687,18 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 			    sizeof(pr->pr_osrelease));
 
 #ifdef VIMAGE
-		/* Allocate a new vnet if specified. */
-		pr->pr_vnet = (pr_flags & PR_VNET)
-		    ? vnet_alloc() : ppr->pr_vnet;
+		/*
+		 * Allocate a new vnet if specified.
+		 *
+		 * Set PR_VNET now if so, so that the vnet is disposed of
+		 * properly when the jail is destroyed.
+		 */
+		if (pr_flags & PR_VNET) {
+			pr->pr_flags |= PR_VNET;
+			pr->pr_vnet = vnet_alloc();
+		} else {
+			pr->pr_vnet = ppr->pr_vnet;
+		}
 #endif
 		/*
 		 * Allocate a dedicated cpuset for each jail.
@@ -3207,9 +3216,12 @@ prison_deref(struct prison *pr, int flags)
 					 * Removing a prison frees references
 					 * from its parent.
 					 */
+					ppr = pr->pr_parent;
+					pr->pr_parent = NULL;
 					mtx_unlock(&pr->pr_mtx);
+
+					pr = ppr;
 					flags &= ~PD_LOCKED;
-					pr = pr->pr_parent;
 					flags |= PD_DEREF | PD_DEUREF;
 					continue;
 				}
@@ -3236,7 +3248,7 @@ prison_deref(struct prison *pr, int flags)
 	 */
 	TAILQ_FOREACH_SAFE(rpr, &freeprison, pr_list, tpr) {
 #ifdef VIMAGE
-		if (rpr->pr_vnet != rpr->pr_parent->pr_vnet)
+		if (rpr->pr_flags & PR_VNET)
 			vnet_destroy(rpr->pr_vnet);
 #endif
 		if (rpr->pr_root != NULL)

From 1c933f464fdbb630f9663751f04c29cdcda38902 Mon Sep 17 00:00:00 2001
From: Mark Johnston <markj@FreeBSD.org>
Date: Mon, 6 Jan 2025 22:55:38 +0000
Subject: [PATCH 024/143] unix: Be consistent about error handling for
 unconnected sockets

SOCK_STREAM and SOCK_SEQPACKET sockets should get the same treatment
here.

PR:		176420
MFC after:	2 weeks
---
 sys/kern/uipc_usrreq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 7af73a1d344be3..3d7e5bcc5ad014 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -1796,7 +1796,7 @@ uipc_ctloutput(struct socket *so, struct sockopt *sopt)
 			if (unp->unp_flags & UNP_HAVEPC)
 				xu = unp->unp_peercred;
 			else {
-				if (so->so_type == SOCK_STREAM)
+				if (so->so_proto->pr_flags & PR_CONNREQUIRED)
 					error = ENOTCONN;
 				else
 					error = EINVAL;

From 5bf3ac7ae219f126cf3965be97a2d718007c1be4 Mon Sep 17 00:00:00 2001
From: Mark Johnston <markj@FreeBSD.org>
Date: Mon, 6 Jan 2025 22:56:07 +0000
Subject: [PATCH 025/143] bsdinstall: Fix a typo in a comment

PR:		283507
MFC after:	1 week
---
 usr.sbin/bsdinstall/scripts/zfsboot | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/usr.sbin/bsdinstall/scripts/zfsboot b/usr.sbin/bsdinstall/scripts/zfsboot
index 45c023b065133e..6f2244a918bc6e 100755
--- a/usr.sbin/bsdinstall/scripts/zfsboot
+++ b/usr.sbin/bsdinstall/scripts/zfsboot
@@ -139,7 +139,7 @@ f_include $BSDCFG_SHARE/variable.subr
 #
 # Default ZFS datasets for root zpool
 #
-# NOTE: Requires /tmp, /var/tmp, /$ZFSBOOT_BOOTFS_NAME/$ZFSBOOT_BOOTFS_NAME
+# NOTE: Requires /tmp, /var/tmp, /$ZFSBOOT_BEROOT_NAME/$ZFSBOOT_BOOTFS_NAME
 # NOTE: Anything after pound/hash character [#] is ignored as a comment.
 #
 f_isset ZFSBOOT_DATASETS || ZFSBOOT_DATASETS="

From 872686b17e70636f031436b458262eb7dacc5832 Mon Sep 17 00:00:00 2001
From: Mark Johnston <markj@FreeBSD.org>
Date: Mon, 6 Jan 2025 23:20:00 +0000
Subject: [PATCH 026/143] rc: Document rtadvd_flags

PR:		283696
MFC after:	1 week
---
 libexec/rc/rc.conf       |  1 +
 share/man/man5/rc.conf.5 | 10 +++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/libexec/rc/rc.conf b/libexec/rc/rc.conf
index 62756fece2016f..9540cca6c2f104 100644
--- a/libexec/rc/rc.conf
+++ b/libexec/rc/rc.conf
@@ -551,6 +551,7 @@ rtadvd_enable="NO"		# Set to YES to enable an IPv6 router
 				# advertisement daemon. If set to YES,
 				# this router becomes a possible candidate
 				# IPv6 default router for local subnets.
+rtadvd_flags=""			# Flags to the IPv6 router advertisement daemon.
 rtadvd_interfaces=""		# Interfaces rtadvd sends RA packets.
 stf_interface_ipv4addr=""	# Local IPv4 addr for 6to4 IPv6 over IPv4
 				# tunneling interface. Specify this entry
diff --git a/share/man/man5/rc.conf.5 b/share/man/man5/rc.conf.5
index a66477ae4510de..8ad503f792e899 100644
--- a/share/man/man5/rc.conf.5
+++ b/share/man/man5/rc.conf.5
@@ -22,7 +22,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd October 14, 2024
+.Dd January 6, 2025
 .Dt RC.CONF 5
 .Os
 .Sh NAME
@@ -3133,6 +3133,14 @@ the interfaces specified in
 This should only be enabled with great care.
 You may want to fine-tune
 .Xr rtadvd.conf 5 .
+.It Va rtadvd_flags
+.Pq Vt str
+If
+.Va rtadvd_enable
+is set to
+.Dq Li YES ,
+these are the flags to pass to
+.Xr rtadvd 8 .
 .It Va rtadvd_interfaces
 .Pq Vt str
 If

From 378a2b155aaf853933df5b53e174b3880826488c Mon Sep 17 00:00:00 2001
From: Mark Johnston <markj@FreeBSD.org>
Date: Mon, 6 Jan 2025 23:20:08 +0000
Subject: [PATCH 027/143] netipsec: Pass the right mbuf up

Note that key_spdacquire() is dead code, as the SADB_X_SPDACQUIRE
message handler is not set.

PR:		243057
MFC after:	2 weeks
---
 sys/netipsec/key.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/netipsec/key.c b/sys/netipsec/key.c
index ad1d6164f15815..3c64a65f024d38 100644
--- a/sys/netipsec/key.c
+++ b/sys/netipsec/key.c
@@ -2595,7 +2595,7 @@ key_spdacquire(struct secpolicy *sp)
 	mtod(result, struct sadb_msg *)->sadb_msg_len =
 	    PFKEY_UNIT64(result->m_pkthdr.len);
 
-	return key_sendup_mbuf(NULL, m, KEY_SENDUP_REGISTERED);
+	return key_sendup_mbuf(NULL, result, KEY_SENDUP_REGISTERED);
 }
 
 /*

From 02ebbc781f082df9714e74775700d8c08bac7850 Mon Sep 17 00:00:00 2001
From: Warner Losh <imp@FreeBSD.org>
Date: Mon, 6 Jan 2025 16:44:21 -0700
Subject: [PATCH 028/143] swab: Fix implementation to support overlapping
 copies

A number of image processing packages assume that swab() can handle to
and from being the same. However, POSIX.1 states that overlapping
buffers produce undefined results. Our old implementation would produce
coherent results, but the recent change to the musl-inspired code does
not. Since there are complaints in the forums for these image processing
packages for musl and now FreeBSD, update the algorithm to just read a
word at a time and bswap16 the results. All of FreeBSD's architectures
support unaligned access in userland, and swab is not used in the kernel
(g_part_apm has its own copy), so opt for even simpler code that's
easier to understand. This makes the overlapping behavior match i386 again,
since its assembler routine for swab handles overlapping correctly.

PR: 283698
Sponsored by: Netflix
Reviewed by:	nwhitehorn
Differential Revision:	https://reviews.freebsd.org/D48259
---
 lib/libc/string/swab.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/lib/libc/string/swab.c b/lib/libc/string/swab.c
index 2b044d68ca4603..ed4436a4981066 100644
--- a/lib/libc/string/swab.c
+++ b/lib/libc/string/swab.c
@@ -4,19 +4,22 @@
  */
 
 #include <unistd.h>
+#include <sys/endian.h>
 
 void
 swab(const void * __restrict from, void * __restrict to, ssize_t len)
 {
-	const unsigned char *f = from;
-	unsigned char *t = to;
+	const uint16_t *f __aligned(1) = from;
+	uint16_t *t __aligned(1) = to;
 
+	/*
+	 * POSIX says overlapping copy behavior is undefined, however many
+	 * applications assume the old FreeBSD and current GNU libc behavior
+	 * that will swap the bytes correctly when from == to. Reading both bytes
+	 * and swapping them before writing them back accomplishes this.
+	 */
 	while (len > 1) {
-		t[0] = f[1];
-		t[1] = f[0];
-
-		f += 2;
-		t += 2;
+		*t++ = bswap16(*f++);
 		len -= 2;
 	}
 }

From 6fc164c7775a5dc7a4277969870abd50eb62cd1e Mon Sep 17 00:00:00 2001
From: Warner Losh <imp@FreeBSD.org>
Date: Mon, 6 Jan 2025 16:45:47 -0700
Subject: [PATCH 029/143] cdefs.9: Note only one programming environment at a
 time

Only one programming environment can be defined at a time. POSIX states
that when defining _POSIX_C_SOURCE, the system headers must define only
the macros, variables, and functions that a given standard level
defines. Selecting a different macro along with this is fundamentally
incompatible with that.

Sponsored by:		Netflix
---
 share/man/man9/cdefs.9 | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/share/man/man9/cdefs.9 b/share/man/man9/cdefs.9
index b9821c70f2a124..2e6f1b440efbd4 100644
--- a/share/man/man9/cdefs.9
+++ b/share/man/man9/cdefs.9
@@ -3,7 +3,7 @@
 .\"
 .\" SPDX-License-Identifier: BSD-2-Clause
 .\"
-.Dd December 6, 2024
+.Dd January 6, 2025
 .Dt CDEFS 9
 .Os
 .Sh NAME
@@ -352,6 +352,7 @@ Defining the macros outlined below requests that the system header files provide
 only the functions, structures and macros (symbols) defined by the appropriate
 standard, while suppressing all extensions.
 However, system headers not defined by that standard may define extensions.
+You may only define one of the following for any compilation unit.
 .Bl -column "---------------"
 .It Sy Macro Ta Sy Environment
 .It Dv _POSIX_SOURCE Ta St -p1003.1-88 including St -ansiC

From cfd8866818abb68fbfbffb925298c0b457cb32b5 Mon Sep 17 00:00:00 2001
From: Warner Losh <imp@FreeBSD.org>
Date: Mon, 6 Jan 2025 16:45:51 -0700
Subject: [PATCH 030/143] cdefs.h: Add warning about defining __BSD_VISIBLE and
 friends

Undefined things happen if users define these macros; be more explicit
about documenting that.

Sponsored by:		Netflix
---
 share/man/man9/cdefs.9 | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/share/man/man9/cdefs.9 b/share/man/man9/cdefs.9
index 2e6f1b440efbd4..4efce132d3932c 100644
--- a/share/man/man9/cdefs.9
+++ b/share/man/man9/cdefs.9
@@ -401,7 +401,8 @@ are also included.
 These macros are set by
 .Nm
 to control the visibility of different standards.
-Users should not use these, but they are documented here for developers.
+Users must not define these, and doing so will produced undefined results.
+They are documented here for developers working on system's header files.
 .Bl -column "---------------"
 .It Dv __XSI_VISIBLE Ta Restricts the visibility of XOPEN Single Unix Standard version.
 Possible values are 500, 600, 700 or 800, corresponding to Issue 5, 6, 7, or 8

From 080f68d0ab0c87950ecd9b393a156b1e4d12c825 Mon Sep 17 00:00:00 2001
From: Ariel Ehrenberg <aehrenberg@nvidia.com>
Date: Wed, 4 Dec 2024 11:32:54 +0200
Subject: [PATCH 031/143] mlx5_core: Add steering support for IPsec with IPv6

ipv6 flow tables were not connected to previous FS tables.
Created an additional table to serve as IPsec RX root.
This table has 2 rules for redirecting the received packets
to ipv4/ipv6 based on the IP family in the packet header.

Sponsored by:	   NVidia networking
---
 sys/dev/mlx5/mlx5_accel/ipsec.h           |   2 +
 sys/dev/mlx5/mlx5_accel/mlx5_ipsec_fs.c   | 157 ++++++++++++++++++++--
 sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c |   4 +-
 3 files changed, 149 insertions(+), 14 deletions(-)

diff --git a/sys/dev/mlx5/mlx5_accel/ipsec.h b/sys/dev/mlx5/mlx5_accel/ipsec.h
index 95742c4099f192..361b9f72d873af 100644
--- a/sys/dev/mlx5/mlx5_accel/ipsec.h
+++ b/sys/dev/mlx5/mlx5_accel/ipsec.h
@@ -43,6 +43,7 @@ struct mlx5e_priv;
 struct mlx5e_tx_wqe;
 struct mlx5e_ipsec_tx;
 struct mlx5e_ipsec_rx;
+struct mlx5e_ipsec_rx_ip_type;
 
 struct aes_gcm_keymat {
 	u64   seq_iv;
@@ -128,6 +129,7 @@ struct mlx5e_ipsec {
 	struct mlx5e_ipsec_tx *tx;
 	struct mlx5e_ipsec_rx *rx_ipv4;
 	struct mlx5e_ipsec_rx *rx_ipv6;
+	struct mlx5e_ipsec_rx_ip_type *rx_ip_type;
 	struct mlx5e_ipsec_aso *aso;
 	u32 pdn;
 	u32 mkey;
diff --git a/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_fs.c b/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_fs.c
index f7950bf612698a..fb9ca94278db24 100644
--- a/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_fs.c
+++ b/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_fs.c
@@ -138,6 +138,14 @@ struct mlx5e_ipsec_rx_roce {
 	struct mlx5_flow_namespace *ns_rdma;
 };
 
+struct mlx5e_ipsec_rx_ip_type {
+	struct mlx5_flow_table *ft;
+	struct mlx5_flow_namespace *ns;
+	struct mlx5_flow_handle *ipv4_rule;
+	struct mlx5_flow_handle *ipv6_rule;
+	struct mlx5e_ipsec_miss miss;
+};
+
 struct mlx5e_ipsec_rx {
 	struct mlx5e_ipsec_ft ft;
 	struct mlx5e_ipsec_miss pol;
@@ -497,6 +505,16 @@ static void setup_fte_addr6(struct mlx5_flow_spec *spec, __be32 *saddr,
                             outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 0xff, 16);
 }
 
+static void
+setup_fte_ip_version(struct mlx5_flow_spec *spec, u8 family)
+{
+        spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
+
+        MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+        MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
+                 family == AF_INET ? 4 : 6);
+}
+
 static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 {
 	struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
@@ -1598,9 +1616,18 @@ static void ipsec_fs_rx_roce_table_destroy(struct mlx5e_ipsec_rx_roce *rx_roce)
 	mlx5_destroy_flow_table(rx_roce->ft);
 }
 
+static void
+ipsec_fs_rx_ip_type_catchall_rule_destroy(struct mlx5e_ipsec_rx_ip_type* rx_ip_type)
+{
+	mlx5_del_flow_rules(&rx_ip_type->ipv4_rule);
+	mlx5_del_flow_rules(&rx_ip_type->ipv6_rule);
+	mlx5_del_flow_rules(&rx_ip_type->miss.rule);
+	mlx5_destroy_flow_group(rx_ip_type->miss.group);
+	rx_ip_type->miss.group = NULL;
+}
+
 static void ipsec_fs_rx_table_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx)
 {
-	mutex_lock(&rx->ft.mutex);
 	if (rx->chains) {
 		ipsec_chains_destroy(rx->chains);
 	} else {
@@ -1610,7 +1637,6 @@ static void ipsec_fs_rx_table_destroy(struct mlx5_core_dev *mdev, struct mlx5e_i
 	mlx5_destroy_flow_table(rx->ft.sa);
 	mlx5_destroy_flow_table(rx->ft.status);
 	ipsec_fs_rx_roce_table_destroy(&rx->roce);
-	mutex_unlock(&rx->ft.mutex);
 }
 
 static void ipsec_roce_setup_udp_dport(struct mlx5_flow_spec *spec, u16 dport)
@@ -1831,6 +1857,90 @@ static int ipsec_fs_rx_roce_tables_create(struct mlx5e_ipsec_rx *rx,
 	return err;
 }
 
+static int
+ipsec_fs_rx_ip_type_catchall_rules_create(struct mlx5e_priv *priv,
+                                          struct mlx5_flow_destination *defdst)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5e_ipsec *ipsec = priv->ipsec;
+	struct mlx5_flow_destination dst = {};
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_spec *spec;
+	int err = 0;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec) {
+		return -ENOMEM;
+	}
+	dst.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+	/* Set rule for ipv4 packets */
+	dst.ft = ipsec->rx_ipv4->ft.pol;
+	setup_fte_ip_version(spec, AF_INET);
+	rule = mlx5_add_flow_rules(ipsec->rx_ip_type->ft, spec, &flow_act, &dst, 1);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(mdev, "Failed to add ipv4 rule to ip_type table err=%d\n",
+			      err);
+		goto out;
+	}
+	ipsec->rx_ip_type->ipv4_rule = rule;
+
+	/* Set rule for ipv6 packets */
+	dst.ft = ipsec->rx_ipv6->ft.pol;
+	setup_fte_ip_version(spec, AF_INET6);
+	rule = mlx5_add_flow_rules(ipsec->rx_ip_type->ft, spec, &flow_act, &dst, 1);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(mdev, "Failed to add ipv6 rule to ip_type table err=%d\n",
+			      err);
+		goto fail_add_ipv6_rule;
+	}
+	ipsec->rx_ip_type->ipv6_rule = rule;
+
+	/* set miss rule */
+	err = ipsec_miss_create(mdev, ipsec->rx_ip_type->ft, &ipsec->rx_ip_type->miss, defdst);
+	if (err) {
+		mlx5_core_err(mdev, "Failed to add miss rule to ip_type table err=%d\n",
+			          err);
+		goto fail_miss_rule;
+	}
+
+	goto out;
+
+fail_miss_rule:
+	mlx5_del_flow_rules(&ipsec->rx_ip_type->ipv6_rule);
+fail_add_ipv6_rule:
+	mlx5_del_flow_rules(&ipsec->rx_ip_type->ipv4_rule);
+out:
+	kvfree(spec);
+	return err;
+}
+
+static int
+ipsec_fs_rx_ip_type_table_create(struct mlx5e_priv *priv,
+                                 int level)
+{
+	struct mlx5e_ipsec *ipsec = priv->ipsec;
+	struct mlx5_flow_table *ft;
+	int err = 0;
+
+	/* Create rx ip type table */
+	ft = ipsec_rx_ft_create(ipsec->rx_ip_type->ns, level, 0, 1);
+	if (IS_ERR(ft)) {
+		err = PTR_ERR(ft);
+		goto out;
+	}
+	ipsec->rx_ip_type->ft = ft;
+
+	priv->fts.ipsec_ft = priv->ipsec->rx_ip_type->ft;
+
+out:
+	return err;
+}
+
 static int ipsec_fs_rx_table_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx,
 				    int rx_init_level, int rdma_init_level)
 {
@@ -1996,6 +2106,7 @@ void mlx5e_accel_ipsec_fs_rx_catchall_rules_destroy(struct mlx5e_priv *priv)
 	if (!priv->ipsec)
 		return;
 
+	ipsec_fs_rx_ip_type_catchall_rule_destroy(priv->ipsec->rx_ip_type);
 	ipsec_fs_rx_catchall_rules_destroy(priv->mdev, priv->ipsec->rx_ipv4);
 	ipsec_fs_rx_catchall_rules_destroy(priv->mdev, priv->ipsec->rx_ipv6);
 }
@@ -2019,6 +2130,13 @@ int mlx5e_accel_ipsec_fs_rx_catchall_rules(struct mlx5e_priv *priv)
 	err = ipsec_fs_rx_catchall_rules(priv, ipsec->rx_ipv4, &dest);
 	if (err)
 		ipsec_fs_rx_catchall_rules_destroy(priv->mdev, priv->ipsec->rx_ipv6);
+
+	err = ipsec_fs_rx_ip_type_catchall_rules_create(priv, &dest);
+	if (err) {
+		ipsec_fs_rx_catchall_rules_destroy(priv->mdev, priv->ipsec->rx_ipv6);
+		ipsec_fs_rx_catchall_rules_destroy(priv->mdev, priv->ipsec->rx_ipv4);
+	}
+
 out:
 	return err;
 }
@@ -2032,6 +2150,7 @@ void mlx5e_accel_ipsec_fs_rx_tables_destroy(struct mlx5e_priv *priv)
 	if (!ipsec)
 		return;
 
+	mlx5_destroy_flow_table(ipsec->rx_ip_type->ft);
 	ipsec_fs_rx_table_destroy(mdev, ipsec->rx_ipv6);
 	ipsec_fs_rx_table_destroy(mdev, ipsec->rx_ipv4);
 }
@@ -2045,18 +2164,24 @@ int mlx5e_accel_ipsec_fs_rx_tables_create(struct mlx5e_priv *priv)
 	if (!ipsec)
 		return 0;
 
-	err = ipsec_fs_rx_table_create(ipsec->mdev, ipsec->rx_ipv4, 0, 0);
+	err = ipsec_fs_rx_ip_type_table_create(priv, 0);
 	if (err)
-		goto out;
+		return err;
 
-	err = ipsec_fs_rx_table_create(ipsec->mdev, ipsec->rx_ipv6, 4, 1);
-	if (err) {
-		ipsec_fs_rx_table_destroy(priv->mdev, ipsec->rx_ipv4);
-		goto out;
-	}
+	err = ipsec_fs_rx_table_create(ipsec->mdev, ipsec->rx_ipv4, 1, 0);
+	if (err)
+		goto err_ipv4_table;
 
-	priv->fts.ipsec_ft = priv->ipsec->rx_ipv4->ft.pol;
-out:
+	err = ipsec_fs_rx_table_create(ipsec->mdev, ipsec->rx_ipv6, 5, 1);
+	if (err)
+		goto err_ipv6_table;
+
+	return 0;
+
+err_ipv6_table:
+	ipsec_fs_rx_table_destroy(priv->mdev, ipsec->rx_ipv4);
+err_ipv4_table:
+	mlx5_destroy_flow_table(ipsec->rx_ip_type->ft);
 	return err;
 }
 
@@ -2067,6 +2192,7 @@ void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec)
 	mutex_destroy(&ipsec->rx_ipv4->ft.mutex);
 	mutex_destroy(&ipsec->tx->ft.mutex);
 	ipsec_fs_destroy_counters(ipsec);
+	kfree(ipsec->rx_ip_type);
 	kfree(ipsec->rx_ipv6);
 	kfree(ipsec->rx_ipv4);
 	kfree(ipsec->tx);
@@ -2089,9 +2215,13 @@ int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec)
 	if (!ipsec->tx)
 		return -ENOMEM;
 
+	ipsec->rx_ip_type = kzalloc(sizeof(*ipsec->rx_ip_type), GFP_KERNEL);
+	if (!ipsec->rx_ip_type)
+		goto err_tx;
+
 	ipsec->rx_ipv4 = kzalloc(sizeof(*ipsec->rx_ipv4), GFP_KERNEL);
 	if (!ipsec->rx_ipv4)
-		goto err_tx;
+		goto err_ip_type;
 
 	ipsec->rx_ipv6 = kzalloc(sizeof(*ipsec->rx_ipv6), GFP_KERNEL);
 	if (!ipsec->rx_ipv6)
@@ -2103,6 +2233,7 @@ int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec)
 
 	ipsec->tx->ns = tns;
 	mutex_init(&ipsec->tx->ft.mutex);
+	ipsec->rx_ip_type->ns = rns;
 	ipsec->rx_ipv4->ns = rns;
 	ipsec->rx_ipv6->ns = rns;
 	mutex_init(&ipsec->rx_ipv4->ft.mutex);
@@ -2116,6 +2247,8 @@ int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec)
 	kfree(ipsec->rx_ipv6);
 err_rx_ipv4:
 	kfree(ipsec->rx_ipv4);
+err_ip_type:
+	kfree(ipsec->rx_ip_type);
 err_tx:
 	kfree(ipsec->tx);
 	return err;
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c b/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c
index f8be5b9e881c60..6e24395b5577ad 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c
@@ -1626,7 +1626,7 @@ mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv)
 
 	ft->num_groups = 0;
 	ft_attr.max_fte = MLX5E_VLAN_TABLE_SIZE;
-	ft_attr.level = (priv->ipsec) ? 8 : 0;
+	ft_attr.level = (priv->ipsec) ? 9 : 0;
 	ft->t = mlx5_create_flow_table(priv->fts.ns, &ft_attr);
 
 	if (IS_ERR(ft->t)) {
@@ -2014,7 +2014,7 @@ mlx5e_create_vxlan_flow_table(struct mlx5e_priv *priv)
 
 	ft->num_groups = 0;
 	ft_attr.max_fte = MLX5E_VXLAN_TABLE_SIZE;
-	ft_attr.level = (priv->ipsec) ? 9 : 1;
+	ft_attr.level = (priv->ipsec) ? 10 : 1;
 	ft->t = mlx5_create_flow_table(priv->fts.ns, &ft_attr);
 
 	if (IS_ERR(ft->t)) {

From 215c8b79c498f647afcbe9fa9076c2c3329e09b4 Mon Sep 17 00:00:00 2001
From: Mark Johnston <markj@FreeBSD.org>
Date: Tue, 7 Jan 2025 02:15:23 +0000
Subject: [PATCH 032/143] riscv/vmm: Make vcpu sleep periods consistent with
 other platforms

There's no apparent reason for the difference here, so let's be
consistent to make merging easier.

Tested by:	br
---
 sys/riscv/vmm/vmm.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/sys/riscv/vmm/vmm.c b/sys/riscv/vmm/vmm.c
index 0596e0de2e436b..f7cbfc1dfea580 100644
--- a/sys/riscv/vmm/vmm.c
+++ b/sys/riscv/vmm/vmm.c
@@ -1125,8 +1125,7 @@ vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
 	if (from_idle) {
 		while (vcpu->state != VCPU_IDLE) {
 			vcpu_notify_event_locked(vcpu);
-			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat",
-			    hz / 1000);
+			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
 		}
 	} else {
 		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
@@ -1425,7 +1424,7 @@ vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
 		 * XXX msleep_spin() cannot be interrupted by signals so
 		 * wake up periodically to check pending signals.
 		 */
-		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz / 1000);
+		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
 		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
 	}
 	vcpu_unlock(vcpu);

From 19cb383dc03a80e1651d80e0f0e3d4e9cbd20e04 Mon Sep 17 00:00:00 2001
From: Mark Johnston <markj@FreeBSD.org>
Date: Tue, 7 Jan 2025 02:15:36 +0000
Subject: [PATCH 033/143] vmm.4: Update to mention non-amd64 platforms

Most of the text here relates to PCI passthrough, which is still
amd64-only, but we should still document supported platforms.

While here, remove the comment that vmm.ko has to be loaded at boot-time
for passthrough, as devctl makes it possible to detach host drivers on
the fly.  I'm not aware of any other reason to require vmm.ko to be
loaded at boot.

Reviewed by:	br, andrew
Differential Revision:	https://reviews.freebsd.org/D48263
---
 share/man/man4/vmm.4 | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/share/man/man4/vmm.4 b/share/man/man4/vmm.4
index 7e4c9050021a45..07c40541f404a4 100644
--- a/share/man/man4/vmm.4
+++ b/share/man/man4/vmm.4
@@ -22,7 +22,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd September 27, 2024
+.Dd December 30, 2024
 .Dt VMM 4
 .Os
 .Sh NAME
@@ -45,15 +45,22 @@ kldload vmm
 provides the kernel portion of the
 .Xr bhyve 4
 hypervisor.
-.Pp
-An Intel CPU with VT-x/EPT or AMD CPU with SVM support is required.
+The following platforms are supported:
+.Bl -bullet -compact
+.It
+amd64: An Intel CPU with VT-x/EPT or AMD CPU with SVM support is required.
+.It
+arm64: The boot CPU must start in EL2 and the system must have a GICv3 interrupt
+controller.
+VHE support will be used if available.
+.It
+riscv: The CPUs must implement the H (hypervisor) RISC-V ISA extension.
+.El
 .Pp
 PCI device passthrough to a virtual machine requires
-hardware with VT-d support.
+hardware with VT-d support and is available only on amd64.
 .Sh PCI PASSTHROUGH
-When the hardware supports VT-d, and
-.Nm
-has been loaded at boot time,
+On amd64 where the hardware supports VT-d,
 PCI devices can be reserved for use by the hypervisor.
 Entries consisting of the PCI
 .Ar bus Ns / Ns Ar slot Ns / Ns Ar function
@@ -143,6 +150,8 @@ back:
 .Nm vmm.ko
 first appeared in
 .Fx 10.0 .
+arm64 and riscv support first appeared in
+.Fx 15.0 .
 .Sh AUTHORS
 .An Neel Natu Aq neel@freebsd.org
 .An Peter Grehan Aq grehan@freebsd.org

From b09fe08ede8f90ab40f09eac49700698dd4d02b3 Mon Sep 17 00:00:00 2001
From: Mark Johnston <markj@FreeBSD.org>
Date: Tue, 7 Jan 2025 02:15:51 +0000
Subject: [PATCH 034/143] amd64/vmm: Remove vmm_mem_init()

It is a no-op and doesn't exist on other platforms.  As part of some
work to deduplicate vmm code, just remove it.

No functional change intended.

Reviewed by:	corvink
Differential Revision:	https://reviews.freebsd.org/D48267
---
 sys/amd64/vmm/vmm.c     | 6 ------
 sys/amd64/vmm/vmm_mem.c | 7 -------
 sys/amd64/vmm/vmm_mem.h | 1 -
 3 files changed, 14 deletions(-)

diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index d1f57a717fdf78..0e3ab2845d58fd 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -427,8 +427,6 @@ vm_exitinfo_cpuset(struct vcpu *vcpu)
 static int
 vmm_init(void)
 {
-	int error;
-
 	if (!vmm_is_hw_supported())
 		return (ENXIO);
 
@@ -449,10 +447,6 @@ vmm_init(void)
 	if (vmm_ipinum < 0)
 		vmm_ipinum = IPI_AST;
 
-	error = vmm_mem_init();
-	if (error)
-		return (error);
-
 	vmm_suspend_p = vmmops_modsuspend;
 	vmm_resume_p = vmmops_modresume;
 
diff --git a/sys/amd64/vmm/vmm_mem.c b/sys/amd64/vmm/vmm_mem.c
index 0e953b6af534d0..e96c9e4bdc66bb 100644
--- a/sys/amd64/vmm/vmm_mem.c
+++ b/sys/amd64/vmm/vmm_mem.c
@@ -45,13 +45,6 @@
 
 #include "vmm_mem.h"
 
-int
-vmm_mem_init(void)
-{
-
-	return (0);
-}
-
 vm_object_t
 vmm_mmio_alloc(struct vmspace *vmspace, vm_paddr_t gpa, size_t len,
 	       vm_paddr_t hpa)
diff --git a/sys/amd64/vmm/vmm_mem.h b/sys/amd64/vmm/vmm_mem.h
index b237e08ccc053d..41b9bf07c4fc61 100644
--- a/sys/amd64/vmm/vmm_mem.h
+++ b/sys/amd64/vmm/vmm_mem.h
@@ -32,7 +32,6 @@
 struct vmspace;
 struct vm_object;
 
-int		vmm_mem_init(void);
 struct vm_object *vmm_mmio_alloc(struct vmspace *, vm_paddr_t gpa, size_t len,
 				 vm_paddr_t hpa);
 void		vmm_mmio_free(struct vmspace *, vm_paddr_t gpa, size_t size);

From c945c9ddca8d6db162334127a43ef8a0be1d3db1 Mon Sep 17 00:00:00 2001
From: Mark Johnston <markj@FreeBSD.org>
Date: Tue, 7 Jan 2025 02:16:04 +0000
Subject: [PATCH 035/143] amd64/vmm: Rename vm_get_vmspace() to vm_vmspace()

For consistency with other vm accessors.

No functional change intended.

Reviewed by:	corvink
Differential Revision:	https://reviews.freebsd.org/D48268
---
 sys/amd64/include/vmm.h         | 2 +-
 sys/amd64/vmm/vmm.c             | 3 +--
 sys/amd64/vmm/vmm_dev_machdep.c | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index dd8e76962cafc4..6501baa455daaa 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -401,7 +401,7 @@ vcpu_should_yield(struct vcpu *vcpu)
 
 void *vcpu_stats(struct vcpu *vcpu);
 void vcpu_notify_event(struct vcpu *vcpu, bool lapic_intr);
-struct vmspace *vm_get_vmspace(struct vm *vm);
+struct vmspace *vm_vmspace(struct vm *vm);
 struct vatpic *vm_atpic(struct vm *vm);
 struct vatpit *vm_atpit(struct vm *vm);
 struct vpmtmr *vm_pmtmr(struct vm *vm);
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 0e3ab2845d58fd..d05d979a531a2b 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -2677,9 +2677,8 @@ vcpu_notify_event(struct vcpu *vcpu, bool lapic_intr)
 }
 
 struct vmspace *
-vm_get_vmspace(struct vm *vm)
+vm_vmspace(struct vm *vm)
 {
-
 	return (vm->vmspace);
 }
 
diff --git a/sys/amd64/vmm/vmm_dev_machdep.c b/sys/amd64/vmm/vmm_dev_machdep.c
index 2d0ceadaedfed0..d8d2b460404c42 100644
--- a/sys/amd64/vmm/vmm_dev_machdep.c
+++ b/sys/amd64/vmm/vmm_dev_machdep.c
@@ -441,7 +441,7 @@ vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data,
 		break;
 	case VM_GET_GPA_PMAP:
 		gpapte = (struct vm_gpa_pte *)data;
-		pmap_get_mapping(vmspace_pmap(vm_get_vmspace(vm)),
+		pmap_get_mapping(vmspace_pmap(vm_vmspace(vm)),
 				 gpapte->gpa, gpapte->pte, &gpapte->ptenum);
 		error = 0;
 		break;

From fd94571ccf0c9c6521063c08a72a760873e87897 Mon Sep 17 00:00:00 2001
From: Mark Johnston <markj@FreeBSD.org>
Date: Tue, 7 Jan 2025 02:20:05 +0000
Subject: [PATCH 036/143] rawip: Take the inpcb lock when appropriate in
 rip_ctloutput()

Reviewed by:	glebius
MFC after:	1 week
Sponsored by:	Klara, Inc.
Sponsored by:	Stormshield
Differential Revision:	https://reviews.freebsd.org/D48344
---
 sys/netinet/raw_ip.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
index a6bef1c7e2752f..3a0b9f632fb456 100644
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -625,8 +625,6 @@ rip_send(struct socket *so, int pruflags, struct mbuf *m, struct sockaddr *nam,
  *
  * When adding new socket options here, make sure to add access control
  * checks here as necessary.
- *
- * XXX-BZ inp locking?
  */
 int
 rip_ctloutput(struct socket *so, struct sockopt *sopt)
@@ -637,7 +635,9 @@ rip_ctloutput(struct socket *so, struct sockopt *sopt)
 	if (sopt->sopt_level != IPPROTO_IP) {
 		if ((sopt->sopt_level == SOL_SOCKET) &&
 		    (sopt->sopt_name == SO_SETFIB)) {
+			INP_WLOCK(inp);
 			inp->inp_inc.inc_fibnum = so->so_fibnum;
+			INP_WUNLOCK(inp);
 			return (0);
 		}
 		return (EINVAL);
@@ -707,10 +707,12 @@ rip_ctloutput(struct socket *so, struct sockopt *sopt)
 					    sizeof optval);
 			if (error)
 				break;
+			INP_WLOCK(inp);
 			if (optval)
 				inp->inp_flags |= INP_HDRINCL;
 			else
 				inp->inp_flags &= ~INP_HDRINCL;
+			INP_WUNLOCK(inp);
 			break;
 
 		case IP_FW3:	/* generic ipfw v.3 functions */

From cc0d806f63e833b9e011c0665905b2208b436c8b Mon Sep 17 00:00:00 2001
From: Konstantin Belousov <kib@FreeBSD.org>
Date: Mon, 6 Jan 2025 13:01:11 +0200
Subject: [PATCH 037/143] open(2): allow O_PATH | O_CREAT

There is no reason to disallow creating the file opened for path.
Moreover, it might be a useful feature together with O_EXCL.

Reviewed by:	markj
Sponsored by:	The FreeBSD Foundation
MFC after:	1 week
Differential revision:	https://reviews.freebsd.org/D48332
---
 sys/kern/vfs_syscalls.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 7a1677c945e316..bbd67c2b032697 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -1185,7 +1185,7 @@ openatfp(struct thread *td, int dirfd, const char *path,
 	 * except O_EXEC is ignored.
 	 */
 	if ((flags & O_PATH) != 0) {
-		flags &= ~(O_CREAT | O_ACCMODE);
+		flags &= ~O_ACCMODE;
 	} else if ((flags & O_EXEC) != 0) {
 		if (flags & O_ACCMODE)
 			return (EINVAL);

From 749b3b2c0629f44f6b0044992dfb2ce5ac7e562b Mon Sep 17 00:00:00 2001
From: Konstantin Belousov <kib@FreeBSD.org>
Date: Tue, 7 Jan 2025 00:07:07 +0200
Subject: [PATCH 038/143] path_test: adjust test for open(O_PATH | O_CREAT)

Instead of failing, it must succeed now.

Reviewed by:	markj
Sponsored by:	The FreeBSD Foundation
MFC after:	1 week
Differential revision:	https://reviews.freebsd.org/D48332
---
 tests/sys/file/path_test.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/sys/file/path_test.c b/tests/sys/file/path_test.c
index 911c7c7075f0d4..b3b8b7cebd4db1 100644
--- a/tests/sys/file/path_test.c
+++ b/tests/sys/file/path_test.c
@@ -684,10 +684,14 @@ ATF_TC_BODY(path_io, tc)
 	size_t page_size;
 	int error, fd, pathfd, sd[2];
 
-	/* It shouldn't be possible to create new files with O_PATH. */
+	/* It is allowed to create new files with O_PATH. */
 	snprintf(path, sizeof(path), "path_io.XXXXXX");
 	ATF_REQUIRE_MSG(mktemp(path) == path, FMT_ERR("mktemp"));
-	ATF_REQUIRE_ERRNO(ENOENT, open(path, O_PATH | O_CREAT, 0600) < 0);
+	pathfd = open(path, O_PATH | O_CREAT, 0600);
+	ATF_REQUIRE_MSG(pathfd >= 0, FMT_ERR("open(O_PATH|O_CREAT)"));
+	/* Ensure that this is indeed O_PATH fd */
+	ATF_REQUIRE_ERRNO(EBADF, write(pathfd, path, strlen(path)) == -1);
+	CHECKED_CLOSE(pathfd);
 
 	/* Create a non-empty file for use in the rest of the tests. */
 	mktfile(path, "path_io.XXXXXX");

From 2372f8cc640c2f4ab82831e6ac0e27ab6c18321a Mon Sep 17 00:00:00 2001
From: "Bjoern A. Zeeb" <bz@FreeBSD.org>
Date: Fri, 20 Dec 2024 14:23:50 +0000
Subject: [PATCH 039/143] LinuxKPI 802.11 / rtw88: make packets flow again

In 886653492945f we added checks for packets to only go out if the
station is known to the firmware (amongst others) as there are
implications in drivers.
Unfortunately rtw88 does not support the mac80211 (*sta_state)() KPI
but only the fallback (*sta_add/remove)() in which case the station is
only added to firmware when going from AUTH to ASSOC.  That means we
had no chance to get authenticated anymore.

The problem has existed since June in main and stable/14 but only now
was noticed in December with 14.2-R which makes me wonder.

I am still not entirely sure what implications the missing checks have
on all the other drivers using (*sta_state)() (or if they were really
needed in the first place beyond txq_ready).  I have run iwlwifi with
this change for a few days without extra trouble, though I was not
always able to reproduce the problems in the past.  Also, people are
occasionally still reporting the original "Invalid TXQ" error, which
indicates there is another lingering case somewhere.

For the moment make rtw88 work again and expose the change to a wider
audience.

PR:	283142, 274382
Fixes:	886653492945f (make sure we can send DISASSOC or DEAUTH frames)
Tested by:	imb protected-networks.net, oleg.nauman gmail.com
Sponsored by:	The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D48161
---
 sys/compat/linuxkpi/common/src/linux_80211.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/sys/compat/linuxkpi/common/src/linux_80211.c b/sys/compat/linuxkpi/common/src/linux_80211.c
index edc3131286f5f2..d66bc40f40f68d 100644
--- a/sys/compat/linuxkpi/common/src/linux_80211.c
+++ b/sys/compat/linuxkpi/common/src/linux_80211.c
@@ -3704,7 +3704,16 @@ lkpi_ic_raw_xmit(struct ieee80211_node *ni, struct mbuf *m,
 
 	lsta = ni->ni_drv_data;
 	LKPI_80211_LSTA_TXQ_LOCK(lsta);
+#if 0
 	if (!lsta->added_to_drv || !lsta->txq_ready) {
+#else
+	/*
+	 * Backout this part of 886653492945f which breaks rtw88 or
+	 * in general drivers without (*sta_state)() but only the
+	 * legacy fallback to (*sta_add)().
+	 */
+	if (!lsta->txq_ready) {
+#endif
 		LKPI_80211_LSTA_TXQ_UNLOCK(lsta);
 		/*
 		 * Free the mbuf (do NOT release ni ref for the m_pkthdr.rcvif!
@@ -3952,7 +3961,16 @@ lkpi_80211_txq_task(void *ctx, int pending)
 	 * We also use txq_ready as a semaphore and will drain the txq manually
 	 * if needed on our way towards SCAN/INIT in the state machine.
 	 */
+#if 0
 	shall_tx = lsta->added_to_drv && lsta->txq_ready;
+#else
+	/*
+	 * Backout this part of 886653492945f which breaks rtw88 or
+	 * in general drivers without (*sta_state)() but only the
+	 * legacy fallback to (*sta_add)().
+	 */
+	shall_tx = lsta->txq_ready;
+#endif
 	if (__predict_true(shall_tx))
 		mbufq_concat(&mq, &lsta->txq);
 	/*

From 91a4107d6d3028acd96df96de33b8a7665d3eb03 Mon Sep 17 00:00:00 2001
From: "Bjoern A. Zeeb" <bz@FreeBSD.org>
Date: Sat, 28 Dec 2024 10:00:09 +0000
Subject: [PATCH 040/143] ifconfig: remove debug printfs from set80211vhtconf()

Anyone testing VHT options would wonder about these extra two printfs
by now.  Remove them from the tree before I have to do so locally again
in another branch.

Sponsored by:	The FreeBSD Foundation
Fixes:		e9bb7f9aa1b4f
MFC after:	1 week
Reviewed by:	adrian, emaste
Differential Revision: https://reviews.freebsd.org/D48319
---
 sbin/ifconfig/ifieee80211.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sbin/ifconfig/ifieee80211.c b/sbin/ifconfig/ifieee80211.c
index 25de7fb1363256..396368798da344 100644
--- a/sbin/ifconfig/ifieee80211.c
+++ b/sbin/ifconfig/ifieee80211.c
@@ -1978,13 +1978,11 @@ set80211vhtconf(if_ctx *ctx, const char *val __unused, int d)
 {
 	if (get80211val(ctx, IEEE80211_IOC_VHTCONF, &vhtconf) < 0)
 		errx(-1, "cannot set VHT setting");
-	printf("%s: vhtconf=0x%08x, d=%d\n", __func__, vhtconf, d);
 	if (d < 0) {
 		d = -d;
 		vhtconf &= ~d;
 	} else
 		vhtconf |= d;
-	printf("%s: vhtconf is now 0x%08x\n", __func__, vhtconf);
 	set80211(ctx, IEEE80211_IOC_VHTCONF, vhtconf, 0, NULL);
 }
 

From 2be86b6cc168615e19350710347b77616c4b7f19 Mon Sep 17 00:00:00 2001
From: Mark Johnston <markj@FreeBSD.org>
Date: Tue, 7 Jan 2025 14:30:53 +0000
Subject: [PATCH 041/143] makefs: Remove dead code in inode_type()

No functional change intended.

MFC after:	1 week
---
 usr.sbin/makefs/walk.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/usr.sbin/makefs/walk.c b/usr.sbin/makefs/walk.c
index 4018652299576f..fe1fe8df80dbcf 100644
--- a/usr.sbin/makefs/walk.c
+++ b/usr.sbin/makefs/walk.c
@@ -603,8 +603,6 @@ inode_type(mode_t mode)
 		return ("symlink");
 	if (S_ISDIR(mode))
 		return ("dir");
-	if (S_ISLNK(mode))
-		return ("link");
 	if (S_ISFIFO(mode))
 		return ("fifo");
 	if (S_ISSOCK(mode))

From ce878284318e71217d8d8f43f7d590b6c338d3aa Mon Sep 17 00:00:00 2001
From: Mark Johnston <markj@FreeBSD.org>
Date: Tue, 7 Jan 2025 14:31:02 +0000
Subject: [PATCH 042/143] makefs: Handle special file types when creating a
 zpool

Previously, anything other than a regular file, directory or symlink
would cause makefs to exit with an assertion failure.  Make it a bit
more resilient to user error: print a warning and skip the file.  Add a
regression test wherein we create an image from a devfs mount.

PR:		283583
MFC after:	2 weeks
---
 usr.sbin/makefs/tests/makefs_zfs_tests.sh | 22 +++++++++++++
 usr.sbin/makefs/zfs/fs.c                  | 39 +++++++++++++++++++----
 2 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/usr.sbin/makefs/tests/makefs_zfs_tests.sh b/usr.sbin/makefs/tests/makefs_zfs_tests.sh
index aeda889d9a5c1d..3d5819439a733e 100644
--- a/usr.sbin/makefs/tests/makefs_zfs_tests.sh
+++ b/usr.sbin/makefs/tests/makefs_zfs_tests.sh
@@ -148,6 +148,27 @@ dataset_removal_cleanup()
 	common_cleanup
 }
 
+#
+# Make sure that we can handle some special file types.  Anything other than
+# regular files, symlinks and directories are ignored.
+#
+atf_test_case devfs cleanup
+devfs_body()
+{
+	atf_check mkdir dev
+	atf_check mount -t devfs none ./dev
+
+	atf_check -e match:"skipping unhandled" $MAKEFS -s 1g -o rootpath=/ \
+	    -o poolname=$ZFS_POOL_NAME $TEST_IMAGE ./dev
+
+	import_image
+}
+devfs_cleanup()
+{
+	common_cleanup
+	umount -f ./dev
+}
+
 #
 # Make sure that we can create and remove an empty directory.
 #
@@ -842,6 +863,7 @@ atf_init_test_cases()
 	atf_add_test_case autoexpand
 	atf_add_test_case basic
 	atf_add_test_case dataset_removal
+	atf_add_test_case devfs
 	atf_add_test_case empty_dir
 	atf_add_test_case empty_fs
 	atf_add_test_case file_extend
diff --git a/usr.sbin/makefs/zfs/fs.c b/usr.sbin/makefs/zfs/fs.c
index 9413241da0c7d5..073dce3ce6978b 100644
--- a/usr.sbin/makefs/zfs/fs.c
+++ b/usr.sbin/makefs/zfs/fs.c
@@ -177,6 +177,13 @@ fsnode_isroot(const fsnode *cur)
 	return (strcmp(cur->name, ".") == 0);
 }
 
+static bool
+fsnode_valid(const fsnode *cur)
+{
+	return (cur->type == S_IFREG || cur->type == S_IFDIR ||
+	    cur->type == S_IFLNK);
+}
+
 /*
  * Visit each node in a directory hierarchy, in pre-order depth-first order.
  */
@@ -186,9 +193,11 @@ fsnode_foreach(fsnode *root, int (*cb)(fsnode *, void *), void *arg)
 	assert(root->type == S_IFDIR);
 
 	for (fsnode *cur = root; cur != NULL; cur = cur->next) {
-		assert(cur->type == S_IFREG || cur->type == S_IFDIR ||
-		    cur->type == S_IFLNK);
-
+		if (!fsnode_valid(cur)) {
+			warnx("skipping unhandled %s %s/%s",
+			    inode_type(cur->type), cur->path, cur->name);
+			continue;
+		}
 		if (cb(cur, arg) == 0)
 			continue;
 		if (cur->type == S_IFDIR && cur->child != NULL)
@@ -381,9 +390,15 @@ fs_populate_sattrs(struct fs_populate_arg *arg, const fsnode *cur,
 		 */
 		for (fsnode *c = fsnode_isroot(cur) ? cur->next : cur->child;
 		    c != NULL; c = c->next) {
-			if (c->type == S_IFDIR)
+			switch (c->type) {
+			case S_IFDIR:
 				links++;
-			objsize++;
+				/* FALLTHROUGH */
+			case S_IFREG:
+			case S_IFLNK:
+				objsize++;
+				break;
+			}
 		}
 
 		/* The root directory is its own parent. */
@@ -652,6 +667,16 @@ fs_populate_symlink(fsnode *cur, struct fs_populate_arg *arg)
 	fs_populate_sattrs(arg, cur, dnode);
 }
 
+static fsnode *
+fsnode_next(fsnode *cur)
+{
+	for (cur = cur->next; cur != NULL; cur = cur->next) {
+		if (fsnode_valid(cur))
+			return (cur);
+	}
+	return (NULL);
+}
+
 static int
 fs_foreach_populate(fsnode *cur, void *_arg)
 {
@@ -678,7 +703,7 @@ fs_foreach_populate(fsnode *cur, void *_arg)
 
 	ret = (cur->inode->flags & FI_ROOT) != 0 ? 0 : 1;
 
-	if (cur->next == NULL &&
+	if (fsnode_next(cur) == NULL &&
 	    (cur->child == NULL || (cur->inode->flags & FI_ROOT) != 0)) {
 		/*
 		 * We reached a terminal node in a subtree.  Walk back up and
@@ -694,7 +719,7 @@ fs_foreach_populate(fsnode *cur, void *_arg)
 				eclose(dir->dirfd);
 			free(dir);
 			cur = cur->parent;
-		} while (cur != NULL && cur->next == NULL &&
+		} while (cur != NULL && fsnode_next(cur) == NULL &&
 		    (cur->inode->flags & FI_ROOT) == 0);
 	}
 

From 596ee234ef4537e71f030e13598ecbe73ee697bb Mon Sep 17 00:00:00 2001
From: Mark Johnston <markj@FreeBSD.org>
Date: Tue, 7 Jan 2025 14:31:15 +0000
Subject: [PATCH 043/143] ktrace: Make -t t trace struct arrays as well as
 structs

Otherwise there is no specific -t option which captures struct arrays.

MFC after:	1 week
---
 usr.bin/ktrace/ktrace.1 | 2 +-
 usr.bin/ktrace/subr.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/usr.bin/ktrace/ktrace.1 b/usr.bin/ktrace/ktrace.1
index 6542bb88dffd8c..c2f046dc52872c 100644
--- a/usr.bin/ktrace/ktrace.1
+++ b/usr.bin/ktrace/ktrace.1
@@ -132,7 +132,7 @@ trace capability check failures
 .It Cm s
 trace signal processing
 .It Cm t
-trace various structures
+trace various structures and arrays of structures
 .It Cm u
 userland traces generated by
 .Xr utrace 2
diff --git a/usr.bin/ktrace/subr.c b/usr.bin/ktrace/subr.c
index 6762fe9620cbd8..1db4c214414b08 100644
--- a/usr.bin/ktrace/subr.c
+++ b/usr.bin/ktrace/subr.c
@@ -70,7 +70,7 @@ getpoints(char *s)
 			facs |= KTRFAC_PSIG;
 			break;
 		case 't':
-			facs |= KTRFAC_STRUCT;
+			facs |= KTRFAC_STRUCT | KTRFAC_STRUCT_ARRAY;
 			break;
 		case 'u':
 			facs |= KTRFAC_USER;

From d3bdfa583044dbfb76ef777939b86bb68baebee7 Mon Sep 17 00:00:00 2001
From: Mark Johnston <markj@FreeBSD.org>
Date: Tue, 7 Jan 2025 14:33:06 +0000
Subject: [PATCH 044/143] bhyve: Use a non-blocking read in slirp_recv()

When using the slirp backend with the e1000 frontend, I otherwise get
hangs in readv(), caused by the e1000 emulation not checking whether
bytes are available before trying to read them.  In particular, that
device model expects the recv callback to return 0 if no bytes are
available, and with slirp it would end up blocking forever.  The virtio
device model uses the peek_recvlen to check first, so I didn't notice
the problem when implementing the slirp backend.

Make the slirp backend more flexible to accommodate e1000.

MFC after:	1 month
Differential Revision:	https://reviews.freebsd.org/D48164
---
 usr.sbin/bhyve/net_backend_slirp.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/usr.sbin/bhyve/net_backend_slirp.c b/usr.sbin/bhyve/net_backend_slirp.c
index 5ae33801387cd2..d070d2cdfdb6fb 100644
--- a/usr.sbin/bhyve/net_backend_slirp.c
+++ b/usr.sbin/bhyve/net_backend_slirp.c
@@ -609,11 +609,22 @@ static ssize_t
 slirp_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
 {
 	struct slirp_priv *priv = NET_BE_PRIV(be);
+	struct msghdr hdr;
 	ssize_t n;
 
-	n = readv(priv->pipe[0], iov, iovcnt);
-	if (n < 0)
+	hdr.msg_name = NULL;
+	hdr.msg_namelen = 0;
+	hdr.msg_iov = __DECONST(struct iovec *, iov);
+	hdr.msg_iovlen = iovcnt;
+	hdr.msg_control = NULL;
+	hdr.msg_controllen = 0;
+	hdr.msg_flags = 0;
+	n = recvmsg(priv->pipe[0], &hdr, MSG_DONTWAIT);
+	if (n < 0) {
+		if (errno == EWOULDBLOCK)
+			return (0);
 		return (-1);
+	}
 	assert(n <= SLIRP_MTU);
 	return (n);
 }

From 20a51e6073f488440e108c7c628231cd6ae6757e Mon Sep 17 00:00:00 2001
From: Mark Johnston <markj@FreeBSD.org>
Date: Tue, 7 Jan 2025 14:33:45 +0000
Subject: [PATCH 045/143] bhyve: Implement the libslirp notify callback

libslirp can invoke a callback when received data is removed from a
socket buffer, generally because the guest ACKed some data.  Previously
it didn't do anything, but it needs to wake up the poll thread to get
reasonable throughput.

Suppose one is using scp to copy data into a guest filesystem via the
slirp backend.  Data is received on libslirp's socket, which we poll for
data in slirp_pollfd_td_loop().  That data gets buffered in priv->pipe,
and eventually is placed in the device model's RX rings by the backend's
mevent handler.  When implementing TCP, libslirp holds on to a copy of
data until it's ACKed by the guest via slirp_send(), at which point it
drops that data and invokes the notify callback.

The initial implementation of this backend didn't take into account the
fact that slirp_pollfds_fill() will not add libslirp's socket to the
pollfd set if more than a threshold amount of data is already buffered.
Then poll() needs to time out before the backend sends more data to the
guest.  With a default timeout of 500ms, this kills throughput.

Use a pipe to implement a simple in-band signal to the poll thread so
that it reacts quickly when more buffer space becomes available.

MFC after:	1 month
Differential Revision:	https://reviews.freebsd.org/D48192
---
 usr.sbin/bhyve/net_backend_slirp.c | 90 ++++++++++++++++++++++--------
 1 file changed, 68 insertions(+), 22 deletions(-)

diff --git a/usr.sbin/bhyve/net_backend_slirp.c b/usr.sbin/bhyve/net_backend_slirp.c
index d070d2cdfdb6fb..171c5b5bdbbdb7 100644
--- a/usr.sbin/bhyve/net_backend_slirp.c
+++ b/usr.sbin/bhyve/net_backend_slirp.c
@@ -84,6 +84,18 @@ static slirp_new_p_t slirp_new_p;
 static slirp_pollfds_fill_p_t slirp_pollfds_fill_p;
 static slirp_pollfds_poll_p_t slirp_pollfds_poll_p;
 
+static void
+checked_close(int *fdp)
+{
+	int error;
+
+	if (*fdp != -1) {
+		error = close(*fdp);
+		assert(error == 0);
+		*fdp = -1;
+	}
+}
+
 static int
 slirp_init_once(void)
 {
@@ -134,7 +146,8 @@ struct slirp_priv {
 
 #define	SLIRP_MTU	2048
 	struct mevent *mevp;
-	int pipe[2];
+	int pipe[2];		/* used to buffer data sent to the guest */
+	int wakeup[2];		/* used to wake up the pollfd thread */
 
 	pthread_t pollfd_td;
 	struct pollfd *pollfds;
@@ -151,6 +164,7 @@ slirp_priv_init(struct slirp_priv *priv)
 
 	memset(priv, 0, sizeof(*priv));
 	priv->pipe[0] = priv->pipe[1] = -1;
+	priv->wakeup[0] = priv->wakeup[1] = -1;
 	error = pthread_mutex_init(&priv->mtx, NULL);
 	assert(error == 0);
 }
@@ -160,14 +174,10 @@ slirp_priv_cleanup(struct slirp_priv *priv)
 {
 	int error;
 
-	if (priv->pipe[0] != -1) {
-		error = close(priv->pipe[0]);
-		assert(error == 0);
-	}
-	if (priv->pipe[1] != -1) {
-		error = close(priv->pipe[1]);
-		assert(error == 0);
-	}
+	checked_close(&priv->pipe[0]);
+	checked_close(&priv->pipe[1]);
+	checked_close(&priv->wakeup[0]);
+	checked_close(&priv->wakeup[1]);
 	if (priv->mevp)
 		mevent_delete(priv->mevp);
 	if (priv->slirp != NULL)
@@ -188,8 +198,13 @@ slirp_cb_clock_get_ns(void *param __unused)
 }
 
 static void
-slirp_cb_notify(void *param __unused)
+slirp_cb_notify(void *param)
 {
+	struct slirp_priv *priv;
+
+	/* Wake up the poll thread.  We assume that priv->mtx is held here. */
+	priv = param;
+	(void)write(priv->wakeup[1], "M", 1);
 }
 
 static void
@@ -310,11 +325,19 @@ slirp_poll_revents(int idx, void *param)
 {
 	struct slirp_priv *priv;
 	struct pollfd *pollfd;
+	short revents;
 
 	priv = param;
+	assert(idx >= 0);
+	assert((unsigned int)idx < priv->npollfds);
 	pollfd = &priv->pollfds[idx];
 	assert(pollfd->fd != -1);
-	return (pollev2slirpev(pollfd->revents));
+
+	/* The kernel may report POLLHUP even if we didn't ask for it. */
+	revents = pollfd->revents;
+	if ((pollfd->events & POLLHUP) == 0)
+		revents &= ~POLLHUP;
+	return (pollev2slirpev(revents));
 }
 
 static void *
@@ -331,9 +354,14 @@ slirp_pollfd_td_loop(void *param)
 
 	pthread_mutex_lock(&priv->mtx);
 	for (;;) {
+		int wakeup;
+
 		for (size_t i = 0; i < priv->npollfds; i++)
 			priv->pollfds[i].fd = -1;
 
+		/* Register for notifications from slirp_cb_notify(). */
+		wakeup = slirp_addpoll_cb(priv->wakeup[0], POLLIN, priv);
+
 		timeout = UINT32_MAX;
 		slirp_pollfds_fill_p(priv->slirp, &timeout, slirp_addpoll_cb,
 		    priv);
@@ -341,20 +369,32 @@ slirp_pollfd_td_loop(void *param)
 		pollfds = priv->pollfds;
 		npollfds = priv->npollfds;
 		pthread_mutex_unlock(&priv->mtx);
-		for (;;) {
-			error = poll(pollfds, npollfds, timeout);
-			if (error == -1) {
-				if (errno != EINTR) {
-					EPRINTLN("poll: %s", strerror(errno));
-					exit(1);
-				}
-				continue;
-			}
-			break;
+		error = poll(pollfds, npollfds, timeout);
+		if (error == -1 && errno != EINTR) {
+			EPRINTLN("poll: %s", strerror(errno));
+			exit(1);
 		}
 		pthread_mutex_lock(&priv->mtx);
 		slirp_pollfds_poll_p(priv->slirp, error == -1,
 		    slirp_poll_revents, priv);
+
+		/*
+		 * If we were woken up by the notify callback, mask the
+		 * interrupt.
+		 */
+		if ((pollfds[wakeup].revents & POLLIN) != 0) {
+			ssize_t n;
+
+			do {
+				uint8_t b;
+
+				n = read(priv->wakeup[0], &b, 1);
+			} while (n == 1);
+			if (n != -1 || errno != EAGAIN) {
+				EPRINTLN("read(wakeup): %s", strerror(errno));
+				exit(1);
+			}
+		}
 	}
 }
 
@@ -510,12 +550,18 @@ _slirp_init(struct net_backend *be, const char *devname __unused,
 		free(tofree);
 	}
 
-	error = socketpair(PF_LOCAL, SOCK_DGRAM, 0, priv->pipe);
+	error = socketpair(PF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0, priv->pipe);
 	if (error != 0) {
 		EPRINTLN("Unable to create pipe: %s", strerror(errno));
 		goto err;
 	}
 
+	error = pipe2(priv->wakeup, O_CLOEXEC | O_NONBLOCK);
+	if (error != 0) {
+		EPRINTLN("Unable to create wakeup pipe: %s", strerror(errno));
+		goto err;
+	}
+
 	/*
 	 * Try to avoid dropping buffered packets in slirp_cb_send_packet().
 	 */

From f1aeb5d850cf26418fb70a16d1304b92c45b5f1d Mon Sep 17 00:00:00 2001
From: "Bjoern A. Zeeb" <bz@FreeBSD.org>
Date: Tue, 7 Jan 2025 12:16:57 +0000
Subject: [PATCH 046/143] LinuxKPI: 802.11: add a print mask for
 ieee80211_rx_status_flags bits

Add a print mask for use with %b to aid debugging.  It is a lot easier
to read names than numbers.

Sponsored by:	The FreeBSD Foundation
MFC after:	3 days
---
 sys/compat/linuxkpi/common/include/net/mac80211.h | 11 +++++++++++
 sys/compat/linuxkpi/common/src/linux_80211.c      |  4 ++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/sys/compat/linuxkpi/common/include/net/mac80211.h b/sys/compat/linuxkpi/common/include/net/mac80211.h
index dff152caf14094..3aa383554e9310 100644
--- a/sys/compat/linuxkpi/common/include/net/mac80211.h
+++ b/sys/compat/linuxkpi/common/include/net/mac80211.h
@@ -625,6 +625,17 @@ enum ieee80211_rx_status_flags {
 	RX_FLAG_FAILED_PLCP_CRC		= BIT(31),
 };
 
+#define	IEEE80211_RX_STATUS_FLAGS_BITS					\
+	"\20\1ALLOW_SAME_PN\2AMPDU_DETAILS\3AMPDU_EOF_BIT\4AMPDU_EOF_BIT_KNOWN" \
+	"\5DECRYPTED\6DUP_VALIDATED\7FAILED_FCS_CRC\10ICV_STRIPPED" \
+	"\11MACTIME_PLCP_START\12MACTIME_START\13MIC_STRIPPED" \
+	"\14MMIC_ERROR\15MMIC_STRIPPED\16NO_PSDU\17PN_VALIDATED" \
+	"\20RADIOTAP_HE\21RADIOTAP_HE_MU\22RADIOTAP_LSIG\23RADIOTAP_VENDOR_DATA" \
+	"\24NO_SIGNAL_VAL\25IV_STRIPPED\26AMPDU_IS_LAST\27AMPDU_LAST_KNOWN" \
+	"\30AMSDU_MORE\31MACTIME_END\32ONLY_MONITOR\33SKIP_MONITOR" \
+	"\348023\35RADIOTAP_TLV_AT_END\36MACTIME\37MACTIME_IS_RTAP_TS64" \
+	"\40FAILED_PLCP_CRC"
+
 enum mac80211_rx_encoding {
 	RX_ENC_LEGACY		= 0,
 	RX_ENC_HT,
diff --git a/sys/compat/linuxkpi/common/src/linux_80211.c b/sys/compat/linuxkpi/common/src/linux_80211.c
index d66bc40f40f68d..77e23775ba1951 100644
--- a/sys/compat/linuxkpi/common/src/linux_80211.c
+++ b/sys/compat/linuxkpi/common/src/linux_80211.c
@@ -5333,13 +5333,13 @@ linuxkpi_ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
 
 	/* Implement a dump_rxcb() !!! */
 	if (linuxkpi_debug_80211 & D80211_TRACE_RX)
-		printf("TRACE-RX: %s: RXCB: %ju %ju %u, %#0x, %u, %#0x, %#0x, "
+		printf("TRACE-RX: %s: RXCB: %ju %ju %u, %b, %u, %#0x, %#0x, "
 		    "%u band %u, %u { %d %d %d %d }, %d, %#x %#x %#x %#x %u %u %u\n",
 			__func__,
 			(uintmax_t)rx_status->boottime_ns,
 			(uintmax_t)rx_status->mactime,
 			rx_status->device_timestamp,
-			rx_status->flag,
+			rx_status->flag, IEEE80211_RX_STATUS_FLAGS_BITS,
 			rx_status->freq,
 			rx_status->bw,
 			rx_status->encoding,

From cf71349a23f02b55bd3bb4973decac87f7a7d2b8 Mon Sep 17 00:00:00 2001
From: "Bjoern A. Zeeb" <bz@FreeBSD.org>
Date: Sat, 4 Jan 2025 08:01:24 +0000
Subject: [PATCH 047/143] ifconfig: 802.11: fix indentation of a line

No functional changes.

Sponsored by:	The FreeBSD Foundation
MFC after:	3 days
---
 sbin/ifconfig/ifieee80211.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sbin/ifconfig/ifieee80211.c b/sbin/ifconfig/ifieee80211.c
index 396368798da344..77f7bdabb0b2fc 100644
--- a/sbin/ifconfig/ifieee80211.c
+++ b/sbin/ifconfig/ifieee80211.c
@@ -2296,7 +2296,7 @@ regdomain_addchans(if_ctx *ctx, struct ieee80211req_chaninfo *ci,
 			memset(c, 0, sizeof(*c));
 			c->ic_freq = freq;
 			c->ic_flags = flags;
-		if (c->ic_flags & IEEE80211_CHAN_DFS)
+			if (c->ic_flags & IEEE80211_CHAN_DFS)
 				c->ic_maxregpower = nb->maxPowerDFS;
 			else
 				c->ic_maxregpower = nb->maxPower;

From e6d40f90110ad8026f1af3fa68f836463936ea78 Mon Sep 17 00:00:00 2001
From: "Bjoern A. Zeeb" <bz@FreeBSD.org>
Date: Tue, 7 Jan 2025 11:56:07 +0000
Subject: [PATCH 048/143] net80211: correct typo s/Insure/Ensure/

No functional changes.

Sponsored by:	The FreeBSD Foundation
MFC after:	3 days
Reviewed by:	emaste
Differential Revision: https://reviews.freebsd.org/D48358
---
 sys/net80211/ieee80211_crypto.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/net80211/ieee80211_crypto.c b/sys/net80211/ieee80211_crypto.c
index d70b3aa4a24a12..e1fac3a624e8e3 100644
--- a/sys/net80211/ieee80211_crypto.c
+++ b/sys/net80211/ieee80211_crypto.c
@@ -741,7 +741,7 @@ ieee80211_crypto_decap(struct ieee80211_node *ni, struct mbuf *m, int hdrlen,
 		k = &ni->ni_ucastkey;
 
 	/*
-	 * Insure crypto header is contiguous and long enough for all
+	 * Ensure crypto header is contiguous and long enough for all
 	 * decap work.
 	 */
 	cip = k->wk_cipher;

From 6ba2c036a0117ac02f9979b7dc49f15e9c1ea9c9 Mon Sep 17 00:00:00 2001
From: Konstantin Belousov <kib@FreeBSD.org>
Date: Tue, 7 Jan 2025 01:29:18 +0200
Subject: [PATCH 049/143] pci_find_cap_method(): limit number of iterations for
 finding a capability

A powered-down device might return 0xff for extended config register
reads, causing an endless loop.

PR:	283815
Reviewed by:	imp
Sponsored by:	The FreeBSD Foundation
MFC after:	1 week
Differential revision:	https://reviews.freebsd.org/D48348
---
 sys/dev/pci/pci.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c
index cf01115581e16b..0b02d873c5a190 100644
--- a/sys/dev/pci/pci.c
+++ b/sys/dev/pci/pci.c
@@ -1519,6 +1519,7 @@ pci_find_cap_method(device_t dev, device_t child, int capability,
 	pcicfgregs *cfg = &dinfo->cfg;
 	uint32_t status;
 	uint8_t ptr;
+	int cnt;
 
 	/*
 	 * Check the CAP_LIST bit of the PCI status register first.
@@ -1545,9 +1546,11 @@ pci_find_cap_method(device_t dev, device_t child, int capability,
 	ptr = pci_read_config(child, ptr, 1);
 
 	/*
-	 * Traverse the capabilities list.
+	 * Traverse the capabilities list.  Limit by total theoretical
+	 * maximum number of caps: capability needs at least id and
+	 * next registers, and any type X header cannot contain caps.
 	 */
-	while (ptr != 0) {
+	for (cnt = 0; ptr != 0 && cnt < (PCIE_REGMAX - 0x40) / 2; cnt++) {
 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
 			if (capreg != NULL)
 				*capreg = ptr;

From f73c9b5da190954a81e9e70e2caa8e9168623bfd Mon Sep 17 00:00:00 2001
From: Ed Maste <emaste@FreeBSD.org>
Date: Tue, 7 Jan 2025 11:04:00 -0500
Subject: [PATCH 050/143] mi_switch.9: Remove cpu_switch, cpu_throw

cpu_machdep.9 was added to document cpu_*, but cpu_switch and cpu_throw
were already documented in mi_switch.9, and MLINKed.  cpu_machdep.9
seems like the correct place for this, so remove them from mi_switch.9.

Some of the removed text was stale, although there are few notes that
ought to be added to cpu_machdep.9 in a future commit.

Reported by:	tools/pkgbase/metalog_reader.lua
Reviewed by:	jhb
Sponsored by:	The FreeBSD Foundation
Fixes: 9c87cbbcaaed ("cpu_machdep.9: New manpage describing the semantics of several cpu_*")
Differential Revision: https://reviews.freebsd.org/D48360
---
 share/man/man9/Makefile    |  2 --
 share/man/man9/mi_switch.9 | 68 +++-----------------------------------
 2 files changed, 4 insertions(+), 66 deletions(-)

diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile
index 91a7bbe294fa1d..c09d3aa554a115 100644
--- a/share/man/man9/Makefile
+++ b/share/man/man9/Makefile
@@ -1594,8 +1594,6 @@ MLINKS+=microuptime.9 binuptime.9 \
 	microuptime.9 getsbinuptime.9 \
 	microuptime.9 nanouptime.9 \
 	microuptime.9 sbinuptime.9
-MLINKS+=mi_switch.9 cpu_switch.9 \
-	mi_switch.9 cpu_throw.9
 MLINKS+=mod_cc.9 CCV.9 \
 	mod_cc.9 DECLARE_CC_MODULE.9
 MLINKS+=mtx_pool.9 mtx_pool_alloc.9 \
diff --git a/share/man/man9/mi_switch.9 b/share/man/man9/mi_switch.9
index 549ec497434cc6..e04c2ee35acebe 100644
--- a/share/man/man9/mi_switch.9
+++ b/share/man/man9/mi_switch.9
@@ -31,23 +31,17 @@
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 .\" POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd January 9, 2023
+.Dd January 7, 2025
 .Dt MI_SWITCH 9
 .Os
 .Sh NAME
-.Nm mi_switch ,
-.Nm cpu_switch ,
-.Nm cpu_throw
+.Nm mi_switch
 .Nd switch to another thread context
 .Sh SYNOPSIS
 .In sys/param.h
 .In sys/proc.h
 .Ft void
 .Fn mi_switch "int flags"
-.Ft void
-.Fn cpu_switch "struct thread *oldtd" "struct thread *newtd" "struct mtx *lock"
-.Ft void
-.Fn cpu_throw "struct thread *oldtd" "struct thread *newtd"
 .Sh DESCRIPTION
 The
 .Fn mi_switch
@@ -168,63 +162,9 @@ running thread
 .Fa oldtd
 to the chosen thread
 .Fa newtd .
-First, it saves the context of
-.Fa oldtd
-to its Process Control Block
-.Po
-PCB,
-.Vt struct pcb
-.Pc ,
-pointed at by
-.Va oldtd->td_pcb .
-The function then updates important per-CPU state such as the
-.Dv curthread
-variable, and activates
-.Fa newtd\&'s
-virtual address space using its associated
-.Xr pmap 9
-structure.
-Finally, it reads in the saved context from
-.Fa newtd\&'s
-PCB.
-CPU instruction flow continues in the new thread context, on
-.Fa newtd\&'s
-kernel stack.
-The return from
-.Fn cpu_switch
-can be understood as a completion of the function call initiated by
-.Fa newtd
-when it was previously switched out, at some point in the distant (relative to
-CPU time) past.
-.Pp
-The
-.Fa mtx
-argument to
-.Fn cpu_switch
-is used to pass the mutex which will be stored as
-.Fa oldtd\&'s
-thread lock at the moment that
-.Fa oldtd
-is completely switched out.
-This is an implementation detail of
-.Fn sched_switch .
-.Pp
-.Fn cpu_throw
-is similar to
-.Fn cpu_switch
-except that it does not save the context of the old thread.
-This function is useful when the kernel does not have an old thread
-context to save, such as when CPUs other than the boot CPU perform their
-first task switch, or when the kernel does not care about the state of the
-old thread, such as in
-.Xr thread_exit 9
-when the kernel terminates the current thread and switches into a new
-thread,
-.Fa newtd .
-The
-.Fa oldtd
-argument is unused.
 .Sh SEE ALSO
+.Xr cpu_switch 9 ,
+.Xr cpu_throw 9 ,
 .Xr critical_exit 9 ,
 .Xr issignal 9 ,
 .Xr kern_yield 9 ,

From 07f6575585bf69ae48dffe87c4578057ae4782d8 Mon Sep 17 00:00:00 2001
From: "Bjoern A. Zeeb" <bz@FreeBSD.org>
Date: Sat, 28 Dec 2024 09:52:45 +0000
Subject: [PATCH 051/143] LinuxKPI: 802.11: turn on debugfs for iwlwifi and
 rtw88

Make iwlwifi compile with debugfs after the last updates and turn it on
for both iwlwifi and rtw88 in order to be able to get at least some
useful information on driver/firmware state.

Sponsored by:	The FreeBSD Foundation
MFC after:	10 days
---
 sys/compat/linuxkpi/common/include/net/mac80211.h | 4 ++++
 sys/contrib/dev/iwlwifi/mvm/debugfs-vif.c         | 2 ++
 sys/modules/iwlwifi/Makefile                      | 2 +-
 sys/modules/rtw88/Makefile                        | 5 ++++-
 4 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/sys/compat/linuxkpi/common/include/net/mac80211.h b/sys/compat/linuxkpi/common/include/net/mac80211.h
index 3aa383554e9310..fe36f1adf28ade 100644
--- a/sys/compat/linuxkpi/common/include/net/mac80211.h
+++ b/sys/compat/linuxkpi/common/include/net/mac80211.h
@@ -737,6 +737,7 @@ struct ieee80211_sta_agg {
 };
 
 struct ieee80211_link_sta {
+	struct ieee80211_sta			*sta;
 	uint8_t					addr[ETH_ALEN];
 	uint8_t					link_id;
 	uint32_t				supp_rates[NUM_NL80211_BANDS];
@@ -1121,6 +1122,9 @@ struct ieee80211_ops {
 
 /* #ifdef CONFIG_MAC80211_DEBUGFS */	/* Do not change depending on compile-time option. */
 	void (*sta_add_debugfs)(struct ieee80211_hw *, struct ieee80211_vif *, struct ieee80211_sta *, struct dentry *);
+	void (*vif_add_debugfs)(struct ieee80211_hw *, struct ieee80211_vif *);
+	void (*link_sta_add_debugfs)(struct ieee80211_hw *, struct ieee80211_vif *, struct ieee80211_link_sta *, struct dentry *);
+	void (*link_add_debugfs)(struct ieee80211_hw *, struct ieee80211_vif *, struct ieee80211_bss_conf *, struct dentry *);
 /* #endif */
 };
 
diff --git a/sys/contrib/dev/iwlwifi/mvm/debugfs-vif.c b/sys/contrib/dev/iwlwifi/mvm/debugfs-vif.c
index aa505895532349..0b3bc62f39a707 100644
--- a/sys/contrib/dev/iwlwifi/mvm/debugfs-vif.c
+++ b/sys/contrib/dev/iwlwifi/mvm/debugfs-vif.c
@@ -888,10 +888,12 @@ void iwl_mvm_vif_add_debugfs(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
 void iwl_mvm_vif_dbgfs_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
 {
 	struct dentry *dbgfs_dir = vif->debugfs_dir;
+#if defined(__linux__)
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 	char buf[3 * 3 + 11 + (NL80211_WIPHY_NAME_MAXLEN + 1) +
 		 (7 + IFNAMSIZ + 1) + 6 + 1];
 	char name[7 + IFNAMSIZ + 1];
+#endif
 
 	/* this will happen in monitor mode */
 	if (!dbgfs_dir)
diff --git a/sys/modules/iwlwifi/Makefile b/sys/modules/iwlwifi/Makefile
index a8176383a13c04..c41a1a1757c1ab 100644
--- a/sys/modules/iwlwifi/Makefile
+++ b/sys/modules/iwlwifi/Makefile
@@ -3,7 +3,7 @@ DEVIWLWIFIDIR=	${SRCTOP}/sys/contrib/dev/iwlwifi
 .PATH: ${DEVIWLWIFIDIR}
 
 WITH_CONFIG_PM=	0
-WITH_DEBUGFS=	0
+WITH_DEBUGFS=	1
 
 KMOD=	if_iwlwifi
 
diff --git a/sys/modules/rtw88/Makefile b/sys/modules/rtw88/Makefile
index 19e77b271c3722..486197d8c95257 100644
--- a/sys/modules/rtw88/Makefile
+++ b/sys/modules/rtw88/Makefile
@@ -3,6 +3,7 @@ DEVRTW88DIR=	${SRCTOP}/sys/contrib/dev/rtw88
 .PATH: ${DEVRTW88DIR}
 
 WITH_CONFIG_PM=	0
+WITH_DEBUGFS=	1
 
 KMOD=	if_rtw88
 
@@ -39,6 +40,8 @@ CFLAGS+=	-DLINUXKPI_VERSION=60800
 CFLAGS+=	-I${DEVRTW88DIR}
 CFLAGS+=	${LINUXKPI_INCLUDES}
 CFLAGS+=	-DCONFIG_RTW88_DEBUG
-#CFLAGS+=	-DCONFIG_RTW88_DEBUGFS
+.if defined(WITH_DEBUGFS) && ${WITH_DEBUGFS} > 0
+CFLAGS+=	-DCONFIG_RTW88_DEBUGFS
+.endif
 
 .include <bsd.kmod.mk>

From 42410c6d682c4e00ce6147f99b51a55f6f3fe075 Mon Sep 17 00:00:00 2001
From: "Bjoern A. Zeeb" <bz@FreeBSD.org>
Date: Sat, 4 Jan 2025 16:58:49 +0000
Subject: [PATCH 052/143] ifconfig: make -vht work

Also hide the other vht options on -vht and only show vht40/80/160/80p80
when vht is enabled.

While here fix some whitespace and comments.

Sponsored by:	The FreeBSD Foundation
MFC after:	3 days
Reviewed by:	adrian, emaste
Differential Revision: https://reviews.freebsd.org/D48326
---
 sbin/ifconfig/ifieee80211.c | 45 ++++++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/sbin/ifconfig/ifieee80211.c b/sbin/ifconfig/ifieee80211.c
index 77f7bdabb0b2fc..d4dcfdf63254ff 100644
--- a/sbin/ifconfig/ifieee80211.c
+++ b/sbin/ifconfig/ifieee80211.c
@@ -198,8 +198,10 @@ static int gottxparams = 0;
 static struct ieee80211_channel curchan;
 static int gotcurchan = 0;
 static struct ifmediareq *global_ifmr;
+
+/* HT */
 static int htconf = 0;
-static	int gothtconf = 0;
+static int gothtconf = 0;
 
 static void
 gethtconf(if_ctx *ctx)
@@ -213,7 +215,7 @@ gethtconf(if_ctx *ctx)
 
 /* VHT */
 static int vhtconf = 0;
-static	int gotvhtconf = 0;
+static int gotvhtconf = 0;
 
 static void
 getvhtconf(if_ctx *ctx)
@@ -5416,26 +5418,27 @@ ieee80211_status(if_ctx *ctx)
 
 	if (IEEE80211_IS_CHAN_VHT(c) || verbose) {
 		getvhtconf(ctx);
-		if (vhtconf & IEEE80211_FVHT_VHT)
+		if (vhtconf & IEEE80211_FVHT_VHT) {
 			LINE_CHECK("vht");
-		else
+
+			if (vhtconf & IEEE80211_FVHT_USEVHT40)
+				LINE_CHECK("vht40");
+			else
+				LINE_CHECK("-vht40");
+			if (vhtconf & IEEE80211_FVHT_USEVHT80)
+				LINE_CHECK("vht80");
+			else
+				LINE_CHECK("-vht80");
+			if (vhtconf & IEEE80211_FVHT_USEVHT160)
+				LINE_CHECK("vht160");
+			else
+				LINE_CHECK("-vht160");
+			if (vhtconf & IEEE80211_FVHT_USEVHT80P80)
+				LINE_CHECK("vht80p80");
+			else
+				LINE_CHECK("-vht80p80");
+		} else if (verbose)
 			LINE_CHECK("-vht");
-		if (vhtconf & IEEE80211_FVHT_USEVHT40)
-			LINE_CHECK("vht40");
-		else
-			LINE_CHECK("-vht40");
-		if (vhtconf & IEEE80211_FVHT_USEVHT80)
-			LINE_CHECK("vht80");
-		else
-			LINE_CHECK("-vht80");
-		if (vhtconf & IEEE80211_FVHT_USEVHT160)
-			LINE_CHECK("vht160");
-		else
-			LINE_CHECK("-vht160");
-		if (vhtconf & IEEE80211_FVHT_USEVHT80P80)
-			LINE_CHECK("vht80p80");
-		else
-			LINE_CHECK("-vht80p80");
 	}
 
 	if (get80211val(ctx, IEEE80211_IOC_WME, &wme) != -1) {
@@ -6029,7 +6032,7 @@ static struct cmd ieee80211_cmds[] = {
 	DEF_CMD("ht",		3,	set80211htconf),	/* NB: 20+40 */
 	DEF_CMD("-ht",		0,	set80211htconf),
 	DEF_CMD("vht",		IEEE80211_FVHT_VHT,		set80211vhtconf),
-	DEF_CMD("-vht",		0,				set80211vhtconf),
+	DEF_CMD("-vht",		-IEEE80211_FVHT_VHT,		set80211vhtconf),
 	DEF_CMD("vht40",	IEEE80211_FVHT_USEVHT40,	set80211vhtconf),
 	DEF_CMD("-vht40",	-IEEE80211_FVHT_USEVHT40,	set80211vhtconf),
 	DEF_CMD("vht80",	IEEE80211_FVHT_USEVHT80,	set80211vhtconf),

From 1832eb102e10c7f2891c032ecf7b265b75d3cd50 Mon Sep 17 00:00:00 2001
From: "Bjoern A. Zeeb" <bz@FreeBSD.org>
Date: Sat, 4 Jan 2025 08:02:19 +0000
Subject: [PATCH 053/143] net80211: add missing 80Mhz and 160Mhz channel ranges

We have two arrays, one for 80Mhz and one for 160Mhz.  Both were lacking
frequency ranges for more possibly available configurations (the other
bits of what is valid are for regdomain to set right).

Sponsored by:	The FreeBSD Foundation
MFC after:	3 days
Fixes:		67f4aa3878efa, 04e7bb08a5750
Reviewed by:	adrian
Differential Revision: https://reviews.freebsd.org/D48357
---
 sys/net80211/ieee80211.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sys/net80211/ieee80211.c b/sys/net80211/ieee80211.c
index ccb7efaa4df584..49d313e5077d2b 100644
--- a/sys/net80211/ieee80211.c
+++ b/sys/net80211/ieee80211.c
@@ -1196,12 +1196,14 @@ struct vht_chan_range vht80_chan_ranges[] = {
 	{ 5570, 5650 },
 	{ 5650, 5730 },
 	{ 5735, 5815 },
+	{ 5815, 5895 },
 	{ 0, 0 }
 };
 
 struct vht_chan_range vht160_chan_ranges[] = {
 	{ 5170, 5330 },
 	{ 5490, 5650 },
+	{ 5735, 5895 },
 	{ 0, 0 }
 };
 

From 254a2b767f9a39f1541e0a07a70bbe269e86ad70 Mon Sep 17 00:00:00 2001
From: Mark Johnston <markj@FreeBSD.org>
Date: Tue, 7 Jan 2025 17:58:58 +0000
Subject: [PATCH 054/143] x86: Short-circuit ipi_all_but_self() on UP systems

Apparently this is required on old intel hw, see
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=275086#c3

PR:		275086
Reviewed by:	mav, kib
Fixes:		279cd05b7e4d ("Use APIC_IPI_DEST_OTHERS for bitmapped IPIs too.")
MFC after:	1 week
Diagnosed by:	Ben Wilber <ben@desync.com>
Differential Revision:	https://reviews.freebsd.org/D48361
---
 sys/x86/x86/mp_x86.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sys/x86/x86/mp_x86.c b/sys/x86/x86/mp_x86.c
index 493017e303e3d9..c0da41a4d2229b 100644
--- a/sys/x86/x86/mp_x86.c
+++ b/sys/x86/x86/mp_x86.c
@@ -1425,6 +1425,9 @@ ipi_all_but_self(u_int ipi)
 	cpuset_t other_cpus;
 	int cpu, c;
 
+	if (mp_ncpus == 1)
+		return;
+
 	/*
 	 * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
 	 * of help in order to understand what is the source.

From a3a308f0f29b14d522c56dd88231e27fdf206104 Mon Sep 17 00:00:00 2001
From: "Bjoern A. Zeeb" <bz@FreeBSD.org>
Date: Sat, 4 Jan 2025 07:58:48 +0000
Subject: [PATCH 055/143] lib80211: regdomain: add the two other 160MHz bands

ETSI had one 160Mhz band in regdomain but the other two were missing.
Add them.  I am always confused that the bands use the center frequency
of the 20Mhz edge channels rather than the actual edges so it seems we
are only configuring 140Mhz instead of 160Mhz.

We will have to go through the entire regdomain file one
day and make sure to verify all the power levels.

Sponsored by:	The FreeBSD Foundation
MFC after:	3 days
Reviewed by:	adrian
Differential Revision: https://reviews.freebsd.org/D48356
---
 lib/lib80211/regdomain.xml | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/lib/lib80211/regdomain.xml b/lib/lib80211/regdomain.xml
index 5a432f39ccf644..557af0349cb05a 100644
--- a/lib/lib80211/regdomain.xml
+++ b/lib/lib80211/regdomain.xml
@@ -572,6 +572,13 @@
       <flags>IEEE80211_CHAN_VHT80</flags>
       <flags>INDOOR</flags>
     </band>
+    <band>
+      <freqband ref="AC2_5180_5320_160"/>
+      <maxpower>22</maxpower>
+      <flags>IEEE80211_CHAN_HT40</flags>
+      <flags>IEEE80211_CHAN_VHT160</flags>
+      <flags>IEEE80211_CHAN_DFS</flags>
+    </band>
 
     <!-- 5250-5350/80, 100 mW, indoor, DFS -->
     <band>
@@ -651,6 +658,13 @@
       <flags>IEEE80211_CHAN_VHT80</flags>
       <flags>IEEE80211_CHAN_DFS</flags>
     </band>
+    <band>
+      <freqband ref="AC2_5745_5885_160"/>
+      <maxpower>13</maxpower>
+      <flags>IEEE80211_CHAN_HT40</flags>
+      <flags>IEEE80211_CHAN_VHT160</flags>
+      <flags>IEEE80211_CHAN_DFS</flags>
+    </band>
   </netband>
 </rd>
 
@@ -1905,6 +1919,11 @@
   <chanwidth>80</chanwidth> <chansep>20</chansep>
   <flags>IEEE80211_CHAN_A</flags>
 </freqband>
+<freqband id="AC2_5180_5320_160">
+  <freqstart>5180</freqstart> <freqend>5320</freqend>
+  <chanwidth>160</chanwidth> <chansep>20</chansep>
+  <flags>IEEE80211_CHAN_A</flags>
+</freqband>
 <!-- 5250-5350/80 -->
 <freqband id="AC2_5260_5340_20">
   <freqstart>5260</freqstart> <freqend>5340</freqend>
@@ -1958,6 +1977,11 @@
   <chanwidth>80</chanwidth> <chansep>20</chansep>
   <flags>IEEE80211_CHAN_A</flags>
 </freqband>
+<freqband id="AC2_5745_5885_160">
+  <freqstart>5745</freqstart> <freqend>5885</freqend>
+  <chanwidth>160</chanwidth> <chansep>20</chansep>
+  <flags>IEEE80211_CHAN_A</flags>
+</freqband>
 <freqband id="H4_5180_5240">
   <freqstart>5180</freqstart> <freqend>5240</freqend>
   <chanwidth>40</chanwidth> <chansep>20</chansep>

From 5fdc4824a5e2646a07c0638eca9f5c81b0b85fd5 Mon Sep 17 00:00:00 2001
From: "Bjoern A. Zeeb" <bz@FreeBSD.org>
Date: Sat, 4 Jan 2025 08:06:58 +0000
Subject: [PATCH 056/143] net80211: (v)ht: use macros at hand

Rather than duplicating the manual logic here and leaving a comment,
use the self-explanatory macros we already have.

No functional changes intended.

Sponsored by:	The FreeBSD Foundation
MFC after:	3 days
Reviewed by:	adrian
Differential Revision: https://reviews.freebsd.org/D48359
---
 sys/net80211/ieee80211_ht.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/sys/net80211/ieee80211_ht.c b/sys/net80211/ieee80211_ht.c
index e2506c1e0ce09b..2ec5ffb1a2afad 100644
--- a/sys/net80211/ieee80211_ht.c
+++ b/sys/net80211/ieee80211_ht.c
@@ -1934,9 +1934,7 @@ ieee80211_vht_get_vhtflags(struct ieee80211_node *ni, uint32_t htflags)
 	vhtflags = 0;
 	if (ni->ni_flags & IEEE80211_NODE_VHT && vap->iv_vht_flags & IEEE80211_FVHT_VHT) {
 		if ((ni->ni_vht_chanwidth == IEEE80211_VHT_CHANWIDTH_160MHZ) &&
-		    /* XXX 2 means "160MHz and 80+80MHz", 1 means "160MHz" */
-		    (_IEEE80211_MASKSHIFT(vap->iv_vht_cap.vht_cap_info,
-		     IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_MASK) >= 1) &&
+		    IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_IS_160MHZ(vap->iv_vht_cap.vht_cap_info) &&
 		    (vap->iv_vht_flags & IEEE80211_FVHT_USEVHT160)) {
 			vhtflags = IEEE80211_CHAN_VHT160;
 			/* Mirror the HT40 flags */
@@ -1946,9 +1944,7 @@ ieee80211_vht_get_vhtflags(struct ieee80211_node *ni, uint32_t htflags)
 				vhtflags |= IEEE80211_CHAN_HT40D;
 			}
 		} else if ((ni->ni_vht_chanwidth == IEEE80211_VHT_CHANWIDTH_80P80MHZ) &&
-		    /* XXX 2 means "160MHz and 80+80MHz" */
-		    (_IEEE80211_MASKSHIFT(vap->iv_vht_cap.vht_cap_info,
-		     IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_MASK) == 2) &&
+		    IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_IS_160_80P80MHZ(vap->iv_vht_cap.vht_cap_info) &&
 		    (vap->iv_vht_flags & IEEE80211_FVHT_USEVHT80P80)) {
 			vhtflags = IEEE80211_CHAN_VHT80P80;
 			/* Mirror the HT40 flags */

From 2c8b0d6205f6f98855773e3a82640b50abb2f2f6 Mon Sep 17 00:00:00 2001
From: "Bjoern A. Zeeb" <bz@FreeBSD.org>
Date: Sun, 29 Dec 2024 08:07:48 +0000
Subject: [PATCH 057/143] net80211 / LinuxKPI 802.11: correct enum
 ieee80211_sta_rx_bw

When moving the enum from LinuxKPI to net80211 it got adjusted to be
used in net80211 style in order to use it with a print_mask (%b).
Turns out that change broke assumptions given the minimum value of
BW_20 no longer was 0.  Adjust it back to a plain enum starting at 0
and use an inline function to convert to value names.

Pointy hat to:	bz
Fixes:		ca389486a9599768e0ba69dca13c208020623083
MFC after:	3 days
Sponsored by:	The FreeBSD Foundation
Reviewed by:	adrian
Differential Revision: https://reviews.freebsd.org/D48375
---
 sys/net80211/ieee80211_ddb.c  |  4 ++--
 sys/net80211/ieee80211_ht.c   |  4 ++--
 sys/net80211/ieee80211_node.c |  4 ++--
 sys/net80211/ieee80211_node.h | 26 +++++++++++++++++++-------
 4 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/sys/net80211/ieee80211_ddb.c b/sys/net80211/ieee80211_ddb.c
index 0050038457c787..05b370eafa3871 100644
--- a/sys/net80211/ieee80211_ddb.c
+++ b/sys/net80211/ieee80211_ddb.c
@@ -294,9 +294,9 @@ _db_show_sta(const struct ieee80211_node *ni)
 	db_printf("\thtcap %b htparam 0x%x htctlchan %u ht2ndchan %u\n",
 		ni->ni_htcap, IEEE80211_HTCAP_BITS,
 		ni->ni_htparam, ni->ni_htctlchan, ni->ni_ht2ndchan);
-	db_printf("\thtopmode 0x%x htstbc 0x%x chw %b\n",
+	db_printf("\thtopmode 0x%x htstbc 0x%x chw %d (%s)\n",
 		ni->ni_htopmode, ni->ni_htstbc,
-		ni->ni_chw, IEEE80211_NI_CHW_BITS);
+		ni->ni_chw, ieee80211_ni_chw_to_str(ni->ni_chw));
 
 	/* XXX ampdu state */
 	for (i = 0; i < WME_NUM_TID; i++)
diff --git a/sys/net80211/ieee80211_ht.c b/sys/net80211/ieee80211_ht.c
index 2ec5ffb1a2afad..9e047244cc3b47 100644
--- a/sys/net80211/ieee80211_ht.c
+++ b/sys/net80211/ieee80211_ht.c
@@ -2604,8 +2604,8 @@ ht_recv_action_ht_txchwidth(struct ieee80211_node *ni,
 	    IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20;
 
 	IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni,
-	    "%s: HT txchwidth, width %b%s",
-	    __func__, chw, IEEE80211_NI_CHW_BITS, ni->ni_chw != chw ? "*" : "");
+	    "%s: HT txchwidth, width %d%s (%s)", __func__,
+	    chw, ni->ni_chw != chw ? "*" : "", ieee80211_ni_chw_to_str(chw));
 	if (chw != ni->ni_chw) {
 		/* XXX does this need to change the ht40 station count? */
 		ni->ni_chw = chw;
diff --git a/sys/net80211/ieee80211_node.c b/sys/net80211/ieee80211_node.c
index d2a4558970f925..17ddc8533e41cc 100644
--- a/sys/net80211/ieee80211_node.c
+++ b/sys/net80211/ieee80211_node.c
@@ -2672,9 +2672,9 @@ ieee80211_dump_node(struct ieee80211_node_table *nt __unused,
 	printf("\thtcap %x htparam %x htctlchan %u ht2ndchan %u\n",
 		ni->ni_htcap, ni->ni_htparam,
 		ni->ni_htctlchan, ni->ni_ht2ndchan);
-	printf("\thtopmode %x htstbc %x htchw %b\n",
+	printf("\thtopmode %x htstbc %x htchw %d (%s)\n",
 		ni->ni_htopmode, ni->ni_htstbc,
-		ni->ni_chw, IEEE80211_NI_CHW_BITS);
+		ni->ni_chw, ieee80211_ni_chw_to_str(ni->ni_chw));
 	printf("\tvhtcap %x freq1 %d freq2 %d vhtbasicmcs %x\n",
 		ni->ni_vhtcap, (int) ni->ni_vht_chan1, (int) ni->ni_vht_chan2,
 		(int) ni->ni_vht_basicmcs);
diff --git a/sys/net80211/ieee80211_node.h b/sys/net80211/ieee80211_node.h
index 1f36ceb368b93e..0039c743544ce0 100644
--- a/sys/net80211/ieee80211_node.h
+++ b/sys/net80211/ieee80211_node.h
@@ -115,17 +115,29 @@ enum ieee80211_mesh_mlstate {
  * flags. This allows us to keep the uint8_t slot for ni_chw in
  * struct ieee80211_node and means we do not have to sync to the value for
  * LinuxKPI.
+ *
+ * NB: BW_20 needs to be 0 and values need to be sorted!  Cannot make it
+ * bitfield-alike for use with %b.
  */
 enum ieee80211_sta_rx_bw {
-	IEEE80211_STA_RX_BW_20		= 0x01,
-	IEEE80211_STA_RX_BW_40		= 0x02,
-	IEEE80211_STA_RX_BW_80		= 0x04,
-	IEEE80211_STA_RX_BW_160		= 0x08,
-	IEEE80211_STA_RX_BW_320		= 0x10,
+	IEEE80211_STA_RX_BW_20		= 0x00,
+	IEEE80211_STA_RX_BW_40,
+	IEEE80211_STA_RX_BW_80,
+	IEEE80211_STA_RX_BW_160,
+	IEEE80211_STA_RX_BW_320,
 } __packed;
 
-#define	IEEE80211_NI_CHW_BITS \
-	"\20\1BW_20\2BW_40\3BW_80\4BW_160\5BW_320"
+static inline const char *
+ieee80211_ni_chw_to_str(enum ieee80211_sta_rx_bw bw)
+{
+	switch (bw) {
+	case IEEE80211_STA_RX_BW_20:	return ("BW_20");
+	case IEEE80211_STA_RX_BW_40:	return ("BW_40");
+	case IEEE80211_STA_RX_BW_80:	return ("BW_80");
+	case IEEE80211_STA_RX_BW_160:	return ("BW_160");
+	case IEEE80211_STA_RX_BW_320:	return ("BW_320");
+	}
+}
 
 /*
  * Node specific information.  Note that drivers are expected

From a4cdb785bbd7e26cc3f2ed0bb4e5cf7ea83c400b Mon Sep 17 00:00:00 2001
From: "Bjoern A. Zeeb" <bz@FreeBSD.org>
Date: Sat, 28 Dec 2024 10:13:12 +0000
Subject: [PATCH 058/143] LinuxKPI: 802.11: improve the IMPROVE_HT() macro

Let the macro take a format string and arguments and
add __func__, __LINE__ to the output.

Sponsored by:	The FreeBSD Foundation
MFC after:	3 days
---
 sys/compat/linuxkpi/common/src/linux_80211.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sys/compat/linuxkpi/common/src/linux_80211.h b/sys/compat/linuxkpi/common/src/linux_80211.h
index 8605ec86ad1bcb..0c4c615d82e515 100644
--- a/sys/compat/linuxkpi/common/src/linux_80211.h
+++ b/sys/compat/linuxkpi/common/src/linux_80211.h
@@ -78,9 +78,10 @@
     if (linuxkpi_debug_80211 & D80211_IMPROVE_TXQ)			\
 	printf("%s:%d: XXX LKPI80211 IMPROVE_TXQ\n", __func__, __LINE__)
 
-#define	IMPROVE_HT(...)							\
+#define	IMPROVE_HT(fmt, ...)						\
     if (linuxkpi_debug_80211 & D80211_TRACE_MODE_HT)			\
-	printf("%s:%d: XXX LKPI80211 IMPROVE_HT\n", __func__, __LINE__)
+	printf("%s:%d: XXX LKPI80211 IMPROVE_HT " fmt "\n",		\
+	    __func__, __LINE__, ##__VA_ARGS__);
 
 #define	MTAG_ABI_LKPI80211	1707696513	/* LinuxKPI 802.11 KBI */
 

From fd27f86dd71b7ff1df6981297095b88d1d29652e Mon Sep 17 00:00:00 2001
From: "Bjoern A. Zeeb" <bz@FreeBSD.org>
Date: Sat, 28 Dec 2024 09:57:56 +0000
Subject: [PATCH 059/143] LinuxKPI: switch jiffies and timer->expire to
 unsigned long

It seems these functions work with unsigned long and not int in Linux.
Start simply replacing the int where I came across it while debugging
a wireless driver timer modification.  Also sprinkle in some "const".

Sponsored by:	The FreeBSD Foundation
MFC after:	2 weeks
Reviewed by:	emaste
Differential Revision: https://reviews.freebsd.org/D48318
---
 .../linuxkpi/common/include/linux/jiffies.h   | 28 +++++++++----------
 .../linuxkpi/common/include/linux/timer.h     |  4 +--
 sys/compat/linuxkpi/common/src/linux_compat.c |  2 +-
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/sys/compat/linuxkpi/common/include/linux/jiffies.h b/sys/compat/linuxkpi/common/include/linux/jiffies.h
index bd05a0db076703..8346e74fb830f6 100644
--- a/sys/compat/linuxkpi/common/include/linux/jiffies.h
+++ b/sys/compat/linuxkpi/common/include/linux/jiffies.h
@@ -38,7 +38,7 @@
 
 #define	jiffies			ticks
 #define	jiffies_64		ticks
-#define	jiffies_to_msecs(x)     ((unsigned int)(((int64_t)(int)(x)) * 1000 / hz))
+#define	jiffies_to_msecs(x)     ((unsigned int)(((int64_t)(unsigned long)(x)) * 1000 / hz))
 
 #define	MAX_JIFFY_OFFSET	((INT_MAX >> 1) - 1)
 
@@ -68,7 +68,7 @@ extern uint64_t lkpi_msec2hz_rem;
 extern uint64_t lkpi_msec2hz_div;
 extern uint64_t lkpi_msec2hz_max;
 
-static inline int
+static inline unsigned long
 timespec_to_jiffies(const struct timespec *ts)
 {
 	u64 result;
@@ -78,10 +78,10 @@ timespec_to_jiffies(const struct timespec *ts)
 	if (result > MAX_JIFFY_OFFSET)
 		result = MAX_JIFFY_OFFSET;
 
-	return ((int)result);
+	return ((unsigned long)result);
 }
 
-static inline int
+static inline unsigned long
 msecs_to_jiffies(uint64_t msec)
 {
 	uint64_t result;
@@ -92,10 +92,10 @@ msecs_to_jiffies(uint64_t msec)
 	if (result > MAX_JIFFY_OFFSET)
 		result = MAX_JIFFY_OFFSET;
 
-	return ((int)result);
+	return ((unsigned long)result);
 }
 
-static inline int
+static inline unsigned long
 usecs_to_jiffies(uint64_t usec)
 {
 	uint64_t result;
@@ -106,7 +106,7 @@ usecs_to_jiffies(uint64_t usec)
 	if (result > MAX_JIFFY_OFFSET)
 		result = MAX_JIFFY_OFFSET;
 
-	return ((int)result);
+	return ((unsigned long)result);
 }
 
 static inline uint64_t
@@ -133,17 +133,17 @@ nsecs_to_jiffies(uint64_t nsec)
 }
 
 static inline uint64_t
-jiffies_to_nsecs(int j)
+jiffies_to_nsecs(const unsigned long j)
 {
 
-	return ((1000000000ULL / hz) * (uint64_t)(unsigned int)j);
+	return ((1000000000ULL / hz) * (const uint64_t)j);
 }
 
 static inline uint64_t
-jiffies_to_usecs(int j)
+jiffies_to_usecs(const unsigned long j)
 {
 
-	return ((1000000ULL / hz) * (uint64_t)(unsigned int)j);
+	return ((1000000ULL / hz) * (const uint64_t)j);
 }
 
 static inline uint64_t
@@ -153,10 +153,10 @@ get_jiffies_64(void)
 	return ((uint64_t)(unsigned int)ticks);
 }
 
-static inline int
-linux_timer_jiffies_until(int expires)
+static inline unsigned long
+linux_timer_jiffies_until(unsigned long expires)
 {
-	int delta = expires - jiffies;
+	unsigned long delta = expires - jiffies;
 	/* guard against already expired values */
 	if (delta < 1)
 		delta = 1;
diff --git a/sys/compat/linuxkpi/common/include/linux/timer.h b/sys/compat/linuxkpi/common/include/linux/timer.h
index 8bea082c3e6c71..f9c76222795c8f 100644
--- a/sys/compat/linuxkpi/common/include/linux/timer.h
+++ b/sys/compat/linuxkpi/common/include/linux/timer.h
@@ -42,7 +42,7 @@ struct timer_list {
 		void (*function_415) (struct timer_list *);
 	};
 	unsigned long data;
-	int expires;
+	unsigned long expires;
 };
 
 extern unsigned long linux_timer_hz_mask;
@@ -76,7 +76,7 @@ extern unsigned long linux_timer_hz_mask;
 	callout_init(&(timer)->callout, 1);			\
 } while (0)
 
-extern int mod_timer(struct timer_list *, int);
+extern int mod_timer(struct timer_list *, unsigned long);
 extern void add_timer(struct timer_list *);
 extern void add_timer_on(struct timer_list *, int cpu);
 extern int del_timer(struct timer_list *);
diff --git a/sys/compat/linuxkpi/common/src/linux_compat.c b/sys/compat/linuxkpi/common/src/linux_compat.c
index ec3ccb16b47d63..35cb2fc2f3d742 100644
--- a/sys/compat/linuxkpi/common/src/linux_compat.c
+++ b/sys/compat/linuxkpi/common/src/linux_compat.c
@@ -1938,7 +1938,7 @@ linux_timer_callback_wrapper(void *context)
 }
 
 int
-mod_timer(struct timer_list *timer, int expires)
+mod_timer(struct timer_list *timer, unsigned long expires)
 {
 	int ret;
 

From d82bfe73a3f4f3f38757c2e064047f09629ec7b7 Mon Sep 17 00:00:00 2001
From: Adrian Chadd <adrian@FreeBSD.org>
Date: Mon, 16 Dec 2024 12:11:16 -0800
Subject: [PATCH 060/143] rtwn: don't set the RTS/CTS primary channel field for
 RTL8812AU/RTL8821AU

According to the rtl8812au reference driver, this seems to control
the bandwidth used by lower-bandwidth frames when transmitted in
a higher bandwidth channel.  For example, transmitting a 20MHz frame
on an 80MHz channel (eg in hostap mode) is doable, but you may want
to at least duplicate the RTS/CTS exchange across all four 20MHz
subchannels, AND perhaps duplicate the 20MHz frame.

I haven't fired this up with a spectrum analyser to see what the
result is.

The vendor driver doesn't bother with this and it doesn't change
performance.  My guess is that for modes like AP mode we MAY want to
be able to control the RTS/CTS bandwidth choices rather than letting
the firmware do it, but we're not there yet.

The rtl8812au code in hal/rtl8812a_xmit.c:SCMapping_8812() has
the gory details, but then the one place it's used just has it
commented out and 0 (ie "do not care") is always programmed in.

Differential Revision:	https://reviews.freebsd.org/D48113
Obtained from:	https://github.com/lwfinger/rtl8812au
Reviewed by:	bz
---
 sys/dev/rtwn/rtl8812a/r12a_tx.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/sys/dev/rtwn/rtl8812a/r12a_tx.c b/sys/dev/rtwn/rtl8812a/r12a_tx.c
index 582e6e0ddaf464..336ad75a0b1f45 100644
--- a/sys/dev/rtwn/rtl8812a/r12a_tx.c
+++ b/sys/dev/rtwn/rtl8812a/r12a_tx.c
@@ -56,14 +56,35 @@
 #include <dev/rtwn/rtl8812a/r12a.h>
 #include <dev/rtwn/rtl8812a/r12a_tx_desc.h>
 
+/*
+ * This function actually handles the secondary channel mapping,
+ * not the primary channel mapping.  It hints to the MAC where
+ * to handle duplicate transmission of the RTS/CTS and payload
+ * frames when the requested transmit channel width is less than
+ * the configured channel width.
+ *
+ * Note: the vendor driver and linux rtw88 driver both leave this
+ * field currently set to 0.
+ *
+ * See the rtl8812au vendor driver, hal/rtl8812a_xmit.c:SCMapping_8812()
+ * and where it's used (and ignored.)
+ */
 static int
 r12a_get_primary_channel(struct rtwn_softc *sc, struct ieee80211_channel *c)
 {
+#if 0
 	/* XXX VHT80; VHT40 */
 	if (IEEE80211_IS_CHAN_HT40U(c))
 		return (R12A_TXDW5_PRIM_CHAN_20_80_2);
 	else
 		return (R12A_TXDW5_PRIM_CHAN_20_80_3);
+#endif
+
+	/*
+	 * For now just return the VHT_DATA_SC_DONOT_CARE value
+	 * from the reference driver.
+	 */
+	return (0);
 }
 
 static void

From dd58d03a2a46dddf2ce661d623224a947751beff Mon Sep 17 00:00:00 2001
From: Adrian Chadd <adrian@FreeBSD.org>
Date: Mon, 16 Dec 2024 21:36:02 -0800
Subject: [PATCH 061/143] rtwn: set the maximum A-MPDU size correctly for
 RTL8812AU/RTL8821AU

The vendor driver sets it to 64k or 128k depending upon chipset,
along with bit 31 being set in hal/rtl8812a_hal_init.c:SetHwReg8812A().

Differential Revision:	 https://reviews.freebsd.org/D48118
Obtained from:	https://github.com/lwfinger/rtl8812au
Reviewed by:	bz
---
 sys/dev/rtwn/rtl8812a/r12a_var.h         |  1 +
 sys/dev/rtwn/rtl8812a/usb/r12au_attach.c |  1 +
 sys/dev/rtwn/rtl8812a/usb/r12au_init.c   | 12 +++++++++++-
 sys/dev/rtwn/rtl8821a/usb/r21au_attach.c |  1 +
 4 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/sys/dev/rtwn/rtl8812a/r12a_var.h b/sys/dev/rtwn/rtl8812a/r12a_var.h
index 182e6b90275867..0a76e013b6a733 100644
--- a/sys/dev/rtwn/rtl8812a/r12a_var.h
+++ b/sys/dev/rtwn/rtl8812a/r12a_var.h
@@ -99,6 +99,7 @@ struct r12a_softc {
 	int		ac_usb_dma_size;
 	int		ac_usb_dma_time;
 	int		ampdu_max_time;
+	int		ampdu_max_size;
 };
 #define	R12A_SOFTC(_sc)	((struct r12a_softc *)((_sc)->sc_priv))
 
diff --git a/sys/dev/rtwn/rtl8812a/usb/r12au_attach.c b/sys/dev/rtwn/rtl8812a/usb/r12au_attach.c
index 84bfcfbda0e8f6..c87bffb4db197d 100644
--- a/sys/dev/rtwn/rtl8812a/usb/r12au_attach.c
+++ b/sys/dev/rtwn/rtl8812a/usb/r12au_attach.c
@@ -141,6 +141,7 @@ r12a_attach_private(struct rtwn_softc *sc)
 	rs->rs_iq_calib_sw		= r12a_iq_calib_sw;
 
 	rs->ampdu_max_time		= 0x70;
+	rs->ampdu_max_size		= 0x1ffff; /* 128k */
 
 	sc->sc_priv = rs;
 }
diff --git a/sys/dev/rtwn/rtl8812a/usb/r12au_init.c b/sys/dev/rtwn/rtl8812a/usb/r12au_init.c
index ac6a599895acbb..1bee2c66565742 100644
--- a/sys/dev/rtwn/rtl8812a/usb/r12au_init.c
+++ b/sys/dev/rtwn/rtl8812a/usb/r12au_init.c
@@ -142,7 +142,17 @@ r12au_init_ampdu(struct rtwn_softc *sc)
 
 	/* Setup AMPDU aggregation. */
 	rtwn_write_1(sc, R12A_AMPDU_MAX_TIME, rs->ampdu_max_time);
-	rtwn_write_4(sc, R12A_AMPDU_MAX_LENGTH, 0xffffffff);
+	/*
+	 * Note: The vendor driver (hal/rtl8812a_hal_init.c:SetHwReg8812A())
+	 * also sets bit 31.
+	 */
+	/*
+	 * TODO: this should be limited to the peer in STA mode,
+	 * and perhaps the minimum A-MPDU of all VAPs/peers in
+	 * multi-STA / other operating modes.
+	 */
+	rtwn_write_4(sc, R12A_AMPDU_MAX_LENGTH,
+	    rs->ampdu_max_size | (1<<31));
 
 	/* 80 MHz clock (again?) */
 	rtwn_write_1(sc, R92C_USTIME_TSF, 0x50);
diff --git a/sys/dev/rtwn/rtl8821a/usb/r21au_attach.c b/sys/dev/rtwn/rtl8821a/usb/r21au_attach.c
index 9f0e2c950a1e46..175bac8f6fc9a9 100644
--- a/sys/dev/rtwn/rtl8821a/usb/r21au_attach.c
+++ b/sys/dev/rtwn/rtl8821a/usb/r21au_attach.c
@@ -141,6 +141,7 @@ r21a_attach_private(struct rtwn_softc *sc)
 	rs->rs_iq_calib_sw		= r21a_iq_calib_sw;
 
 	rs->ampdu_max_time		= 0x5e;
+	rs->ampdu_max_size		= 0xffff; /* 64k */
 
 	rs->ac_usb_dma_size		= 0x01;
 	rs->ac_usb_dma_time		= 0x10;

From 7f8f120439b77e60a1070d87f4dc6cb9a43d0335 Mon Sep 17 00:00:00 2001
From: Xin LI <delphij@FreeBSD.org>
Date: Tue, 7 Jan 2025 20:42:16 -0800
Subject: [PATCH 062/143] libmagic: Unbreak for older FreeBSD releases.

byteswap.h is introduced in FreeBSD 13.2 but was not available in
earlier versions.  In order to support upgrading from an earlier
FreeBSD release we would need to tell the build system that fact.

PR:		bin/273736
Reported by:	philip
MFC after:	3 days
---
 lib/libmagic/config.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lib/libmagic/config.h b/lib/libmagic/config.h
index 12cd382ab9bb77..d642880d609629 100644
--- a/lib/libmagic/config.h
+++ b/lib/libmagic/config.h
@@ -1,6 +1,9 @@
 /* config.h.  Generated from config.h.in by configure.  */
 /* config.h.in.  Generated from configure.ac by autoheader.  */
 
+/* FreeBSD */
+#include <osreldate.h>
+
 /* Define if building universal (internal helper macro) */
 /* #undef AC_APPLE_UNIVERSAL_BUILD */
 
@@ -21,8 +24,10 @@
 
 /* Define to 1 if you have the <byteswap.h> header file. */
 #ifndef __APPLE__ /* Cross building tools on macOS */
+#if __FreeBSD_version >= 1400079 || (__FreeBSD_version < 1400000 && __FreeBSD_version >= 1302500)
 #define HAVE_BYTESWAP_H 1
 #endif
+#endif
 
 /* Define to 1 if you have the <bzlib.h> header file. */
 /* #undef HAVE_BZLIB_H */

From 2bb0efbb7b64fa957d46d4f443b000f375fc03d4 Mon Sep 17 00:00:00 2001
From: "Bjoern A. Zeeb" <bz@FreeBSD.org>
Date: Wed, 8 Jan 2025 08:30:00 +0000
Subject: [PATCH 063/143] Revert: LinuxKPI: switch jiffies and timer->expire to
 unsigned long

There are possible problems when jiffies (ticks) which still are int
wrap around.  Also given this did not touch every single place some
checks may be broken now.

Reported by:	markj
This reverts commit fd27f86dd71b7ff1df6981297095b88d1d29652e.
---
 .../linuxkpi/common/include/linux/jiffies.h   | 28 +++++++++----------
 .../linuxkpi/common/include/linux/timer.h     |  4 +--
 sys/compat/linuxkpi/common/src/linux_compat.c |  2 +-
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/sys/compat/linuxkpi/common/include/linux/jiffies.h b/sys/compat/linuxkpi/common/include/linux/jiffies.h
index 8346e74fb830f6..bd05a0db076703 100644
--- a/sys/compat/linuxkpi/common/include/linux/jiffies.h
+++ b/sys/compat/linuxkpi/common/include/linux/jiffies.h
@@ -38,7 +38,7 @@
 
 #define	jiffies			ticks
 #define	jiffies_64		ticks
-#define	jiffies_to_msecs(x)     ((unsigned int)(((int64_t)(unsigned long)(x)) * 1000 / hz))
+#define	jiffies_to_msecs(x)     ((unsigned int)(((int64_t)(int)(x)) * 1000 / hz))
 
 #define	MAX_JIFFY_OFFSET	((INT_MAX >> 1) - 1)
 
@@ -68,7 +68,7 @@ extern uint64_t lkpi_msec2hz_rem;
 extern uint64_t lkpi_msec2hz_div;
 extern uint64_t lkpi_msec2hz_max;
 
-static inline unsigned long
+static inline int
 timespec_to_jiffies(const struct timespec *ts)
 {
 	u64 result;
@@ -78,10 +78,10 @@ timespec_to_jiffies(const struct timespec *ts)
 	if (result > MAX_JIFFY_OFFSET)
 		result = MAX_JIFFY_OFFSET;
 
-	return ((unsigned long)result);
+	return ((int)result);
 }
 
-static inline unsigned long
+static inline int
 msecs_to_jiffies(uint64_t msec)
 {
 	uint64_t result;
@@ -92,10 +92,10 @@ msecs_to_jiffies(uint64_t msec)
 	if (result > MAX_JIFFY_OFFSET)
 		result = MAX_JIFFY_OFFSET;
 
-	return ((unsigned long)result);
+	return ((int)result);
 }
 
-static inline unsigned long
+static inline int
 usecs_to_jiffies(uint64_t usec)
 {
 	uint64_t result;
@@ -106,7 +106,7 @@ usecs_to_jiffies(uint64_t usec)
 	if (result > MAX_JIFFY_OFFSET)
 		result = MAX_JIFFY_OFFSET;
 
-	return ((unsigned long)result);
+	return ((int)result);
 }
 
 static inline uint64_t
@@ -133,17 +133,17 @@ nsecs_to_jiffies(uint64_t nsec)
 }
 
 static inline uint64_t
-jiffies_to_nsecs(const unsigned long j)
+jiffies_to_nsecs(int j)
 {
 
-	return ((1000000000ULL / hz) * (const uint64_t)j);
+	return ((1000000000ULL / hz) * (uint64_t)(unsigned int)j);
 }
 
 static inline uint64_t
-jiffies_to_usecs(const unsigned long j)
+jiffies_to_usecs(int j)
 {
 
-	return ((1000000ULL / hz) * (const uint64_t)j);
+	return ((1000000ULL / hz) * (uint64_t)(unsigned int)j);
 }
 
 static inline uint64_t
@@ -153,10 +153,10 @@ get_jiffies_64(void)
 	return ((uint64_t)(unsigned int)ticks);
 }
 
-static inline unsigned long
-linux_timer_jiffies_until(unsigned long expires)
+static inline int
+linux_timer_jiffies_until(int expires)
 {
-	unsigned long delta = expires - jiffies;
+	int delta = expires - jiffies;
 	/* guard against already expired values */
 	if (delta < 1)
 		delta = 1;
diff --git a/sys/compat/linuxkpi/common/include/linux/timer.h b/sys/compat/linuxkpi/common/include/linux/timer.h
index f9c76222795c8f..8bea082c3e6c71 100644
--- a/sys/compat/linuxkpi/common/include/linux/timer.h
+++ b/sys/compat/linuxkpi/common/include/linux/timer.h
@@ -42,7 +42,7 @@ struct timer_list {
 		void (*function_415) (struct timer_list *);
 	};
 	unsigned long data;
-	unsigned long expires;
+	int expires;
 };
 
 extern unsigned long linux_timer_hz_mask;
@@ -76,7 +76,7 @@ extern unsigned long linux_timer_hz_mask;
 	callout_init(&(timer)->callout, 1);			\
 } while (0)
 
-extern int mod_timer(struct timer_list *, unsigned long);
+extern int mod_timer(struct timer_list *, int);
 extern void add_timer(struct timer_list *);
 extern void add_timer_on(struct timer_list *, int cpu);
 extern int del_timer(struct timer_list *);
diff --git a/sys/compat/linuxkpi/common/src/linux_compat.c b/sys/compat/linuxkpi/common/src/linux_compat.c
index 35cb2fc2f3d742..ec3ccb16b47d63 100644
--- a/sys/compat/linuxkpi/common/src/linux_compat.c
+++ b/sys/compat/linuxkpi/common/src/linux_compat.c
@@ -1938,7 +1938,7 @@ linux_timer_callback_wrapper(void *context)
 }
 
 int
-mod_timer(struct timer_list *timer, unsigned long expires)
+mod_timer(struct timer_list *timer, int expires)
 {
 	int ret;
 

From cad1d13af789b82d829923795cccbf37cdf93b51 Mon Sep 17 00:00:00 2001
From: Emmanuel Vadot <manu@FreeBSD.org>
Date: Wed, 8 Jan 2025 10:34:06 +0100
Subject: [PATCH 064/143] dts: Bump the freebsd branding version to 6.12

---
 sys/dts/freebsd-compatible.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/dts/freebsd-compatible.dts b/sys/dts/freebsd-compatible.dts
index 7d602f70a83bbb..ffdc2e542a22d0 100644
--- a/sys/dts/freebsd-compatible.dts
+++ b/sys/dts/freebsd-compatible.dts
@@ -1,3 +1,3 @@
 / {
-	freebsd,dts-version = "6.8";
+	freebsd,dts-version = "6.12";
 };

From 1f7c379c07168029694a9a33bc437b05cdee623e Mon Sep 17 00:00:00 2001
From: Alexander Motin <mav@FreeBSD.org>
Date: Wed, 8 Jan 2025 13:04:34 -0500
Subject: [PATCH 065/143] isp: Fix abort issue introduced by previous commit

Aborting ATIO while its CTIOs are in progress makes it impossible to
handle their completions, making them stuck forever.  Detect this
case by checking ctcnt counter and if so instead of aborting just
mark the ATIO as dead to block any new CTIOs.  It is not perfect
since the task id can not be reused for some more time, but not
as bad as the task stuck forever.

MFC after:	1 week
---
 etc/mtree/BSD.include.dist |  2 ++
 include/Makefile           |  2 +-
 sys/conf/files.amd64       |  2 ++
 sys/dev/isp/isp_freebsd.c  | 19 +++++++++++++++++--
 sys/dev/isp/isp_freebsd.h  |  3 ++-
 sys/modules/Makefile       |  2 ++
 usr.sbin/Makefile          |  1 +
 7 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/etc/mtree/BSD.include.dist b/etc/mtree/BSD.include.dist
index ad1b8a5f741350..0a2dbea23d5a6a 100644
--- a/etc/mtree/BSD.include.dist
+++ b/etc/mtree/BSD.include.dist
@@ -149,6 +149,8 @@
         ..
         io
         ..
+        ixnvdimm
+        ..
         mfi
         ..
         mlx5
diff --git a/include/Makefile b/include/Makefile
index 16d641b42a908c..0c71f1518a914f 100644
--- a/include/Makefile
+++ b/include/Makefile
@@ -50,7 +50,7 @@ LDIRS=	geom net net80211 netgraph netinet netinet6 \
 
 LSUBDIRS=	dev/acpica dev/agp dev/ciss dev/filemon dev/firewire \
 	dev/hwpmc dev/hyperv \
-	dev/ic dev/iicbus dev/io dev/mfi dev/mmc \
+	dev/ic dev/iicbus dev/io dev/ixnvdimm dev/mfi dev/mmc \
 	dev/ofw dev/pbio dev/pci ${_dev_powermac_nvram} dev/ppbus dev/pwm \
 	dev/smbus dev/speaker dev/tcp_log dev/veriexec dev/vkbd dev/wg \
 	fs/devfs fs/fdescfs fs/msdosfs fs/nfs fs/nullfs \
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index 0584fc29d03963..571e61f6b26428 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -235,6 +235,8 @@ dev/ixl/i40e_adminq.c		optional	ixl pci \
 	compile-with "${NORMAL_C} -I$S/dev/ixl"
 dev/ixl/i40e_dcb.c		optional	ixl pci \
 	compile-with "${NORMAL_C} -I$S/dev/ixl"
+dev/ixnvdimm/ixnvdimm.c		optional	ixnvdimm
+dev/ixnvdimm/ixnvdimm_copy.S	optional	ixnvdimm
 dev/ncthwm/ncthwm.c		optional	ncthwm superio
 dev/qlxge/qls_dbg.c		optional	qlxge pci
 dev/qlxge/qls_dump.c		optional	qlxge pci
diff --git a/sys/dev/isp/isp_freebsd.c b/sys/dev/isp/isp_freebsd.c
index d5aa7a54142eaf..b496eae1b466ea 100644
--- a/sys/dev/isp/isp_freebsd.c
+++ b/sys/dev/isp/isp_freebsd.c
@@ -986,6 +986,16 @@ isp_target_start_ctio(ispsoftc_t *isp, union ccb *ccb, enum Start_Ctio_How how)
 			continue;
 		}
 
+		/*
+		 * Is this command a dead duck?
+		 */
+		if (atp->dead) {
+			isp_prt(isp, ISP_LOGERR, "%s: [0x%x] not sending a CTIO for a dead command", __func__, cso->tag_id);
+			ccb->ccb_h.status = CAM_REQ_ABORTED;
+			xpt_done(ccb);
+			continue;
+		}
+
 		/*
 		 * Check to make sure we're still in target mode.
 		 */
@@ -2503,14 +2513,19 @@ isp_action(struct cam_sim *sim, union ccb *ccb)
 		}
 
 		/*
-		 * Target should abort all affected CCBs before ACK-ing INOT,
+		 * Target should abort all affected tasks before ACK-ing INOT,
 		 * but if/since it doesn't, add this hack to allow tag reuse.
+		 * We can not do it if some CTIOs are in progress, or we won't
+		 * handle the completions.  In such case just block new ones.
 		 */
 		uint32_t rsp = (ccb->ccb_h.flags & CAM_SEND_STATUS) ? ccb->cna2.arg : 0;
 		if (ntp->nt.nt_ncode == NT_ABORT_TASK && (rsp & 0xff) == 0 &&
 		    (atp = isp_find_atpd(isp, XS_CHANNEL(ccb), ccb->cna2.seq_id)) != NULL) {
-			if (isp_abort_atpd(isp, XS_CHANNEL(ccb), atp) == 0)
+			if (atp->ctcnt == 0 &&
+			    isp_abort_atpd(isp, XS_CHANNEL(ccb), atp) == 0)
 				isp_put_atpd(isp, XS_CHANNEL(ccb), atp);
+			else
+				atp->dead = 1;
 		}
 
 		if (isp_handle_platform_target_notify_ack(isp, &ntp->nt, rsp)) {
diff --git a/sys/dev/isp/isp_freebsd.h b/sys/dev/isp/isp_freebsd.h
index 5bb3dd43b6de41..73390fa14769fb 100644
--- a/sys/dev/isp/isp_freebsd.h
+++ b/sys/dev/isp/isp_freebsd.h
@@ -104,8 +104,9 @@ typedef struct atio_private_data {
 	uint16_t	ctcnt;	/* number of CTIOs currently active */
 	uint8_t		seqno;	/* CTIO sequence number */
 	uint8_t		cdb0;
-	uint8_t		srr_notify_rcvd	: 1,
+	uint16_t	srr_notify_rcvd	: 1,
 			sendst		: 1,
+			dead		: 1,
 			tattr		: 3,
 			state		: 3;
 	void *		ests;
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
index 294cb5a224de34..3451928e2e5324 100644
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -203,6 +203,7 @@ SUBDIR=	\
 	${_ix} \
 	${_ixv} \
 	${_ixl} \
+	${_ixnvdimm} \
 	jme \
 	kbdmux \
 	kgssapi \
@@ -834,6 +835,7 @@ _enic=		enic
 _iavf=		iavf
 _ioat=		ioat
 _ixl=		ixl
+_ixnvdimm=	ixnvdimm
 _nvdimm=	nvdimm
 _pms=		pms
 _qat=		qat
diff --git a/usr.sbin/Makefile b/usr.sbin/Makefile
index e039cea6ee167a..2eeff698083618 100644
--- a/usr.sbin/Makefile
+++ b/usr.sbin/Makefile
@@ -34,6 +34,7 @@ SUBDIR=	adduser \
 	ifmcstat \
 	iostat \
 	iovctl \
+	ixnvdimm \
 	kldxref \
 	mailwrapper \
 	makefs \

From e6c96c7af717b459aea4126590ba413d29f283bf Mon Sep 17 00:00:00 2001
From: Alexander Motin <mav@FreeBSD.org>
Date: Wed, 8 Jan 2025 13:20:09 -0500
Subject: [PATCH 066/143] Revert "isp: Fix abort issue introduced by previous
 commit"

This reverts commit 1f7c379c07168029694a9a33bc437b05cdee623e.

Leaked unintended changes.  I'm sorry.
---
 etc/mtree/BSD.include.dist |  2 --
 include/Makefile           |  2 +-
 sys/conf/files.amd64       |  2 --
 sys/dev/isp/isp_freebsd.c  | 19 ++-----------------
 sys/dev/isp/isp_freebsd.h  |  3 +--
 sys/modules/Makefile       |  2 --
 usr.sbin/Makefile          |  1 -
 7 files changed, 4 insertions(+), 27 deletions(-)

diff --git a/etc/mtree/BSD.include.dist b/etc/mtree/BSD.include.dist
index 0a2dbea23d5a6a..ad1b8a5f741350 100644
--- a/etc/mtree/BSD.include.dist
+++ b/etc/mtree/BSD.include.dist
@@ -149,8 +149,6 @@
         ..
         io
         ..
-        ixnvdimm
-        ..
         mfi
         ..
         mlx5
diff --git a/include/Makefile b/include/Makefile
index 0c71f1518a914f..16d641b42a908c 100644
--- a/include/Makefile
+++ b/include/Makefile
@@ -50,7 +50,7 @@ LDIRS=	geom net net80211 netgraph netinet netinet6 \
 
 LSUBDIRS=	dev/acpica dev/agp dev/ciss dev/filemon dev/firewire \
 	dev/hwpmc dev/hyperv \
-	dev/ic dev/iicbus dev/io dev/ixnvdimm dev/mfi dev/mmc \
+	dev/ic dev/iicbus dev/io dev/mfi dev/mmc \
 	dev/ofw dev/pbio dev/pci ${_dev_powermac_nvram} dev/ppbus dev/pwm \
 	dev/smbus dev/speaker dev/tcp_log dev/veriexec dev/vkbd dev/wg \
 	fs/devfs fs/fdescfs fs/msdosfs fs/nfs fs/nullfs \
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index 571e61f6b26428..0584fc29d03963 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -235,8 +235,6 @@ dev/ixl/i40e_adminq.c		optional	ixl pci \
 	compile-with "${NORMAL_C} -I$S/dev/ixl"
 dev/ixl/i40e_dcb.c		optional	ixl pci \
 	compile-with "${NORMAL_C} -I$S/dev/ixl"
-dev/ixnvdimm/ixnvdimm.c		optional	ixnvdimm
-dev/ixnvdimm/ixnvdimm_copy.S	optional	ixnvdimm
 dev/ncthwm/ncthwm.c		optional	ncthwm superio
 dev/qlxge/qls_dbg.c		optional	qlxge pci
 dev/qlxge/qls_dump.c		optional	qlxge pci
diff --git a/sys/dev/isp/isp_freebsd.c b/sys/dev/isp/isp_freebsd.c
index b496eae1b466ea..d5aa7a54142eaf 100644
--- a/sys/dev/isp/isp_freebsd.c
+++ b/sys/dev/isp/isp_freebsd.c
@@ -986,16 +986,6 @@ isp_target_start_ctio(ispsoftc_t *isp, union ccb *ccb, enum Start_Ctio_How how)
 			continue;
 		}
 
-		/*
-		 * Is this command a dead duck?
-		 */
-		if (atp->dead) {
-			isp_prt(isp, ISP_LOGERR, "%s: [0x%x] not sending a CTIO for a dead command", __func__, cso->tag_id);
-			ccb->ccb_h.status = CAM_REQ_ABORTED;
-			xpt_done(ccb);
-			continue;
-		}
-
 		/*
 		 * Check to make sure we're still in target mode.
 		 */
@@ -2513,19 +2503,14 @@ isp_action(struct cam_sim *sim, union ccb *ccb)
 		}
 
 		/*
-		 * Target should abort all affected tasks before ACK-ing INOT,
+		 * Target should abort all affected CCBs before ACK-ing INOT,
 		 * but if/since it doesn't, add this hack to allow tag reuse.
-		 * We can not do it if some CTIOs are in progress, or we won't
-		 * handle the completions.  In such case just block new ones.
 		 */
 		uint32_t rsp = (ccb->ccb_h.flags & CAM_SEND_STATUS) ? ccb->cna2.arg : 0;
 		if (ntp->nt.nt_ncode == NT_ABORT_TASK && (rsp & 0xff) == 0 &&
 		    (atp = isp_find_atpd(isp, XS_CHANNEL(ccb), ccb->cna2.seq_id)) != NULL) {
-			if (atp->ctcnt == 0 &&
-			    isp_abort_atpd(isp, XS_CHANNEL(ccb), atp) == 0)
+			if (isp_abort_atpd(isp, XS_CHANNEL(ccb), atp) == 0)
 				isp_put_atpd(isp, XS_CHANNEL(ccb), atp);
-			else
-				atp->dead = 1;
 		}
 
 		if (isp_handle_platform_target_notify_ack(isp, &ntp->nt, rsp)) {
diff --git a/sys/dev/isp/isp_freebsd.h b/sys/dev/isp/isp_freebsd.h
index 73390fa14769fb..5bb3dd43b6de41 100644
--- a/sys/dev/isp/isp_freebsd.h
+++ b/sys/dev/isp/isp_freebsd.h
@@ -104,9 +104,8 @@ typedef struct atio_private_data {
 	uint16_t	ctcnt;	/* number of CTIOs currently active */
 	uint8_t		seqno;	/* CTIO sequence number */
 	uint8_t		cdb0;
-	uint16_t	srr_notify_rcvd	: 1,
+	uint8_t		srr_notify_rcvd	: 1,
 			sendst		: 1,
-			dead		: 1,
 			tattr		: 3,
 			state		: 3;
 	void *		ests;
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
index 3451928e2e5324..294cb5a224de34 100644
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -203,7 +203,6 @@ SUBDIR=	\
 	${_ix} \
 	${_ixv} \
 	${_ixl} \
-	${_ixnvdimm} \
 	jme \
 	kbdmux \
 	kgssapi \
@@ -835,7 +834,6 @@ _enic=		enic
 _iavf=		iavf
 _ioat=		ioat
 _ixl=		ixl
-_ixnvdimm=	ixnvdimm
 _nvdimm=	nvdimm
 _pms=		pms
 _qat=		qat
diff --git a/usr.sbin/Makefile b/usr.sbin/Makefile
index 2eeff698083618..e039cea6ee167a 100644
--- a/usr.sbin/Makefile
+++ b/usr.sbin/Makefile
@@ -34,7 +34,6 @@ SUBDIR=	adduser \
 	ifmcstat \
 	iostat \
 	iovctl \
-	ixnvdimm \
 	kldxref \
 	mailwrapper \
 	makefs \

From 2c48a8f161c91bf7020122697d064a25287097a3 Mon Sep 17 00:00:00 2001
From: Alexander Motin <mav@FreeBSD.org>
Date: Wed, 8 Jan 2025 13:23:26 -0500
Subject: [PATCH 067/143] isp: Fix abort issue introduced by previous commit

Aborting ATIO while its CTIOs are in progress makes it impossible to
handle their completions, making them stuck forever.  Detect this
case by checking ctcnt counter and if so instead of aborting just
mark the ATIO as dead to block any new CTIOs.  It is not perfect
since the task id can not be reused for some more time, but not
as bad as the task stuck forever.

MFC after:	1 week
---
 sys/dev/isp/isp_freebsd.c | 19 +++++++++++++++++--
 sys/dev/isp/isp_freebsd.h |  3 ++-
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/sys/dev/isp/isp_freebsd.c b/sys/dev/isp/isp_freebsd.c
index d5aa7a54142eaf..b496eae1b466ea 100644
--- a/sys/dev/isp/isp_freebsd.c
+++ b/sys/dev/isp/isp_freebsd.c
@@ -986,6 +986,16 @@ isp_target_start_ctio(ispsoftc_t *isp, union ccb *ccb, enum Start_Ctio_How how)
 			continue;
 		}
 
+		/*
+		 * Is this command a dead duck?
+		 */
+		if (atp->dead) {
+			isp_prt(isp, ISP_LOGERR, "%s: [0x%x] not sending a CTIO for a dead command", __func__, cso->tag_id);
+			ccb->ccb_h.status = CAM_REQ_ABORTED;
+			xpt_done(ccb);
+			continue;
+		}
+
 		/*
 		 * Check to make sure we're still in target mode.
 		 */
@@ -2503,14 +2513,19 @@ isp_action(struct cam_sim *sim, union ccb *ccb)
 		}
 
 		/*
-		 * Target should abort all affected CCBs before ACK-ing INOT,
+		 * Target should abort all affected tasks before ACK-ing INOT,
 		 * but if/since it doesn't, add this hack to allow tag reuse.
+		 * We can not do it if some CTIOs are in progress, or we won't
+		 * handle the completions.  In such case just block new ones.
 		 */
 		uint32_t rsp = (ccb->ccb_h.flags & CAM_SEND_STATUS) ? ccb->cna2.arg : 0;
 		if (ntp->nt.nt_ncode == NT_ABORT_TASK && (rsp & 0xff) == 0 &&
 		    (atp = isp_find_atpd(isp, XS_CHANNEL(ccb), ccb->cna2.seq_id)) != NULL) {
-			if (isp_abort_atpd(isp, XS_CHANNEL(ccb), atp) == 0)
+			if (atp->ctcnt == 0 &&
+			    isp_abort_atpd(isp, XS_CHANNEL(ccb), atp) == 0)
 				isp_put_atpd(isp, XS_CHANNEL(ccb), atp);
+			else
+				atp->dead = 1;
 		}
 
 		if (isp_handle_platform_target_notify_ack(isp, &ntp->nt, rsp)) {
diff --git a/sys/dev/isp/isp_freebsd.h b/sys/dev/isp/isp_freebsd.h
index 5bb3dd43b6de41..73390fa14769fb 100644
--- a/sys/dev/isp/isp_freebsd.h
+++ b/sys/dev/isp/isp_freebsd.h
@@ -104,8 +104,9 @@ typedef struct atio_private_data {
 	uint16_t	ctcnt;	/* number of CTIOs currently active */
 	uint8_t		seqno;	/* CTIO sequence number */
 	uint8_t		cdb0;
-	uint8_t		srr_notify_rcvd	: 1,
+	uint16_t	srr_notify_rcvd	: 1,
 			sendst		: 1,
+			dead		: 1,
 			tattr		: 3,
 			state		: 3;
 	void *		ests;

From 4c89d59e0cdac4d83fb5841aefae9214545b2273 Mon Sep 17 00:00:00 2001
From: Michael Tuexen <tuexen@FreeBSD.org>
Date: Wed, 8 Jan 2025 23:15:54 +0100
Subject: [PATCH 068/143] TCP RACK: don't log an uninitialized value

reduce is uninitialized, if the code path for logging is reached via
goto old_method;.

Reviewed by:		rrs, Peter Lei
CID:			1557359
MFC after:		1 week
Sponsored by:		Netflix, Inc.
Differential Revision:	https://reviews.freebsd.org/D48346
---
 sys/netinet/tcp_stacks/rack.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index f590edd71d9d3c..902845ad34f6d1 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -17474,7 +17474,7 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str
 		 * the peer to have a gap in data sending.
 		 */
 		uint64_t cwnd, tr_perms = 0;
-		int32_t reduce = 0;
+		int32_t reduce;
 
 	old_method:
 		/*
@@ -17511,7 +17511,8 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str
 				slot -= reduce;
 			} else
 				slot = 0;
-		}
+		} else
+			reduce = 0;
 		slot *= HPTS_USEC_IN_MSEC;
 		if (rack->rc_pace_to_cwnd) {
 			uint64_t rate_wanted = 0;

From 912a05670ed9545a1d1b010eedafb819e14eb1b8 Mon Sep 17 00:00:00 2001
From: Adrian Chadd <adrian@FreeBSD.org>
Date: Sun, 15 Dec 2024 18:29:33 -0800
Subject: [PATCH 069/143] net80211: add helper functions for VHT transmit

* Add static helper functions for VHT TX for 20MHz, 40MHz and 80MHz.
* Add a public function to check if the given VHT width is available
  for transmit.

Differential Revision:	https://reviews.freebsd.org/D48101
---
 sys/net80211/ieee80211_vht.c | 137 +++++++++++++++++++++++++++++++++++
 sys/net80211/ieee80211_vht.h |   4 +
 2 files changed, 141 insertions(+)

diff --git a/sys/net80211/ieee80211_vht.c b/sys/net80211/ieee80211_vht.c
index a05beb91216f48..82879f90c67b5e 100644
--- a/sys/net80211/ieee80211_vht.c
+++ b/sys/net80211/ieee80211_vht.c
@@ -873,3 +873,140 @@ ieee80211_vht_get_vhtinfo_ie(struct ieee80211_node *ni,
 {
 	printf("%s: called; TODO!\n", __func__);
 }
+
+/*
+ * Return true if VHT rates can be used for the given node.
+ */
+bool
+ieee80211_vht_check_tx_vht(const struct ieee80211_node *ni)
+{
+	const struct ieee80211vap *vap;
+	const struct ieee80211_channel *bss_chan;
+
+	if (ni == NULL || ni->ni_chan == IEEE80211_CHAN_ANYC ||
+	    ni->ni_vap == NULL || ni->ni_vap->iv_bss == NULL)
+		return (false);
+
+	vap = ni->ni_vap;
+	bss_chan = vap->iv_bss->ni_chan;
+
+	if (bss_chan == IEEE80211_CHAN_ANYC)
+		return (false);
+
+	return (IEEE80211_IS_CHAN_VHT(ni->ni_chan));
+}
+
+/*
+ * Return true if VHT40 rates can be transmitted to the given node.
+ *
+ * This verifies that the BSS is VHT40 capable and the current
+ * node channel width is 40MHz.
+ */
+static bool
+ieee80211_vht_check_tx_vht40(const struct ieee80211_node *ni)
+{
+	struct ieee80211vap *vap;
+	struct ieee80211_channel *bss_chan;
+
+	if (!ieee80211_vht_check_tx_vht(ni))
+		return (false);
+
+	vap = ni->ni_vap;
+	bss_chan = vap->iv_bss->ni_chan;
+
+	return (IEEE80211_IS_CHAN_VHT40(bss_chan) &&
+	    IEEE80211_IS_CHAN_VHT40(ni->ni_chan) &&
+	    (ni->ni_chw == IEEE80211_STA_RX_BW_40));
+}
+
+/*
+ * Return true if VHT80 rates can be transmitted to the given node.
+ *
+ * This verifies that the BSS is VHT80 capable and the current
+ * node channel width is 80MHz.
+ */
+static bool
+ieee80211_vht_check_tx_vht80(const struct ieee80211_node *ni)
+{
+	struct ieee80211vap *vap;
+	struct ieee80211_channel *bss_chan;
+
+	if (!ieee80211_vht_check_tx_vht(ni))
+		return (false);
+
+	vap = ni->ni_vap;
+	bss_chan = vap->iv_bss->ni_chan;
+
+	return (IEEE80211_IS_CHAN_VHT80(bss_chan) &&
+	    IEEE80211_IS_CHAN_VHT80(ni->ni_chan) &&
+	    (ni->ni_chw == IEEE80211_STA_RX_BW_80));
+}
+
+/*
+ * Return true if VHT 160 rates can be transmitted to the given node.
+ *
+ * This verifies that the BSS is VHT80+80 or VHT160 capable and the current
+ * node channel width is 80+80MHz or 160MHz.
+ */
+static bool
+ieee80211_vht_check_tx_vht160(const struct ieee80211_node *ni)
+{
+	struct ieee80211vap *vap;
+	struct ieee80211_channel *bss_chan;
+
+	if (!ieee80211_vht_check_tx_vht(ni))
+		return (false);
+
+	vap = ni->ni_vap;
+	bss_chan = vap->iv_bss->ni_chan;
+
+	if (ni->ni_chw != IEEE80211_STA_RX_BW_160)
+		return (false);
+
+	if (IEEE80211_IS_CHAN_VHT160(bss_chan) &&
+	    IEEE80211_IS_CHAN_VHT160(ni->ni_chan))
+		return (true);
+
+	if (IEEE80211_IS_CHAN_VHT80P80(bss_chan) &&
+	    IEEE80211_IS_CHAN_VHT80P80(ni->ni_chan))
+		return (true);
+
+	return (false);
+}
+
+/**
+ * @brief Check if the given transmit bandwidth is available to the given node
+ *
+ * This checks that the node and BSS both allow the given bandwidth,
+ * and that the current node bandwidth (which can dynamically change)
+ * also allows said bandwidth.
+ *
+ * This relies on the channels having the flags for the narrower
+ * channels as well - eg a VHT160 channel will have the CHAN_VHT80,
+ * CHAN_VHT40, CHAN_VHT flags also set.
+ *
+ * @param ni		the ieee80211_node to check
+ * @param bw		the required bandwidth to check
+ *
+ * @returns true if it is allowed, false otherwise
+ */
+bool
+ieee80211_vht_check_tx_bw(const struct ieee80211_node *ni,
+    enum ieee80211_sta_rx_bw bw)
+{
+
+	switch (bw) {
+	case IEEE80211_STA_RX_BW_20:
+		return (ieee80211_vht_check_tx_vht(ni));
+	case IEEE80211_STA_RX_BW_40:
+		return (ieee80211_vht_check_tx_vht40(ni));
+	case IEEE80211_STA_RX_BW_80:
+		return (ieee80211_vht_check_tx_vht80(ni));
+	case IEEE80211_STA_RX_BW_160:
+		return (ieee80211_vht_check_tx_vht160(ni));
+	case IEEE80211_STA_RX_BW_320:
+		return (false);
+	default:
+		return (false);
+	}
+}
diff --git a/sys/net80211/ieee80211_vht.h b/sys/net80211/ieee80211_vht.h
index f2d1706ea0f29a..bcb61020c5a1e8 100644
--- a/sys/net80211/ieee80211_vht.h
+++ b/sys/net80211/ieee80211_vht.h
@@ -65,4 +65,8 @@ void	ieee80211_vht_get_vhtcap_ie(struct ieee80211_node *ni,
 void	ieee80211_vht_get_vhtinfo_ie(struct ieee80211_node *ni,
 	    struct ieee80211_vht_operation *, int);
 
+bool	ieee80211_vht_check_tx_vht(const struct ieee80211_node *);
+bool	ieee80211_vht_check_tx_bw(const struct ieee80211_node *,
+	    enum ieee80211_sta_rx_bw);
+
 #endif	/* _NET80211_IEEE80211_VHT_H_ */

From 82182587bcc3adf39d6b3b6347f052865c3a34e2 Mon Sep 17 00:00:00 2001
From: Adrian Chadd <adrian@FreeBSD.org>
Date: Sun, 15 Dec 2024 18:35:28 -0800
Subject: [PATCH 070/143] rtwn: add VHT20/VHT40/VHT80 bandwidth configuration
 for transmit.

Add a separate function and path for VHT 20/40/80MHz bandwidth
transmission.

Differential Revision:	https://reviews.freebsd.org/D48102
---
 sys/dev/rtwn/rtl8812a/r12a_tx.c | 41 ++++++++++++++++++++++++++++++---
 1 file changed, 38 insertions(+), 3 deletions(-)

diff --git a/sys/dev/rtwn/rtl8812a/r12a_tx.c b/sys/dev/rtwn/rtl8812a/r12a_tx.c
index 336ad75a0b1f45..acb23831655983 100644
--- a/sys/dev/rtwn/rtl8812a/r12a_tx.c
+++ b/sys/dev/rtwn/rtl8812a/r12a_tx.c
@@ -47,6 +47,7 @@
 
 #include <net80211/ieee80211_var.h>
 #include <net80211/ieee80211_radiotap.h>
+#include <net80211/ieee80211_vht.h>
 
 #include <dev/rtwn/if_rtwnreg.h>
 #include <dev/rtwn/if_rtwnvar.h>
@@ -87,12 +88,42 @@ r12a_get_primary_channel(struct rtwn_softc *sc, struct ieee80211_channel *c)
 	return (0);
 }
 
+/*
+ * Set the data bandwidth and primary channel fields in txdw5 for VHT80
+ * or VHT40, whichever check passes first; otherwise (e.g. VHT20) txdw5
+ * is left as is.  This is only called for VHT, not for HT.
+ */
+static void
+r12a_tx_set_vht_bw(struct rtwn_softc *sc, void *buf, struct ieee80211_node *ni)
+{
+	struct r12a_tx_desc *txd = (struct r12a_tx_desc *)buf;
+	int prim_chan;
+
+	prim_chan = r12a_get_primary_channel(sc, ni->ni_chan);
+
+	if (ieee80211_vht_check_tx_bw(ni, IEEE80211_STA_RX_BW_80)) {
+		txd->txdw5 |= htole32(SM(R12A_TXDW5_DATA_BW,
+		    R12A_TXDW5_DATA_BW80));
+		txd->txdw5 |= htole32(SM(R12A_TXDW5_DATA_PRIM_CHAN,
+		    prim_chan));
+	} else if (ieee80211_vht_check_tx_bw(ni, IEEE80211_STA_RX_BW_40)) {
+		txd->txdw5 |= htole32(SM(R12A_TXDW5_DATA_BW,
+		    R12A_TXDW5_DATA_BW40));
+		txd->txdw5 |= htole32(SM(R12A_TXDW5_DATA_PRIM_CHAN,
+		    prim_chan));
+	}
+}
+
+/*
+ * Configure HT20/HT40 as appropriate.
+ *
+ * This is only called for HT, not for VHT.
+ */
 static void
 r12a_tx_set_ht40(struct rtwn_softc *sc, void *buf, struct ieee80211_node *ni)
 {
 	struct r12a_tx_desc *txd = (struct r12a_tx_desc *)buf;
 
-	/* XXX VHT80; VHT40; VHT20 */
 	if (ieee80211_ht_check_tx_ht40(ni)) {
 		int prim_chan;
 
@@ -353,8 +384,12 @@ r12a_fill_tx_desc(struct rtwn_softc *sc, struct ieee80211_node *ni,
 				txd->txdw5 |= htole32(R12A_TXDW5_DATA_SHORT);
 
 			prot = IEEE80211_PROT_NONE;
-			/* TODO: VHT */
-			if (RTWN_RATE_IS_HT(ridx)) {
+			if (RTWN_RATE_IS_VHT(ridx)) {
+				r12a_tx_set_vht_bw(sc, txd, ni);
+				/* XXX TODO: sgi */
+				/* XXX TODO: ldpc */
+				prot = ic->ic_htprotmode;
+			} else if (RTWN_RATE_IS_HT(ridx)) {
 				r12a_tx_set_ht40(sc, txd, ni);
 				r12a_tx_set_sgi(sc, txd, ni);
 				r12a_tx_set_ldpc(sc, txd, ni);

From ec07af2a3d494de36a20a541efdd24874c841db5 Mon Sep 17 00:00:00 2001
From: Adrian Chadd <adrian@FreeBSD.org>
Date: Sun, 15 Dec 2024 20:15:46 -0800
Subject: [PATCH 071/143] rtwn: announce VHT support for RTL8812AU/RTL8821AU.

The transmit path doesn't yet support VHT rates (because the rate
control and rate representation in net80211 don't yet know about
VHT rates), so for now the NIC will receive VHT frames but only
transmit HT frames.

Locally tested:

* RTL8812AU, STA mode

Differential Revision:	https://reviews.freebsd.org/D48103
---
 sys/dev/rtwn/if_rtwn.c                   | 23 +++++++++++++++++++++++
 sys/dev/rtwn/if_rtwnvar.h                |  2 ++
 sys/dev/rtwn/rtl8812a/usb/r12au_attach.c | 19 ++++++++++++++++---
 sys/dev/rtwn/rtl8821a/usb/r21au_attach.c | 17 ++++++++++++++---
 4 files changed, 55 insertions(+), 6 deletions(-)

diff --git a/sys/dev/rtwn/if_rtwn.c b/sys/dev/rtwn/if_rtwn.c
index ed84950b1a944c..f9950c5acf4dbd 100644
--- a/sys/dev/rtwn/if_rtwn.c
+++ b/sys/dev/rtwn/if_rtwn.c
@@ -436,6 +436,29 @@ rtwn_resume(struct rtwn_softc *sc)
 	ieee80211_resume_all(ic);
 }
 
+void
+rtwn_attach_vht_cap_info_mcs(struct rtwn_softc *sc)
+{
+	struct ieee80211com *ic = &sc->sc_ic;	/* gets the VHT MCS maps */
+	uint32_t rx_mcs = 0, tx_mcs = 0;	/* built 2 bits per index */
+
+	for (int i = 0 ; i < 8; i++) {		/* 8 two-bit entries */
+		if (i < sc->ntxchains)		/* index has a TX chain */
+			tx_mcs |= (IEEE80211_VHT_MCS_SUPPORT_0_9 << (i*2));
+		else
+			tx_mcs |= (IEEE80211_VHT_MCS_NOT_SUPPORTED << (i*2));
+
+		if (i < sc->nrxchains)		/* index has an RX chain */
+			rx_mcs |= (IEEE80211_VHT_MCS_SUPPORT_0_9 << (i*2));
+		else
+			rx_mcs |= (IEEE80211_VHT_MCS_NOT_SUPPORTED << (i*2));
+	}
+	ic->ic_vht_cap.supp_mcs.rx_mcs_map = rx_mcs;
+	ic->ic_vht_cap.supp_mcs.rx_highest = 0;
+	ic->ic_vht_cap.supp_mcs.tx_mcs_map = tx_mcs;
+	ic->ic_vht_cap.supp_mcs.tx_highest = 0;
+}
+
 static void
 rtwn_vap_decrement_counters(struct rtwn_softc *sc,
     enum ieee80211_opmode opmode, int id)
diff --git a/sys/dev/rtwn/if_rtwnvar.h b/sys/dev/rtwn/if_rtwnvar.h
index fa4b6d0a5df7af..aa42715b1674e8 100644
--- a/sys/dev/rtwn/if_rtwnvar.h
+++ b/sys/dev/rtwn/if_rtwnvar.h
@@ -436,6 +436,8 @@ void	rtwn_detach(struct rtwn_softc *);
 void	rtwn_resume(struct rtwn_softc *);
 void	rtwn_suspend(struct rtwn_softc *);
 
+void	rtwn_attach_vht_cap_info_mcs(struct rtwn_softc *);
+
 /* Interface-specific. */
 #define rtwn_write_1(_sc, _addr, _val) \
 	(((_sc)->sc_write_1)((_sc), (_addr), (_val)))
diff --git a/sys/dev/rtwn/rtl8812a/usb/r12au_attach.c b/sys/dev/rtwn/rtl8812a/usb/r12au_attach.c
index c87bffb4db197d..b6850eb9fa23eb 100644
--- a/sys/dev/rtwn/rtl8812a/usb/r12au_attach.c
+++ b/sys/dev/rtwn/rtl8812a/usb/r12au_attach.c
@@ -175,11 +175,24 @@ r12au_adj_devcaps(struct rtwn_softc *sc)
 	}
 
 	ic->ic_htcaps |=
-	    IEEE80211_HTCAP_CHWIDTH40 /* 40 MHz channel width */
-	    | IEEE80211_HTCAP_SHORTGI40 /* short GI in 40MHz */
+	    IEEE80211_HTCAP_CHWIDTH40 | /* 40 MHz channel width */
+	    IEEE80211_HTCAP_SHORTGI40 /* short GI in 40MHz */
 	;
 
-	/* TODO: STBC, VHT etc */
+	/* TODO: STBC */
+
+	/* VHT config */
+	ic->ic_flags_ext |= IEEE80211_FEXT_VHT;
+	ic->ic_vht_cap.vht_cap_info =
+	    IEEE80211_VHTCAP_MAX_MPDU_LENGTH_11454 |
+	    IEEE80211_VHTCAP_SHORT_GI_80 |
+	    IEEE80211_VHTCAP_TXSTBC |
+	    IEEE80211_VHTCAP_RXSTBC_1 |
+	    IEEE80211_VHTCAP_HTC_VHT |
+	    _IEEE80211_SHIFTMASK(7,
+	      IEEE80211_VHTCAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK);
+
+	rtwn_attach_vht_cap_info_mcs(sc);
 }
 
 void
diff --git a/sys/dev/rtwn/rtl8821a/usb/r21au_attach.c b/sys/dev/rtwn/rtl8821a/usb/r21au_attach.c
index 175bac8f6fc9a9..60cb6d3fc61d40 100644
--- a/sys/dev/rtwn/rtl8821a/usb/r21au_attach.c
+++ b/sys/dev/rtwn/rtl8821a/usb/r21au_attach.c
@@ -160,11 +160,22 @@ r21au_adj_devcaps(struct rtwn_softc *sc)
 		ic->ic_caps |= IEEE80211_C_DFS;
 
 	ic->ic_htcaps |=
-	    IEEE80211_HTCAP_CHWIDTH40 /* 40 MHz channel width */
-	    | IEEE80211_HTCAP_SHORTGI40 /* short GI in 40MHz */
+	    IEEE80211_HTCAP_CHWIDTH40 | /* 40 MHz channel width */
+	    IEEE80211_HTCAP_SHORTGI40 /* short GI in 40MHz */
 	    ;
 
-	/* TODO: VHT */
+	/* VHT config */
+	ic->ic_flags_ext |= IEEE80211_FEXT_VHT;
+	ic->ic_vht_cap.vht_cap_info =
+	    IEEE80211_VHTCAP_MAX_MPDU_LENGTH_11454 |
+	    IEEE80211_VHTCAP_SHORT_GI_80 |
+	    IEEE80211_VHTCAP_TXSTBC |
+	    IEEE80211_VHTCAP_RXSTBC_1 |
+	    IEEE80211_VHTCAP_HTC_VHT |
+	    _IEEE80211_SHIFTMASK(7,
+	        IEEE80211_VHTCAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK);
+
+	rtwn_attach_vht_cap_info_mcs(sc);
 }
 
 void

From 5aac61c5d2bcbd3358b3d74d46827a8bfdeff86c Mon Sep 17 00:00:00 2001
From: Gleb Smirnoff <glebius@FreeBSD.org>
Date: Wed, 8 Jan 2025 20:00:12 -0800
Subject: [PATCH 072/143] rpc: delete disabled code from rpcb_clnt.c

The code has been disabled since the initial bulk check-in from Isilon in
2008.  Its existence poisoned grep(1) results when one would try to learn
which RPC mechanisms are actually used by the modern NFS client and
server.
---
 sys/rpc/rpcb_clnt.c | 1142 +------------------------------------------
 1 file changed, 1 insertion(+), 1141 deletions(-)

diff --git a/sys/rpc/rpcb_clnt.c b/sys/rpc/rpcb_clnt.c
index 5f7d13020af07e..ee2253596f8589 100644
--- a/sys/rpc/rpcb_clnt.c
+++ b/sys/rpc/rpcb_clnt.c
@@ -62,363 +62,6 @@ static const char nullstring[] = "\000";
 
 static CLIENT *local_rpcb(void);
 
-#if 0
-
-static const struct timeval rmttimeout = { 3, 0 };
-static struct timeval rpcbrmttime = { 15, 0 };
-
-#define	CACHESIZE 6
-
-struct address_cache {
-	char *ac_host;
-	char *ac_netid;
-	char *ac_uaddr;
-	struct netbuf *ac_taddr;
-	struct address_cache *ac_next;
-};
-
-static struct address_cache *front;
-static int cachesize;
-
-#define	CLCR_GET_RPCB_TIMEOUT	1
-#define	CLCR_SET_RPCB_TIMEOUT	2
-
-
-extern int __rpc_lowvers;
-
-static struct address_cache *check_cache(const char *, const char *);
-static void delete_cache(struct netbuf *);
-static void add_cache(const char *, const char *, struct netbuf *, char *);
-static CLIENT *getclnthandle(const char *, const struct netconfig *, char **);
-static CLIENT *local_rpcb(void);
-static struct netbuf *got_entry(rpcb_entry_list_ptr, const struct netconfig *);
-
-/*
- * This routine adjusts the timeout used for calls to the remote rpcbind.
- * Also, this routine can be used to set the use of portmapper version 2
- * only when doing rpc_broadcasts
- * These are private routines that may not be provided in future releases.
- */
-bool_t
-__rpc_control(request, info)
-	int	request;
-	void	*info;
-{
-	switch (request) {
-	case CLCR_GET_RPCB_TIMEOUT:
-		*(struct timeval *)info = tottimeout;
-		break;
-	case CLCR_SET_RPCB_TIMEOUT:
-		tottimeout = *(struct timeval *)info;
-		break;
-	case CLCR_SET_LOWVERS:
-		__rpc_lowvers = *(int *)info;
-		break;
-	case CLCR_GET_LOWVERS:
-		*(int *)info = __rpc_lowvers;
-		break;
-	default:
-		return (FALSE);
-	}
-	return (TRUE);
-}
-
-/*
- *	It might seem that a reader/writer lock would be more reasonable here.
- *	However because getclnthandle(), the only user of the cache functions,
- *	may do a delete_cache() operation if a check_cache() fails to return an
- *	address useful to clnt_tli_create(), we may as well use a mutex.
- */
-/*
- * As it turns out, if the cache lock is *not* a reader/writer lock, we will
- * block all clnt_create's if we are trying to connect to a host that's down,
- * since the lock will be held all during that time.
- */
-
-/*
- * The routines check_cache(), add_cache(), delete_cache() manage the
- * cache of rpcbind addresses for (host, netid).
- */
-
-static struct address_cache *
-check_cache(host, netid)
-	const char *host, *netid;
-{
-	struct address_cache *cptr;
-
-	/* READ LOCK HELD ON ENTRY: rpcbaddr_cache_lock */
-
-	for (cptr = front; cptr != NULL; cptr = cptr->ac_next) {
-		if (!strcmp(cptr->ac_host, host) &&
-		    !strcmp(cptr->ac_netid, netid)) {
-#ifdef ND_DEBUG
-			fprintf(stderr, "Found cache entry for %s: %s\n",
-				host, netid);
-#endif
-			return (cptr);
-		}
-	}
-	return ((struct address_cache *) NULL);
-}
-
-static void
-delete_cache(addr)
-	struct netbuf *addr;
-{
-	struct address_cache *cptr, *prevptr = NULL;
-
-	/* WRITE LOCK HELD ON ENTRY: rpcbaddr_cache_lock */
-	for (cptr = front; cptr != NULL; cptr = cptr->ac_next) {
-		if (!memcmp(cptr->ac_taddr->buf, addr->buf, addr->len)) {
-			free(cptr->ac_host);
-			free(cptr->ac_netid);
-			free(cptr->ac_taddr->buf);
-			free(cptr->ac_taddr);
-			if (cptr->ac_uaddr)
-				free(cptr->ac_uaddr);
-			if (prevptr)
-				prevptr->ac_next = cptr->ac_next;
-			else
-				front = cptr->ac_next;
-			free(cptr);
-			cachesize--;
-			break;
-		}
-		prevptr = cptr;
-	}
-}
-
-static void
-add_cache(host, netid, taddr, uaddr)
-	const char *host, *netid;
-	char *uaddr;
-	struct netbuf *taddr;
-{
-	struct address_cache  *ad_cache, *cptr, *prevptr;
-
-	ad_cache = (struct address_cache *)
-			malloc(sizeof (struct address_cache));
-	if (!ad_cache) {
-		return;
-	}
-	ad_cache->ac_host = strdup(host);
-	ad_cache->ac_netid = strdup(netid);
-	ad_cache->ac_uaddr = uaddr ? strdup(uaddr) : NULL;
-	ad_cache->ac_taddr = (struct netbuf *)malloc(sizeof (struct netbuf));
-	if (!ad_cache->ac_host || !ad_cache->ac_netid || !ad_cache->ac_taddr ||
-		(uaddr && !ad_cache->ac_uaddr)) {
-		goto out;
-	}
-	ad_cache->ac_taddr->len = ad_cache->ac_taddr->maxlen = taddr->len;
-	ad_cache->ac_taddr->buf = (char *) malloc(taddr->len);
-	if (ad_cache->ac_taddr->buf == NULL) {
-out:
-		if (ad_cache->ac_host)
-			free(ad_cache->ac_host);
-		if (ad_cache->ac_netid)
-			free(ad_cache->ac_netid);
-		if (ad_cache->ac_uaddr)
-			free(ad_cache->ac_uaddr);
-		if (ad_cache->ac_taddr)
-			free(ad_cache->ac_taddr);
-		free(ad_cache);
-		return;
-	}
-	memcpy(ad_cache->ac_taddr->buf, taddr->buf, taddr->len);
-#ifdef ND_DEBUG
-	fprintf(stderr, "Added to cache: %s : %s\n", host, netid);
-#endif
-
-/* VARIABLES PROTECTED BY rpcbaddr_cache_lock:  cptr */
-
-	rwlock_wrlock(&rpcbaddr_cache_lock);
-	if (cachesize < CACHESIZE) {
-		ad_cache->ac_next = front;
-		front = ad_cache;
-		cachesize++;
-	} else {
-		/* Free the last entry */
-		cptr = front;
-		prevptr = NULL;
-		while (cptr->ac_next) {
-			prevptr = cptr;
-			cptr = cptr->ac_next;
-		}
-
-#ifdef ND_DEBUG
-		fprintf(stderr, "Deleted from cache: %s : %s\n",
-			cptr->ac_host, cptr->ac_netid);
-#endif
-		free(cptr->ac_host);
-		free(cptr->ac_netid);
-		free(cptr->ac_taddr->buf);
-		free(cptr->ac_taddr);
-		if (cptr->ac_uaddr)
-			free(cptr->ac_uaddr);
-
-		if (prevptr) {
-			prevptr->ac_next = NULL;
-			ad_cache->ac_next = front;
-			front = ad_cache;
-		} else {
-			front = ad_cache;
-			ad_cache->ac_next = NULL;
-		}
-		free(cptr);
-	}
-	rwlock_unlock(&rpcbaddr_cache_lock);
-}
-
-/*
- * This routine will return a client handle that is connected to the
- * rpcbind. If targaddr is non-NULL, the "universal address" of the
- * host will be stored in *targaddr; the caller is responsible for
- * freeing this string.
- * On error, returns NULL and free's everything.
- */
-static CLIENT *
-getclnthandle(host, nconf, targaddr)
-	const char *host;
-	const struct netconfig *nconf;
-	char **targaddr;
-{
-	CLIENT *client;
-	struct netbuf *addr, taddr;
-	struct netbuf addr_to_delete;
-	struct __rpc_sockinfo si;
-	struct addrinfo hints, *res, *tres;
-	struct address_cache *ad_cache;
-	char *tmpaddr;
-
-/* VARIABLES PROTECTED BY rpcbaddr_cache_lock:  ad_cache */
-
-	/* Get the address of the rpcbind.  Check cache first */
-	client = NULL;
-	addr_to_delete.len = 0;
-	rwlock_rdlock(&rpcbaddr_cache_lock);
-	ad_cache = NULL;
-	if (host != NULL)
-		ad_cache = check_cache(host, nconf->nc_netid);
-	if (ad_cache != NULL) {
-		addr = ad_cache->ac_taddr;
-		client = clnt_tli_create(RPC_ANYFD, nconf, addr,
-		    (rpcprog_t)RPCBPROG, (rpcvers_t)RPCBVERS4, 0, 0);
-		if (client != NULL) {
-			if (targaddr)
-				*targaddr = strdup(ad_cache->ac_uaddr);
-			rwlock_unlock(&rpcbaddr_cache_lock);
-			return (client);
-		}
-		addr_to_delete.len = addr->len;
-		addr_to_delete.buf = (char *)malloc(addr->len);
-		if (addr_to_delete.buf == NULL) {
-			addr_to_delete.len = 0;
-		} else {
-			memcpy(addr_to_delete.buf, addr->buf, addr->len);
-		}
-	}
-	rwlock_unlock(&rpcbaddr_cache_lock);
-	if (addr_to_delete.len != 0) {
-		/*
-		 * Assume this may be due to cache data being
-		 *  outdated
-		 */
-		rwlock_wrlock(&rpcbaddr_cache_lock);
-		delete_cache(&addr_to_delete);
-		rwlock_unlock(&rpcbaddr_cache_lock);
-		free(addr_to_delete.buf);
-	}
-	if (!__rpc_nconf2sockinfo(nconf, &si)) {
-		rpc_createerr.cf_stat = RPC_UNKNOWNPROTO;
-		return NULL;
-	}
-
-	memset(&hints, 0, sizeof hints);
-	hints.ai_family = si.si_af;
-	hints.ai_socktype = si.si_socktype;
-	hints.ai_protocol = si.si_proto;
-
-#ifdef CLNT_DEBUG
-	printf("trying netid %s family %d proto %d socktype %d\n",
-	    nconf->nc_netid, si.si_af, si.si_proto, si.si_socktype);
-#endif
-
-	if (nconf->nc_protofmly != NULL && strcmp(nconf->nc_protofmly, NC_LOOPBACK) == 0) {
-		client = local_rpcb();
-		if (! client) {
-#ifdef ND_DEBUG
-			clnt_pcreateerror("rpcbind clnt interface");
-#endif
-			return (NULL);
-		} else {
-			struct sockaddr_un sun;
-			if (targaddr) {
-			    *targaddr = malloc(sizeof(sun.sun_path));
-			    if (*targaddr == NULL) {
-				CLNT_DESTROY(client);
-				return (NULL);
-			    }
-			    strncpy(*targaddr, _PATH_RPCBINDSOCK,
-				sizeof(sun.sun_path));
-			}
-			return (client);
-		}
-	} else {
-		if (getaddrinfo(host, "sunrpc", &hints, &res) != 0) {
-			rpc_createerr.cf_stat = RPC_UNKNOWNHOST;
-			return NULL;
-		}
-	}
-
-	for (tres = res; tres != NULL; tres = tres->ai_next) {
-		taddr.buf = tres->ai_addr;
-		taddr.len = taddr.maxlen = tres->ai_addrlen;
-
-#ifdef ND_DEBUG
-		{
-			char *ua;
-
-			ua = taddr2uaddr(nconf, &taddr);
-			fprintf(stderr, "Got it [%s]\n", ua);
-			free(ua);
-		}
-#endif
-
-#ifdef ND_DEBUG
-		{
-			int i;
-
-			fprintf(stderr, "\tnetbuf len = %d, maxlen = %d\n",
-				taddr.len, taddr.maxlen);
-			fprintf(stderr, "\tAddress is ");
-			for (i = 0; i < taddr.len; i++)
-				fprintf(stderr, "%u.", ((char *)(taddr.buf))[i]);
-			fprintf(stderr, "\n");
-		}
-#endif
-		client = clnt_tli_create(RPC_ANYFD, nconf, &taddr,
-		    (rpcprog_t)RPCBPROG, (rpcvers_t)RPCBVERS4, 0, 0);
-#ifdef ND_DEBUG
-		if (! client) {
-			clnt_pcreateerror("rpcbind clnt interface");
-		}
-#endif
-
-		if (client) {
-			tmpaddr = targaddr ? taddr2uaddr(nconf, &taddr) : NULL;
-			add_cache(host, nconf->nc_netid, &taddr, tmpaddr);
-			if (targaddr)
-				*targaddr = tmpaddr;
-			break;
-		}
-	}
-	if (res)
-		freeaddrinfo(res);
-	return (client);
-}
-
-#endif
-
 /* XXX */
 #define IN4_LOCALHOST_STRING	"127.0.0.1"
 #define IN6_LOCALHOST_STRING	"::1"
@@ -446,7 +89,7 @@ local_rpcb(void)
 	error = socreate(AF_LOCAL, &so, SOCK_STREAM, 0, curthread->td_ucred,
 	    curthread);
 	if (error)
-		goto try_nconf;
+		return (NULL);
 	sun.sun_family = AF_LOCAL;
 	strcpy(sun.sun_path, _PATH_RPCBINDSOCK);
 	sun.sun_len = SUN_LEN(&sun);
@@ -464,65 +107,7 @@ local_rpcb(void)
 	/* Nobody needs this socket anymore; free the descriptor. */
 	soclose(so);
 
-try_nconf:
-
-#if 0
-	static struct netconfig *loopnconf;
-	static char *localhostname;
-
-/* VARIABLES PROTECTED BY loopnconf_lock: loopnconf */
-	mutex_lock(&loopnconf_lock);
-	if (loopnconf == NULL) {
-		struct netconfig *nconf, *tmpnconf = NULL;
-		void *nc_handle;
-		int fd;
-
-		nc_handle = setnetconfig();
-		if (nc_handle == NULL) {
-			/* fails to open netconfig file */
-			syslog (LOG_ERR, "rpc: failed to open " NETCONFIG);
-			rpc_createerr.cf_stat = RPC_UNKNOWNPROTO;
-			mutex_unlock(&loopnconf_lock);
-			return (NULL);
-		}
-		while ((nconf = getnetconfig(nc_handle)) != NULL) {
-			if ((
-#ifdef INET6
-			     strcmp(nconf->nc_protofmly, NC_INET6) == 0 ||
-#endif
-			     strcmp(nconf->nc_protofmly, NC_INET) == 0) &&
-			    (nconf->nc_semantics == NC_TPI_COTS ||
-			     nconf->nc_semantics == NC_TPI_COTS_ORD)) {
-				fd = __rpc_nconf2fd(nconf);
-				/*
-				 * Can't create a socket, assume that
-				 * this family isn't configured in the kernel.
-				 */
-				if (fd < 0)
-					continue;
-				_close(fd);
-				tmpnconf = nconf;
-				if (!strcmp(nconf->nc_protofmly, NC_INET))
-					localhostname = IN4_LOCALHOST_STRING;
-				else
-					localhostname = IN6_LOCALHOST_STRING;
-			}
-		}
-		if (tmpnconf == NULL) {
-			rpc_createerr.cf_stat = RPC_UNKNOWNPROTO;
-			mutex_unlock(&loopnconf_lock);
-			return (NULL);
-		}
-		loopnconf = getnetconfigent(tmpnconf->nc_netid);
-		/* loopnconf is never freed */
-		endnetconfig(nc_handle);
-	}
-	mutex_unlock(&loopnconf_lock);
-	client = getclnthandle(localhostname, loopnconf, NULL);
-	return (client);
-#else
 	return (NULL);
-#endif
 }
 
 /*
@@ -636,728 +221,3 @@ rpcb_unset(rpcprog_t program, rpcvers_t version, const struct netconfig *nconf)
 	CLNT_DESTROY(client);
 	return (rslt);
 }
-
-#if 0
-
-/*
- * From the merged list, find the appropriate entry
- */
-static struct netbuf *
-got_entry(relp, nconf)
-	rpcb_entry_list_ptr relp;
-	const struct netconfig *nconf;
-{
-	struct netbuf *na = NULL;
-	rpcb_entry_list_ptr sp;
-	rpcb_entry *rmap;
-
-	for (sp = relp; sp != NULL; sp = sp->rpcb_entry_next) {
-		rmap = &sp->rpcb_entry_map;
-		if ((strcmp(nconf->nc_proto, rmap->r_nc_proto) == 0) &&
-		    (strcmp(nconf->nc_protofmly, rmap->r_nc_protofmly) == 0) &&
-		    (nconf->nc_semantics == rmap->r_nc_semantics) &&
-		    (rmap->r_maddr != NULL) && (rmap->r_maddr[0] != 0)) {
-			na = uaddr2taddr(nconf, rmap->r_maddr);
-#ifdef ND_DEBUG
-			fprintf(stderr, "\tRemote address is [%s].\n",
-				rmap->r_maddr);
-			if (!na)
-				fprintf(stderr,
-				    "\tCouldn't resolve remote address!\n");
-#endif
-			break;
-		}
-	}
-	return (na);
-}
-
-/*
- * Quick check to see if rpcbind is up.  Tries to connect over
- * local transport.
- */
-static bool_t
-__rpcbind_is_up()
-{
-	struct netconfig *nconf;
-	struct sockaddr_un sun;
-	void *localhandle;
-	int sock;
-
-	nconf = NULL;
-	localhandle = setnetconfig();
-	while ((nconf = getnetconfig(localhandle)) != NULL) {
-		if (nconf->nc_protofmly != NULL &&
-		    strcmp(nconf->nc_protofmly, NC_LOOPBACK) == 0)
-			 break;
-	}
-	if (nconf == NULL)
-		return (FALSE);
-
-	endnetconfig(localhandle);
-
-	memset(&sun, 0, sizeof sun);
-	sock = _socket(AF_LOCAL, SOCK_STREAM, 0);
-	if (sock < 0)
-		return (FALSE);
-	sun.sun_family = AF_LOCAL;
-	strncpy(sun.sun_path, _PATH_RPCBINDSOCK, sizeof(sun.sun_path));
-	sun.sun_len = SUN_LEN(&sun);
-
-	if (_connect(sock, (struct sockaddr *)&sun, sun.sun_len) < 0) {
-		_close(sock);
-		return (FALSE);
-	}
-
-	_close(sock);
-	return (TRUE);
-}
-
-/*
- * An internal function which optimizes rpcb_getaddr function.  It also
- * returns the client handle that it uses to contact the remote rpcbind.
- *
- * The algorithm used: If the transports is TCP or UDP, it first tries
- * version 2 (portmap), 4 and then 3 (svr4).  This order should be
- * changed in the next OS release to 4, 2 and 3.  We are assuming that by
- * that time, version 4 would be available on many machines on the network.
- * With this algorithm, we get performance as well as a plan for
- * obsoleting version 2.
- *
- * For all other transports, the algorithm remains as 4 and then 3.
- *
- * XXX: Due to some problems with t_connect(), we do not reuse the same client
- * handle for COTS cases and hence in these cases we do not return the
- * client handle.  This code will change if t_connect() ever
- * starts working properly.  Also look under clnt_vc.c.
- */
-struct netbuf *
-__rpcb_findaddr_timed(program, version, nconf, host, clpp, tp)
-	rpcprog_t program;
-	rpcvers_t version;
-	const struct netconfig *nconf;
-	const char *host;
-	CLIENT **clpp;
-	struct timeval *tp;
-{
-	static bool_t check_rpcbind = TRUE;
-	CLIENT *client = NULL;
-	RPCB parms;
-	enum clnt_stat clnt_st;
-	char *ua = NULL;
-	rpcvers_t vers;
-	struct netbuf *address = NULL;
-	rpcvers_t start_vers = RPCBVERS4;
-	struct netbuf servaddr;
-
-	/* parameter checking */
-	if (nconf == NULL) {
-		rpc_createerr.cf_stat = RPC_UNKNOWNPROTO;
-		return (NULL);
-	}
-
-	parms.r_addr = NULL;
-
-	/*
-	 * Use default total timeout if no timeout is specified.
-	 */
-	if (tp == NULL)
-		tp = &tottimeout;
-
-#ifdef PORTMAP
-	/* Try version 2 for TCP or UDP */
-	if (strcmp(nconf->nc_protofmly, NC_INET) == 0) {
-		u_short port = 0;
-		struct netbuf remote;
-		rpcvers_t pmapvers = 2;
-		struct pmap pmapparms;
-
-		/*
-		 * Try UDP only - there are some portmappers out
-		 * there that use UDP only.
-		 */
-		if (strcmp(nconf->nc_proto, NC_TCP) == 0) {
-			struct netconfig *newnconf;
-
-			if ((newnconf = getnetconfigent("udp")) == NULL) {
-				rpc_createerr.cf_stat = RPC_UNKNOWNPROTO;
-				return (NULL);
-			}
-			client = getclnthandle(host, newnconf, &parms.r_addr);
-			freenetconfigent(newnconf);
-		} else {
-			client = getclnthandle(host, nconf, &parms.r_addr);
-		}
-		if (client == NULL)
-			return (NULL);
-
-		/*
-		 * Set version and retry timeout.
-		 */
-		CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, (char *)&rpcbrmttime);
-		CLNT_CONTROL(client, CLSET_VERS, (char *)&pmapvers);
-
-		pmapparms.pm_prog = program;
-		pmapparms.pm_vers = version;
-		pmapparms.pm_prot = strcmp(nconf->nc_proto, NC_TCP) ?
-					IPPROTO_UDP : IPPROTO_TCP;
-		pmapparms.pm_port = 0;	/* not needed */
-		clnt_st = CLNT_CALL(client, (rpcproc_t)PMAPPROC_GETPORT,
-		    (xdrproc_t) xdr_pmap, (caddr_t)(void *)&pmapparms,
-		    (xdrproc_t) xdr_u_short, (caddr_t)(void *)&port,
-		    *tp);
-		if (clnt_st != RPC_SUCCESS) {
-			if ((clnt_st == RPC_PROGVERSMISMATCH) ||
-				(clnt_st == RPC_PROGUNAVAIL))
-				goto try_rpcbind; /* Try different versions */
-			rpc_createerr.cf_stat = RPC_PMAPFAILURE;
-			clnt_geterr(client, &rpc_createerr.cf_error);
-			goto error;
-		} else if (port == 0) {
-			address = NULL;
-			rpc_createerr.cf_stat = RPC_PROGNOTREGISTERED;
-			goto error;
-		}
-		port = htons(port);
-		CLNT_CONTROL(client, CLGET_SVC_ADDR, (char *)&remote);
-		if (((address = (struct netbuf *)
-			malloc(sizeof (struct netbuf))) == NULL) ||
-		    ((address->buf = (char *)
-			malloc(remote.len)) == NULL)) {
-			rpc_createerr.cf_stat = RPC_SYSTEMERROR;
-			clnt_geterr(client, &rpc_createerr.cf_error);
-			if (address) {
-				free(address);
-				address = NULL;
-			}
-			goto error;
-		}
-		memcpy(address->buf, remote.buf, remote.len);
-		memcpy(&((char *)address->buf)[sizeof (short)],
-				(char *)(void *)&port, sizeof (short));
-		address->len = address->maxlen = remote.len;
-		goto done;
-	}
-#endif				/* PORTMAP */
-
-try_rpcbind:
-	/*
-	 * Check if rpcbind is up.  This prevents needless delays when
-	 * accessing applications such as the keyserver while booting
-	 * disklessly.
-	 */
-	if (check_rpcbind && strcmp(nconf->nc_protofmly, NC_LOOPBACK) == 0) {
-		if (!__rpcbind_is_up()) {
-			rpc_createerr.cf_stat = RPC_PMAPFAILURE;
-			rpc_createerr.cf_error.re_errno = 0;
-			goto error;
-		}
-		check_rpcbind = FALSE;
-	}
-
-	/*
-	 * Now we try version 4 and then 3.
-	 * We also send the remote system the address we used to
-	 * contact it in case it can help to connect back with us
-	 */
-	parms.r_prog = program;
-	parms.r_vers = version;
-	/*LINTED const castaway*/
-	parms.r_owner = (char *) &nullstring[0];	/* not needed; */
-							/* just for xdring */
-	parms.r_netid = nconf->nc_netid; /* not really needed */
-
-	/*
-	 * If a COTS transport is being used, try getting address via CLTS
-	 * transport.  This works only with version 4.
-	 */
-	if (nconf->nc_semantics == NC_TPI_COTS_ORD ||
-			nconf->nc_semantics == NC_TPI_COTS) {
-
-		void *handle;
-		struct netconfig *nconf_clts;
-		rpcb_entry_list_ptr relp = NULL;
-
-		if (client == NULL) {
-			/* This did not go through the above PORTMAP/TCP code */
-			if ((handle = __rpc_setconf("datagram_v")) != NULL) {
-				while ((nconf_clts = __rpc_getconf(handle))
-					!= NULL) {
-					if (strcmp(nconf_clts->nc_protofmly,
-						nconf->nc_protofmly) != 0) {
-						continue;
-					}
-					client = getclnthandle(host, nconf_clts,
-							&parms.r_addr);
-					break;
-				}
-				__rpc_endconf(handle);
-			}
-			if (client == NULL)
-				goto regular_rpcbind;	/* Go the regular way */
-		} else {
-			/* This is a UDP PORTMAP handle.  Change to version 4 */
-			vers = RPCBVERS4;
-			CLNT_CONTROL(client, CLSET_VERS, (char *)(void *)&vers);
-		}
-		/*
-		 * We also send the remote system the address we used to
-		 * contact it in case it can help it connect back with us
-		 */
-		if (parms.r_addr == NULL) {
-			/*LINTED const castaway*/
-			parms.r_addr = (char *) &nullstring[0]; /* for XDRing */
-		}
-
-		CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, (char *)&rpcbrmttime);
-
-		clnt_st = CLNT_CALL(client, (rpcproc_t)RPCBPROC_GETADDRLIST,
-		    (xdrproc_t) xdr_rpcb, (char *)(void *)&parms,
-		    (xdrproc_t) xdr_rpcb_entry_list_ptr,
-		    (char *)(void *)&relp, *tp);
-		if (clnt_st == RPC_SUCCESS) {
-			if ((address = got_entry(relp, nconf)) != NULL) {
-				xdr_free((xdrproc_t) xdr_rpcb_entry_list_ptr,
-				    (char *)(void *)&relp);
-				CLNT_CONTROL(client, CLGET_SVC_ADDR,
-					(char *)(void *)&servaddr);
-				__rpc_fixup_addr(address, &servaddr);
-				goto done;
-			}
-			/* Entry not found for this transport */
-			xdr_free((xdrproc_t) xdr_rpcb_entry_list_ptr,
-			    (char *)(void *)&relp);
-			/*
-			 * XXX: should have perhaps returned with error but
-			 * since the remote machine might not always be able
-			 * to send the address on all transports, we try the
-			 * regular way with regular_rpcbind
-			 */
-			goto regular_rpcbind;
-		} else if ((clnt_st == RPC_PROGVERSMISMATCH) ||
-			(clnt_st == RPC_PROGUNAVAIL)) {
-			start_vers = RPCBVERS;	/* Try version 3 now */
-			goto regular_rpcbind; /* Try different versions */
-		} else {
-			rpc_createerr.cf_stat = RPC_PMAPFAILURE;
-			clnt_geterr(client, &rpc_createerr.cf_error);
-			goto error;
-		}
-	}
-
-regular_rpcbind:
-
-	/* Now the same transport is to be used to get the address */
-	if (client && ((nconf->nc_semantics == NC_TPI_COTS_ORD) ||
-			(nconf->nc_semantics == NC_TPI_COTS))) {
-		/* A CLTS type of client - destroy it */
-		CLNT_DESTROY(client);
-		client = NULL;
-	}
-
-	if (client == NULL) {
-		client = getclnthandle(host, nconf, &parms.r_addr);
-		if (client == NULL) {
-			goto error;
-		}
-	}
-	if (parms.r_addr == NULL) {
-		/*LINTED const castaway*/
-		parms.r_addr = (char *) &nullstring[0];
-	}
-
-	/* First try from start_vers and then version 3 (RPCBVERS) */
-
-	CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, (char *) &rpcbrmttime);
-	for (vers = start_vers;  vers >= RPCBVERS; vers--) {
-		/* Set the version */
-		CLNT_CONTROL(client, CLSET_VERS, (char *)(void *)&vers);
-		clnt_st = CLNT_CALL(client, (rpcproc_t)RPCBPROC_GETADDR,
-		    (xdrproc_t) xdr_rpcb, (char *)(void *)&parms,
-		    (xdrproc_t) xdr_wrapstring, (char *)(void *) &ua, *tp);
-		if (clnt_st == RPC_SUCCESS) {
-			if ((ua == NULL) || (ua[0] == 0)) {
-				/* address unknown */
-				rpc_createerr.cf_stat = RPC_PROGNOTREGISTERED;
-				goto error;
-			}
-			address = uaddr2taddr(nconf, ua);
-#ifdef ND_DEBUG
-			fprintf(stderr, "\tRemote address is [%s]\n", ua);
-			if (!address)
-				fprintf(stderr,
-					"\tCouldn't resolve remote address!\n");
-#endif
-			xdr_free((xdrproc_t)xdr_wrapstring,
-			    (char *)(void *)&ua);
-
-			if (! address) {
-				/* We don't know about your universal address */
-				rpc_createerr.cf_stat = RPC_N2AXLATEFAILURE;
-				goto error;
-			}
-			CLNT_CONTROL(client, CLGET_SVC_ADDR,
-			    (char *)(void *)&servaddr);
-			__rpc_fixup_addr(address, &servaddr);
-			goto done;
-		} else if (clnt_st == RPC_PROGVERSMISMATCH) {
-			struct rpc_err rpcerr;
-
-			clnt_geterr(client, &rpcerr);
-			if (rpcerr.re_vers.low > RPCBVERS4)
-				goto error;  /* a new version, can't handle */
-		} else if (clnt_st != RPC_PROGUNAVAIL) {
-			/* Cant handle this error */
-			rpc_createerr.cf_stat = clnt_st;
-			clnt_geterr(client, &rpc_createerr.cf_error);
-			goto error;
-		}
-	}
-
-error:
-	if (client) {
-		CLNT_DESTROY(client);
-		client = NULL;
-	}
-done:
-	if (nconf->nc_semantics != NC_TPI_CLTS) {
-		/* This client is the connectionless one */
-		if (client) {
-			CLNT_DESTROY(client);
-			client = NULL;
-		}
-	}
-	if (clpp) {
-		*clpp = client;
-	} else if (client) {
-		CLNT_DESTROY(client);
-	}
-	if (parms.r_addr != NULL && parms.r_addr != nullstring)
-		free(parms.r_addr);
-	return (address);
-}
-
-
-/*
- * Find the mapped address for program, version.
- * Calls the rpcbind service remotely to do the lookup.
- * Uses the transport specified in nconf.
- * Returns FALSE (0) if no map exists, else returns 1.
- *
- * Assuming that the address is all properly allocated
- */
-bool_t
-rpcb_getaddr(program, version, nconf, address, host)
-	rpcprog_t program;
-	rpcvers_t version;
-	const struct netconfig *nconf;
-	struct netbuf *address;
-	const char *host;
-{
-	struct netbuf *na;
-
-	if ((na = __rpcb_findaddr_timed(program, version,
-	    (struct netconfig *) nconf, (char *) host,
-	    (CLIENT **) NULL, (struct timeval *) NULL)) == NULL)
-		return (FALSE);
-
-	if (na->len > address->maxlen) {
-		/* Too long address */
-		free(na->buf);
-		free(na);
-		rpc_createerr.cf_stat = RPC_FAILED;
-		return (FALSE);
-	}
-	memcpy(address->buf, na->buf, (size_t)na->len);
-	address->len = na->len;
-	free(na->buf);
-	free(na);
-	return (TRUE);
-}
-
-/*
- * Get a copy of the current maps.
- * Calls the rpcbind service remotely to get the maps.
- *
- * It returns only a list of the services
- * It returns NULL on failure.
- */
-rpcblist *
-rpcb_getmaps(nconf, host)
-	const struct netconfig *nconf;
-	const char *host;
-{
-	rpcblist_ptr head = NULL;
-	CLIENT *client;
-	enum clnt_stat clnt_st;
-	rpcvers_t vers = 0;
-
-	client = getclnthandle(host, nconf, NULL);
-	if (client == NULL) {
-		return (head);
-	}
-	clnt_st = CLNT_CALL(client, (rpcproc_t)RPCBPROC_DUMP,
-	    (xdrproc_t) xdr_void, NULL, (xdrproc_t) xdr_rpcblist_ptr,
-	    (char *)(void *)&head, tottimeout);
-	if (clnt_st == RPC_SUCCESS)
-		goto done;
-
-	if ((clnt_st != RPC_PROGVERSMISMATCH) &&
-	    (clnt_st != RPC_PROGUNAVAIL)) {
-		rpc_createerr.cf_stat = RPC_RPCBFAILURE;
-		clnt_geterr(client, &rpc_createerr.cf_error);
-		goto done;
-	}
-
-	/* fall back to earlier version */
-	CLNT_CONTROL(client, CLGET_VERS, (char *)(void *)&vers);
-	if (vers == RPCBVERS4) {
-		vers = RPCBVERS;
-		CLNT_CONTROL(client, CLSET_VERS, (char *)(void *)&vers);
-		if (CLNT_CALL(client, (rpcproc_t)RPCBPROC_DUMP,
-		    (xdrproc_t) xdr_void, NULL, (xdrproc_t) xdr_rpcblist_ptr,
-		    (char *)(void *)&head, tottimeout) == RPC_SUCCESS)
-			goto done;
-	}
-	rpc_createerr.cf_stat = RPC_RPCBFAILURE;
-	clnt_geterr(client, &rpc_createerr.cf_error);
-
-done:
-	CLNT_DESTROY(client);
-	return (head);
-}
-
-/*
- * rpcbinder remote-call-service interface.
- * This routine is used to call the rpcbind remote call service
- * which will look up a service program in the address maps, and then
- * remotely call that routine with the given parameters. This allows
- * programs to do a lookup and call in one step.
-*/
-enum clnt_stat
-rpcb_rmtcall(nconf, host, prog, vers, proc, xdrargs, argsp,
-		xdrres, resp, tout, addr_ptr)
-	const struct netconfig *nconf;	/* Netconfig structure */
-	const char *host;			/* Remote host name */
-	rpcprog_t prog;
-	rpcvers_t vers;
-	rpcproc_t proc;			/* Remote proc identifiers */
-	xdrproc_t xdrargs, xdrres;	/* XDR routines */
-	caddr_t argsp, resp;		/* Argument and Result */
-	struct timeval tout;		/* Timeout value for this call */
-	const struct netbuf *addr_ptr;	/* Preallocated netbuf address */
-{
-	CLIENT *client;
-	enum clnt_stat stat;
-	struct r_rpcb_rmtcallargs a;
-	struct r_rpcb_rmtcallres r;
-	rpcvers_t rpcb_vers;
-
-	stat = 0;
-	client = getclnthandle(host, nconf, NULL);
-	if (client == NULL) {
-		return (RPC_FAILED);
-	}
-	/*LINTED const castaway*/
-	CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, (char *)(void *)&rmttimeout);
-	a.prog = prog;
-	a.vers = vers;
-	a.proc = proc;
-	a.args.args_val = argsp;
-	a.xdr_args = xdrargs;
-	r.addr = NULL;
-	r.results.results_val = resp;
-	r.xdr_res = xdrres;
-
-	for (rpcb_vers = RPCBVERS4; rpcb_vers >= RPCBVERS; rpcb_vers--) {
-		CLNT_CONTROL(client, CLSET_VERS, (char *)(void *)&rpcb_vers);
-		stat = CLNT_CALL(client, (rpcproc_t)RPCBPROC_CALLIT,
-		    (xdrproc_t) xdr_rpcb_rmtcallargs, (char *)(void *)&a,
-		    (xdrproc_t) xdr_rpcb_rmtcallres, (char *)(void *)&r, tout);
-		if ((stat == RPC_SUCCESS) && (addr_ptr != NULL)) {
-			struct netbuf *na;
-			/*LINTED const castaway*/
-			na = uaddr2taddr((struct netconfig *) nconf, r.addr);
-			if (!na) {
-				stat = RPC_N2AXLATEFAILURE;
-				/*LINTED const castaway*/
-				((struct netbuf *) addr_ptr)->len = 0;
-				goto error;
-			}
-			if (na->len > addr_ptr->maxlen) {
-				/* Too long address */
-				stat = RPC_FAILED; /* XXX A better error no */
-				free(na->buf);
-				free(na);
-				/*LINTED const castaway*/
-				((struct netbuf *) addr_ptr)->len = 0;
-				goto error;
-			}
-			memcpy(addr_ptr->buf, na->buf, (size_t)na->len);
-			/*LINTED const castaway*/
-			((struct netbuf *)addr_ptr)->len = na->len;
-			free(na->buf);
-			free(na);
-			break;
-		} else if ((stat != RPC_PROGVERSMISMATCH) &&
-			    (stat != RPC_PROGUNAVAIL)) {
-			goto error;
-		}
-	}
-error:
-	CLNT_DESTROY(client);
-	if (r.addr)
-		xdr_free((xdrproc_t) xdr_wrapstring, (char *)(void *)&r.addr);
-	return (stat);
-}
-
-/*
- * Gets the time on the remote host.
- * Returns 1 if succeeds else 0.
- */
-bool_t
-rpcb_gettime(host, timep)
-	const char *host;
-	time_t *timep;
-{
-	CLIENT *client = NULL;
-	void *handle;
-	struct netconfig *nconf;
-	rpcvers_t vers;
-	enum clnt_stat st;
-
-
-	if ((host == NULL) || (host[0] == 0)) {
-		time(timep);
-		return (TRUE);
-	}
-
-	if ((handle = __rpc_setconf("netpath")) == NULL) {
-		rpc_createerr.cf_stat = RPC_UNKNOWNPROTO;
-		return (FALSE);
-	}
-	rpc_createerr.cf_stat = RPC_SUCCESS;
-	while (client == NULL) {
-		if ((nconf = __rpc_getconf(handle)) == NULL) {
-			if (rpc_createerr.cf_stat == RPC_SUCCESS)
-				rpc_createerr.cf_stat = RPC_UNKNOWNPROTO;
-			break;
-		}
-		client = getclnthandle(host, nconf, NULL);
-		if (client)
-			break;
-	}
-	__rpc_endconf(handle);
-	if (client == (CLIENT *) NULL) {
-		return (FALSE);
-	}
-
-	st = CLNT_CALL(client, (rpcproc_t)RPCBPROC_GETTIME,
-		(xdrproc_t) xdr_void, NULL,
-		(xdrproc_t) xdr_int, (char *)(void *)timep, tottimeout);
-
-	if ((st == RPC_PROGVERSMISMATCH) || (st == RPC_PROGUNAVAIL)) {
-		CLNT_CONTROL(client, CLGET_VERS, (char *)(void *)&vers);
-		if (vers == RPCBVERS4) {
-			/* fall back to earlier version */
-			vers = RPCBVERS;
-			CLNT_CONTROL(client, CLSET_VERS, (char *)(void *)&vers);
-			st = CLNT_CALL(client, (rpcproc_t)RPCBPROC_GETTIME,
-				(xdrproc_t) xdr_void, NULL,
-				(xdrproc_t) xdr_int, (char *)(void *)timep,
-				tottimeout);
-		}
-	}
-	CLNT_DESTROY(client);
-	return (st == RPC_SUCCESS? TRUE: FALSE);
-}
-
-static bool_t
-xdr_netbuf(XDR *xdrs, struct netbuf *objp)
-{
-	bool_t dummy;
-	void **pp;
-
-	if (!xdr_uint32_t(xdrs, (uint32_t *) &objp->maxlen)) {
-		return (FALSE);
-	}
-	pp = &objp->buf;
-
-	if (objp->maxlen > RPC_MAXDATASIZE) {
-		return (FALSE);
-	}
-
-	dummy = xdr_bytes(xdrs, (char **) pp,
-			(u_int *)&(objp->len), objp->maxlen);
-	return (dummy);
-}
-
-/*
- * Converts taddr to universal address.  This routine should never
- * really be called because local n2a libraries are always provided.
- */
-char *
-rpcb_taddr2uaddr(struct netconfig *nconf, struct netbuf *taddr)
-{
-	CLIENT *client;
-	char *uaddr = NULL;
-
-
-	/* parameter checking */
-	if (nconf == NULL) {
-		rpc_createerr.cf_stat = RPC_UNKNOWNPROTO;
-		return (NULL);
-	}
-	if (taddr == NULL) {
-		rpc_createerr.cf_stat = RPC_UNKNOWNADDR;
-		return (NULL);
-	}
-	client = local_rpcb();
-	if (! client) {
-		return (NULL);
-	}
-
-	CLNT_CALL(client, (rpcproc_t)RPCBPROC_TADDR2UADDR,
-	    (xdrproc_t) xdr_netbuf, (char *)(void *)taddr,
-	    (xdrproc_t) xdr_wrapstring, (char *)(void *)&uaddr, tottimeout);
-	CLNT_DESTROY(client);
-	return (uaddr);
-}
-
-/*
- * Converts universal address to netbuf.  This routine should never
- * really be called because local n2a libraries are always provided.
- */
-struct netbuf *
-rpcb_uaddr2taddr(struct netconfig *nconf, char *uaddr)
-{
-	CLIENT *client;
-	struct netbuf *taddr;
-
-
-	/* parameter checking */
-	if (nconf == NULL) {
-		rpc_createerr.cf_stat = RPC_UNKNOWNPROTO;
-		return (NULL);
-	}
-	if (uaddr == NULL) {
-		rpc_createerr.cf_stat = RPC_UNKNOWNADDR;
-		return (NULL);
-	}
-	client = local_rpcb();
-	if (! client) {
-		return (NULL);
-	}
-
-	taddr = (struct netbuf *)malloc(sizeof (struct netbuf), M_RPC, M_WAITOK|M_ZERO);
-	if (CLNT_CALL(client, (rpcproc_t)RPCBPROC_UADDR2TADDR,
-	    (xdrproc_t) xdr_wrapstring, (char *)(void *)&uaddr,
-	    (xdrproc_t) xdr_netbuf, (char *)(void *)taddr,
-	    tottimeout) != RPC_SUCCESS) {
-		free(taddr);
-		taddr = NULL;
-	}
-	CLNT_DESTROY(client);
-	return (taddr);
-}
-
-#endif

From 2834fd2ad58b42c45aa02d0cd21fc1c04b3c278a Mon Sep 17 00:00:00 2001
From: Gleb Smirnoff <glebius@FreeBSD.org>
Date: Wed, 8 Jan 2025 20:00:12 -0800
Subject: [PATCH 073/143] kgssapi: remove the debug module

Its build has been disabled since the original bulk check-in in 2008.  Today
it fails to compile due to multiple errors.  I also tried to build it on
stable/10, and that failed, too.  I guess it has not been buildable since
the initial check-in.
---
 sys/conf/files        |    1 -
 sys/conf/options      |    1 -
 sys/kgssapi/gsstest.c | 1145 -----------------------------------------
 3 files changed, 1147 deletions(-)
 delete mode 100644 sys/kgssapi/gsstest.c

diff --git a/sys/conf/files b/sys/conf/files
index 428a2805768c50..d358737c561320 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4038,7 +4038,6 @@ kgssapi/krb5/krb5_mech.c	optional kgssapi
 kgssapi/krb5/kcrypto.c		optional kgssapi
 kgssapi/krb5/kcrypto_aes.c	optional kgssapi
 kgssapi/kgss_if.m		optional kgssapi
-kgssapi/gsstest.c		optional kgssapi_debug
 # These files in libkern/ are those needed by all architectures.  Some
 # of the files in libkern/ are only needed on some architectures, e.g.,
 # libkern/divdi3.c is needed by i386 but not alpha.  Also, some of these
diff --git a/sys/conf/options b/sys/conf/options
index 438d0e81889c8a..c467dc9995c254 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -290,7 +290,6 @@ TARFS_DEBUG	opt_tarfs.h
 
 # In-kernel GSS-API
 KGSSAPI		opt_kgssapi.h
-KGSSAPI_DEBUG	opt_kgssapi.h
 
 # These static filesystems have one slightly bogus static dependency in
 # sys/i386/i386/autoconf.c.  If any of these filesystems are
diff --git a/sys/kgssapi/gsstest.c b/sys/kgssapi/gsstest.c
deleted file mode 100644
index e47b25042d1c64..00000000000000
--- a/sys/kgssapi/gsstest.c
+++ /dev/null
@@ -1,1145 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause
- *
- * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
- * Authors: Doug Rabson <dfr@rabson.org>
- * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-#include <sys/ctype.h>
-#include <sys/param.h>
-#include <sys/kernel.h>
-#include <sys/kobj.h>
-#include <sys/malloc.h>
-#include <sys/module.h>
-#include <sys/proc.h>
-#include <sys/socketvar.h>
-#include <sys/sysent.h>
-#include <sys/sysproto.h>
-
-#include <kgssapi/gssapi.h>
-#include <kgssapi/gssapi_impl.h>
-#include <rpc/rpc.h>
-#include <rpc/rpc_com.h>
-#include <rpc/rpcb_prot.h>
-#include <rpc/rpcsec_gss.h>
-
-static void
-report_error(gss_OID mech, OM_uint32 maj, OM_uint32 min)
-{
-	OM_uint32 maj_stat, min_stat;
-	OM_uint32 message_context;
-	gss_buffer_desc buf;
-
-	uprintf("major_stat=%d, minor_stat=%d\n", maj, min);
-	message_context = 0;
-	do {
-		maj_stat = gss_display_status(&min_stat, maj,
-		    GSS_C_GSS_CODE, GSS_C_NO_OID, &message_context, &buf);
-		if (GSS_ERROR(maj_stat))
-			break;
-		uprintf("%.*s\n", (int)buf.length, (char *) buf.value);
-		gss_release_buffer(&min_stat, &buf);
-	} while (message_context);
-	if (mech && min) {
-		message_context = 0;
-		do {
-			maj_stat = gss_display_status(&min_stat, min,
-			    GSS_C_MECH_CODE, mech, &message_context, &buf);
-			if (GSS_ERROR(maj_stat))
-				break;
-			uprintf("%.*s\n", (int)buf.length, (char *) buf.value);
-			gss_release_buffer(&min_stat, &buf);
-		} while (message_context);
-	}
-}
-
-#if 0
-static void
-send_token_to_peer(const gss_buffer_t token)
-{
-	const uint8_t *p;
-	size_t i;
-
-	printf("send token:\n");
-	printf("%d ", (int) token->length);
-	p = (const uint8_t *) token->value;
-	for (i = 0; i < token->length; i++)
-		printf("%02x", *p++);
-	printf("\n");
-}
-
-static void
-receive_token_from_peer(gss_buffer_t token)
-{
-	char line[8192];
-	char *p;
-	uint8_t *q;
-	int len, val;
-
-	printf("receive token:\n");
-	fgets(line, sizeof(line), stdin);
-	if (line[strlen(line) - 1] != '\n') {
-		printf("token truncated\n");
-		exit(1);
-	}
-	p = line;
-	if (sscanf(line, "%d ", &len) != 1) {
-		printf("bad token\n");
-		exit(1);
-	}
-	p = strchr(p, ' ') + 1;
-	token->length = len;
-	token->value = malloc(len);
-	q = (uint8_t *) token->value;
-	while (len) {
-		if (sscanf(p, "%02x", &val) != 1) {
-			printf("bad token\n");
-			exit(1);
-		}
-		*q++ = val;
-		p += 2;
-		len--;
-	}
-}
-#endif
-
-#if 0
-void
-server(int argc, char** argv)
-{
-	OM_uint32 maj_stat, min_stat;
-	gss_buffer_desc input_token, output_token;
-	gss_ctx_id_t context_hdl = GSS_C_NO_CONTEXT;
-	gss_name_t client_name;
-	gss_OID mech_type;
-
-	if (argc != 1)
-		usage();
-
-	do {
-		receive_token_from_peer(&input_token);
-		maj_stat = gss_accept_sec_context(&min_stat,
-		    &context_hdl,
-		    GSS_C_NO_CREDENTIAL,
-		    &input_token,
-		    GSS_C_NO_CHANNEL_BINDINGS,
-		    &client_name,
-		    &mech_type,
-		    &output_token,
-		    NULL,
-		    NULL,
-		    NULL);
-		if (GSS_ERROR(maj_stat)) {
-			report_error(mech_type, maj_stat, min_stat);
-		}
-		if (output_token.length != 0) {
-			send_token_to_peer(&output_token);
-			gss_release_buffer(&min_stat, &output_token);
-		}
-		if (GSS_ERROR(maj_stat)) {
-			if (context_hdl != GSS_C_NO_CONTEXT)
-				gss_delete_sec_context(&min_stat,
-				    &context_hdl,
-				    GSS_C_NO_BUFFER);
-			break;
-		}
-	} while (maj_stat & GSS_S_CONTINUE_NEEDED);
-
-	if (client_name) {
-		gss_buffer_desc name_desc;
-		char buf[512];
-
-		gss_display_name(&min_stat, client_name, &name_desc, NULL);
-		memcpy(buf, name_desc.value, name_desc.length);
-		buf[name_desc.length] = 0;
-		gss_release_buffer(&min_stat, &name_desc);
-		printf("client name is %s\n", buf);
-	}
-
-	receive_token_from_peer(&input_token);
-	gss_unwrap(&min_stat, context_hdl, &input_token, &output_token,
-	    NULL, NULL);
-	printf("%.*s\n", (int)output_token.length, (char *) output_token.value);
-	gss_release_buffer(&min_stat, &output_token);
-}
-#endif
-
-/* 1.2.752.43.13.14 */
-static gss_OID_desc gss_krb5_set_allowable_enctypes_x_desc =
-{6, (void *) "\x2a\x85\x70\x2b\x0d\x0e"};
-
-gss_OID GSS_KRB5_SET_ALLOWABLE_ENCTYPES_X = &gss_krb5_set_allowable_enctypes_x_desc;
-#define ETYPE_DES_CBC_CRC	1
-
-/*
- * Create an initiator context and acceptor context in the kernel and
- * use them to exchange signed and sealed messages.
- */
-static int
-gsstest_1(struct thread *td)
-{
-	OM_uint32 maj_stat, min_stat;
-	OM_uint32 smaj_stat, smin_stat;
-	int context_established = 0;
-	gss_ctx_id_t client_context = GSS_C_NO_CONTEXT;
-	gss_ctx_id_t server_context = GSS_C_NO_CONTEXT;
-	gss_cred_id_t client_cred = GSS_C_NO_CREDENTIAL;
-	gss_cred_id_t server_cred = GSS_C_NO_CREDENTIAL;
-	gss_name_t name = GSS_C_NO_NAME;
-	gss_name_t received_name = GSS_C_NO_NAME;
-	gss_buffer_desc name_desc;
-	gss_buffer_desc client_token, server_token, message_buf;
-	gss_OID mech, actual_mech, mech_type;
-	static gss_OID_desc krb5_desc =
-		{9, (void *)"\x2a\x86\x48\x86\xf7\x12\x01\x02\x02"};
-#if 0
-	static gss_OID_desc spnego_desc =
-		{6, (void *)"\x2b\x06\x01\x05\x05\x02"};
-	static gss_OID_desc ntlm_desc =
-		{10, (void *)"\x2b\x06\x01\x04\x01\x82\x37\x02\x02\x0a"};
-#endif
-	char enctype[sizeof(uint32_t)];
-
-	mech = GSS_C_NO_OID;
-
-	{
-		static char sbuf[512];
-		memcpy(sbuf, "nfs@", 4);
-		getcredhostname(td->td_ucred, sbuf + 4, sizeof(sbuf) - 4);
-		name_desc.value = sbuf;
-	}
-
-	name_desc.length = strlen((const char *) name_desc.value);
-	maj_stat = gss_import_name(&min_stat, &name_desc,
-	    GSS_C_NT_HOSTBASED_SERVICE, &name);
-	if (GSS_ERROR(maj_stat)) {
-		printf("gss_import_name failed\n");
-		report_error(mech, maj_stat, min_stat);
-		goto out;
-	}
-
-	maj_stat = gss_acquire_cred(&min_stat, GSS_C_NO_NAME,
-	    0, GSS_C_NO_OID_SET, GSS_C_INITIATE, &client_cred,
-	    NULL, NULL);
-	if (GSS_ERROR(maj_stat)) {
-		printf("gss_acquire_cred (client) failed\n");
-		report_error(mech, maj_stat, min_stat);
-		goto out;
-	}
-
-	enctype[0] = (ETYPE_DES_CBC_CRC >> 24) & 0xff;
-	enctype[1] = (ETYPE_DES_CBC_CRC >> 16) & 0xff;
-	enctype[2] = (ETYPE_DES_CBC_CRC >> 8) & 0xff;
-	enctype[3] = ETYPE_DES_CBC_CRC & 0xff;
-	message_buf.length = sizeof(enctype);
-	message_buf.value = enctype;
-	maj_stat = gss_set_cred_option(&min_stat, &client_cred,
-	    GSS_KRB5_SET_ALLOWABLE_ENCTYPES_X, &message_buf);
-	if (GSS_ERROR(maj_stat)) {
-		printf("gss_set_cred_option failed\n");
-		report_error(mech, maj_stat, min_stat);
-		goto out;
-	}
-
-	server_token.length = 0;
-	server_token.value = NULL;
-	while (!context_established) {
-		client_token.length = 0;
-		client_token.value = NULL;
-		maj_stat = gss_init_sec_context(&min_stat,
-		    client_cred,
-		    &client_context,
-		    name,
-		    mech,
-		    GSS_C_MUTUAL_FLAG|GSS_C_CONF_FLAG|GSS_C_INTEG_FLAG,
-		    0,
-		    GSS_C_NO_CHANNEL_BINDINGS,
-		    &server_token,
-		    &actual_mech,
-		    &client_token,
-		    NULL,
-		    NULL);
-		if (server_token.length)
-			gss_release_buffer(&smin_stat, &server_token);
-		if (GSS_ERROR(maj_stat)) {
-			printf("gss_init_sec_context failed\n");
-			report_error(mech, maj_stat, min_stat);
-			goto out;
-		}
-
-		if (client_token.length != 0) {
-			if (!server_cred) {
-				gss_OID_set_desc oid_set;
-				oid_set.count = 1;
-				oid_set.elements = &krb5_desc;
-				smaj_stat = gss_acquire_cred(&smin_stat,
-				    name, 0, &oid_set, GSS_C_ACCEPT, &server_cred,
-				    NULL, NULL);
-				if (GSS_ERROR(smaj_stat)) {
-					printf("gss_acquire_cred (server) failed\n");
-					report_error(mech_type, smaj_stat, smin_stat);
-					goto out;
-				}
-			}
-			smaj_stat = gss_accept_sec_context(&smin_stat,
-			    &server_context,
-			    server_cred,
-			    &client_token,
-			    GSS_C_NO_CHANNEL_BINDINGS,
-			    &received_name,
-			    &mech_type,
-			    &server_token,
-			    NULL,
-			    NULL,
-			    NULL);
-			if (GSS_ERROR(smaj_stat)) {
-				printf("gss_accept_sec_context failed\n");
-				report_error(mech_type, smaj_stat, smin_stat);
-				goto out;
-			}
-			gss_release_buffer(&min_stat, &client_token);
-		}
-		if (GSS_ERROR(maj_stat)) {
-			if (client_context != GSS_C_NO_CONTEXT)
-				gss_delete_sec_context(&min_stat,
-				    &client_context,
-				    GSS_C_NO_BUFFER);
-			break;
-		}
-
-		if (maj_stat == GSS_S_COMPLETE) {
-			context_established = 1;
-		}
-	}
-
-	message_buf.length = strlen("Hello world");
-	message_buf.value = (void *) "Hello world";
-
-	maj_stat = gss_get_mic(&min_stat, client_context,
-	    GSS_C_QOP_DEFAULT, &message_buf, &client_token);
-	if (GSS_ERROR(maj_stat)) {
-		printf("gss_get_mic failed\n");
-		report_error(mech_type, maj_stat, min_stat);
-		goto out;
-	}
-	maj_stat = gss_verify_mic(&min_stat, server_context,
-	    &message_buf, &client_token, NULL);
-	if (GSS_ERROR(maj_stat)) {
-		printf("gss_verify_mic failed\n");
-		report_error(mech_type, maj_stat, min_stat);
-		goto out;
-	}
-	gss_release_buffer(&min_stat, &client_token);
-
-	maj_stat = gss_wrap(&min_stat, client_context,
-	    TRUE, GSS_C_QOP_DEFAULT, &message_buf, NULL, &client_token);
-	if (GSS_ERROR(maj_stat)) {
-		printf("gss_wrap failed\n");
-		report_error(mech_type, maj_stat, min_stat);
-		goto out;
-	}
-	maj_stat = gss_unwrap(&min_stat, server_context,
-	    &client_token, &server_token, NULL, NULL);
-	if (GSS_ERROR(maj_stat)) {
-		printf("gss_unwrap failed\n");
-		report_error(mech_type, maj_stat, min_stat);
-		goto out;
-	}
-
- 	if (message_buf.length != server_token.length
-	    || memcmp(message_buf.value, server_token.value,
-		message_buf.length))
-		printf("unwrap result corrupt\n");
-
-	gss_release_buffer(&min_stat, &client_token);
-	gss_release_buffer(&min_stat, &server_token);
-
-out:
-	if (client_context)
-		gss_delete_sec_context(&min_stat, &client_context,
-		    GSS_C_NO_BUFFER);
-	if (server_context)
-		gss_delete_sec_context(&min_stat, &server_context,
-		    GSS_C_NO_BUFFER);
-	if (client_cred)
-		gss_release_cred(&min_stat, &client_cred);
-	if (server_cred)
-		gss_release_cred(&min_stat, &server_cred);
-	if (name)
-		gss_release_name(&min_stat, &name);
-	if (received_name)
-		gss_release_name(&min_stat, &received_name);
-
-	return (0);
-}
-
-/*
- * Interoperability with userland. This takes several steps:
- *
- * 1. Accept an initiator token from userland, return acceptor
- * token. Repeat this step until both userland and kernel return
- * GSS_S_COMPLETE.
- *
- * 2. Receive a signed message from userland and verify the
- * signature. Return a signed reply to userland for it to verify.
- *
- * 3. Receive a wrapped message from userland and unwrap it. Return a
- * wrapped reply to userland.
- */
-static int
-gsstest_2(struct thread *td, int step, const gss_buffer_t input_token,
-    OM_uint32 *maj_stat_res, OM_uint32 *min_stat_res, gss_buffer_t output_token)
-{
-	OM_uint32 maj_stat, min_stat;
-	static int context_established = 0;
-	static gss_ctx_id_t server_context = GSS_C_NO_CONTEXT;
-	static gss_cred_id_t server_cred = GSS_C_NO_CREDENTIAL;
-	static gss_name_t name = GSS_C_NO_NAME;
-	gss_buffer_desc name_desc;
-	gss_buffer_desc message_buf;
-	gss_OID mech_type = GSS_C_NO_OID;
-	char enctype[sizeof(uint32_t)];
-	int error = EINVAL;
-
-	maj_stat = GSS_S_FAILURE;
-	min_stat = 0;
-	switch (step) {
-	case 1:
-		if (server_context == GSS_C_NO_CONTEXT) {
-			static char sbuf[512];
-			memcpy(sbuf, "nfs@", 4);
-			getcredhostname(td->td_ucred, sbuf + 4,
-			    sizeof(sbuf) - 4);
-			name_desc.value = sbuf;
-			name_desc.length = strlen((const char *)
-			    name_desc.value);
-			maj_stat = gss_import_name(&min_stat, &name_desc,
-			    GSS_C_NT_HOSTBASED_SERVICE, &name);
-			if (GSS_ERROR(maj_stat)) {
-				printf("gss_import_name failed\n");
-				report_error(mech_type, maj_stat, min_stat);
-				goto out;
-			}
-
-			maj_stat = gss_acquire_cred(&min_stat,
-			    name, 0, GSS_C_NO_OID_SET, GSS_C_ACCEPT,
-			    &server_cred, NULL, NULL);
-			if (GSS_ERROR(maj_stat)) {
-				printf("gss_acquire_cred (server) failed\n");
-				report_error(mech_type, maj_stat, min_stat);
-				goto out;
-			}
-
-			enctype[0] = (ETYPE_DES_CBC_CRC >> 24) & 0xff;
-			enctype[1] = (ETYPE_DES_CBC_CRC >> 16) & 0xff;
-			enctype[2] = (ETYPE_DES_CBC_CRC >> 8) & 0xff;
-			enctype[3] = ETYPE_DES_CBC_CRC & 0xff;
-			message_buf.length = sizeof(enctype);
-			message_buf.value = enctype;
-			maj_stat = gss_set_cred_option(&min_stat, &server_cred,
-			    GSS_KRB5_SET_ALLOWABLE_ENCTYPES_X, &message_buf);
-			if (GSS_ERROR(maj_stat)) {
-				printf("gss_set_cred_option failed\n");
-				report_error(mech_type, maj_stat, min_stat);
-				goto out;
-			}
-		}
-
-		maj_stat = gss_accept_sec_context(&min_stat,
-		    &server_context,
-		    server_cred,
-		    input_token,
-		    GSS_C_NO_CHANNEL_BINDINGS,
-		    NULL,
-		    &mech_type,
-		    output_token,
-		    NULL,
-		    NULL,
-		    NULL);
-		if (GSS_ERROR(maj_stat)) {
-			printf("gss_accept_sec_context failed\n");
-			report_error(mech_type, maj_stat, min_stat);
-			goto out;
-		}
-
-		if (maj_stat == GSS_S_COMPLETE) {
-			context_established = 1;
-		}
-		*maj_stat_res = maj_stat;
-		*min_stat_res = min_stat;
-		break;
-
-	case 2:
-		message_buf.length = strlen("Hello world");
-		message_buf.value = (void *) "Hello world";
-
-		maj_stat = gss_verify_mic(&min_stat, server_context,
-		    &message_buf, input_token, NULL);
-		if (GSS_ERROR(maj_stat)) {
-			printf("gss_verify_mic failed\n");
-			report_error(mech_type, maj_stat, min_stat);
-			goto out;
-		}
-
-		maj_stat = gss_get_mic(&min_stat, server_context,
-		    GSS_C_QOP_DEFAULT, &message_buf, output_token);
-		if (GSS_ERROR(maj_stat)) {
-			printf("gss_get_mic failed\n");
-			report_error(mech_type, maj_stat, min_stat);
-			goto out;
-		}
-		break;
-
-	case 3:
-		maj_stat = gss_unwrap(&min_stat, server_context,
-		    input_token, &message_buf, NULL, NULL);
-		if (GSS_ERROR(maj_stat)) {
-			printf("gss_unwrap failed\n");
-			report_error(mech_type, maj_stat, min_stat);
-			goto out;
-		}
-		gss_release_buffer(&min_stat, &message_buf);
-
-		message_buf.length = strlen("Hello world");
-		message_buf.value = (void *) "Hello world";
-		maj_stat = gss_wrap(&min_stat, server_context,
-		    TRUE, GSS_C_QOP_DEFAULT, &message_buf, NULL, output_token);
-		if (GSS_ERROR(maj_stat)) {
-			printf("gss_wrap failed\n");
-			report_error(mech_type, maj_stat, min_stat);
-			goto out;
-		}
-		break;
-
-	case 4:
-		maj_stat = gss_unwrap(&min_stat, server_context,
-		    input_token, &message_buf, NULL, NULL);
-		if (GSS_ERROR(maj_stat)) {
-			printf("gss_unwrap failed\n");
-			report_error(mech_type, maj_stat, min_stat);
-			goto out;
-		}
-		gss_release_buffer(&min_stat, &message_buf);
-
-		message_buf.length = strlen("Hello world");
-		message_buf.value = (void *) "Hello world";
-		maj_stat = gss_wrap(&min_stat, server_context,
-		    FALSE, GSS_C_QOP_DEFAULT, &message_buf, NULL, output_token);
-		if (GSS_ERROR(maj_stat)) {
-			printf("gss_wrap failed\n");
-			report_error(mech_type, maj_stat, min_stat);
-			goto out;
-		}
-		break;
-
-	case 5:
-		error = 0;
-		goto out;
-	}
-	*maj_stat_res = maj_stat;
-	*min_stat_res = min_stat;
-	return (0);
-
-out:
-	*maj_stat_res = maj_stat;
-	*min_stat_res = min_stat;
-	if (server_context)
-		gss_delete_sec_context(&min_stat, &server_context,
-		    GSS_C_NO_BUFFER);
-	if (server_cred)
-		gss_release_cred(&min_stat, &server_cred);
-	if (name)
-		gss_release_name(&min_stat, &name);
-
-	return (error);
-}
-
-/*
- * Create an RPC client handle for the given (address,prog,vers)
- * triple using UDP.
- */
-static CLIENT *
-gsstest_get_rpc(struct sockaddr *sa, rpcprog_t prog, rpcvers_t vers)
-{
-	struct thread *td = curthread;
-	const char* protofmly;
-	struct sockaddr_storage ss;
-	struct socket *so;
-	CLIENT *rpcb;
-	struct timeval timo;
-	RPCB parms;
-	char *uaddr;
-	enum clnt_stat stat = RPC_SUCCESS;
-	int rpcvers = RPCBVERS4;
-	bool_t do_tcp = FALSE;
-	struct portmap mapping;
-	u_short port = 0;
-
-	/*
-	 * First we need to contact the remote RPCBIND service to find
-	 * the right port.
-	 */
-	memcpy(&ss, sa, sa->sa_len);
-	switch (ss.ss_family) {
-	case AF_INET:
-		((struct sockaddr_in *)&ss)->sin_port = htons(111);
-		protofmly = "inet";
-		socreate(AF_INET, &so, SOCK_DGRAM, 0, td->td_ucred, td);
-		break;
-		
-#ifdef INET6
-	case AF_INET6:
-		((struct sockaddr_in6 *)&ss)->sin6_port = htons(111);
-		protofmly = "inet6";
-		socreate(AF_INET6, &so, SOCK_DGRAM, 0, td->td_ucred, td);
-		break;
-#endif
-
-	default:
-		/*
-		 * Unsupported address family - fail.
-		 */
-		return (NULL);
-	}
-
-	rpcb = clnt_dg_create(so, (struct sockaddr *)&ss,
-	    RPCBPROG, rpcvers, 0, 0);
-	if (!rpcb)
-		return (NULL);
-
-try_tcp:
-	parms.r_prog = prog;
-	parms.r_vers = vers;
-	if (do_tcp)
-		parms.r_netid = "tcp";
-	else
-		parms.r_netid = "udp";
-	parms.r_addr = "";
-	parms.r_owner = "";
-
-	/*
-	 * Use the default timeout.
-	 */
-	timo.tv_sec = 25;
-	timo.tv_usec = 0;
-again:
-	switch (rpcvers) {
-	case RPCBVERS4:
-	case RPCBVERS:
-		/*
-		 * Try RPCBIND 4 then 3.
-		 */
-		uaddr = NULL;
-		stat = CLNT_CALL(rpcb, (rpcprog_t) RPCBPROC_GETADDR,
-		    (xdrproc_t) xdr_rpcb, &parms,
-		    (xdrproc_t) xdr_wrapstring, &uaddr, timo);
-		if (stat == RPC_PROGVERSMISMATCH) {
-			if (rpcvers == RPCBVERS4)
-				rpcvers = RPCBVERS;
-			else if (rpcvers == RPCBVERS)
-				rpcvers = PMAPVERS;
-			CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers);
-			goto again;
-		} else if (stat == RPC_SUCCESS) {
-			/*
-			 * We have a reply from the remote RPCBIND - turn it
-			 * into an appropriate address and make a new client
-			 * that can talk to the remote service.
-			 *
-			 * XXX fixup IPv6 scope ID.
-			 */
-			struct netbuf *a;
-			a = __rpc_uaddr2taddr_af(ss.ss_family, uaddr);
-			xdr_free((xdrproc_t) xdr_wrapstring, &uaddr);
-			if (!a) {
-				CLNT_DESTROY(rpcb);
-				return (NULL);
-			}
-			memcpy(&ss, a->buf, a->len);
-			free(a->buf, M_RPC);
-			free(a, M_RPC);
-		}
-		break;
-	case PMAPVERS:
-		/*
-		 * Try portmap.
-		 */
-		mapping.pm_prog = parms.r_prog;
-		mapping.pm_vers = parms.r_vers;
-		mapping.pm_prot = do_tcp ? IPPROTO_TCP : IPPROTO_UDP;
-		mapping.pm_port = 0;
-
-		stat = CLNT_CALL(rpcb, (rpcprog_t) PMAPPROC_GETPORT,
-		    (xdrproc_t) xdr_portmap, &mapping,
-		    (xdrproc_t) xdr_u_short, &port, timo);
-
-		if (stat == RPC_SUCCESS) {
-			switch (ss.ss_family) {
-			case AF_INET:
-				((struct sockaddr_in *)&ss)->sin_port =
-					htons(port);
-				break;
-		
-#ifdef INET6
-			case AF_INET6:
-				((struct sockaddr_in6 *)&ss)->sin6_port =
-					htons(port);
-				break;
-#endif
-			}
-		}
-		break;
-	default:
-		panic("invalid rpcvers %d", rpcvers);
-	}
-	/*
-	 * We may have a positive response from the portmapper, but
-	 * the requested service was not found. Make sure we received
-	 * a valid port.
-	 */
-	switch (ss.ss_family) {
-	case AF_INET:
-		port = ((struct sockaddr_in *)&ss)->sin_port;
-		break;
-#ifdef INET6
-	case AF_INET6:
-		port = ((struct sockaddr_in6 *)&ss)->sin6_port;
-		break;
-#endif
-	}
-	if (stat != RPC_SUCCESS || !port) {
-		/*
-		 * If we were able to talk to rpcbind or portmap, but the udp
-		 * variant wasn't available, ask about tcp.
-		 *
-		 * XXX - We could also check for a TCP portmapper, but
-		 * if the host is running a portmapper at all, we should be able
-		 * to hail it over UDP.
-		 */
-		if (stat == RPC_SUCCESS && !do_tcp) {
-			do_tcp = TRUE;
-			goto try_tcp;
-		}
-
-		/* Otherwise, bad news. */
-		printf("gsstest_get_rpc: failed to contact remote rpcbind, "
-		    "stat = %d, port = %d\n",
-		    (int) stat, port);
-		CLNT_DESTROY(rpcb);
-		return (NULL);
-	}
-
-	if (do_tcp) {
-		/*
-		 * Destroy the UDP client we used to speak to rpcbind and
-		 * recreate as a TCP client.
-		 */
-		struct netconfig *nconf = NULL;
-
-		CLNT_DESTROY(rpcb);
-
-		switch (ss.ss_family) {
-		case AF_INET:
-			nconf = getnetconfigent("tcp");
-			break;
-#ifdef INET6
-		case AF_INET6:
-			nconf = getnetconfigent("tcp6");
-			break;
-#endif
-		}
-
-		rpcb = clnt_reconnect_create(nconf, (struct sockaddr *)&ss,
-		    prog, vers, 0, 0);
-	} else {
-		/*
-		 * Re-use the client we used to speak to rpcbind.
-		 */
-		CLNT_CONTROL(rpcb, CLSET_SVC_ADDR, &ss);
-		CLNT_CONTROL(rpcb, CLSET_PROG, &prog);
-		CLNT_CONTROL(rpcb, CLSET_VERS, &vers);
-	}
-
-	return (rpcb);
-}
-
-/*
- * RPCSEC_GSS client
- */
-static int
-gsstest_3(struct thread *td)
-{
-	struct sockaddr_in sin;
-	char service[128];
-	CLIENT *client;
-	AUTH *auth;
-	rpc_gss_options_ret_t options_ret;
-	enum clnt_stat stat;
-	struct timeval tv;
-	rpc_gss_service_t svc;
-	int i;
-
-	sin.sin_len = sizeof(sin);
-	sin.sin_family = AF_INET;
-	sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-	sin.sin_port = 0;
-
-	client = gsstest_get_rpc((struct sockaddr *) &sin, 123456, 1);
-	if (!client) {
-		uprintf("Can't connect to service\n");
-		return(1);
-	}
-
-	memcpy(service, "host@", 5);
-	getcredhostname(td->td_ucred, service + 5, sizeof(service) - 5);
-
-	auth = rpc_gss_seccreate(client, curthread->td_ucred,
-	    service, "kerberosv5", rpc_gss_svc_privacy,
-	    NULL, NULL, &options_ret);
-	if (!auth) {
-		gss_OID oid;
-		uprintf("Can't authorize to service (mech=%s)\n",
-			options_ret.actual_mechanism);
-		oid = GSS_C_NO_OID;
-		rpc_gss_mech_to_oid(options_ret.actual_mechanism, &oid);
-		report_error(oid, options_ret.major_status,
-		    options_ret.minor_status);
-		CLNT_DESTROY(client);
-		return (1);
-	}
-
-	for (svc = rpc_gss_svc_none; svc <= rpc_gss_svc_privacy; svc++) {
-		const char *svc_names[] = {
-			"rpc_gss_svc_default",
-			"rpc_gss_svc_none",
-			"rpc_gss_svc_integrity",
-			"rpc_gss_svc_privacy"
-		};
-		int num;
-
-		rpc_gss_set_defaults(auth, svc, NULL);
-
-		client->cl_auth = auth;
-		tv.tv_sec = 5;
-		tv.tv_usec = 0;
-		for (i = 42; i < 142; i++) {
-			num = i;
-			stat = CLNT_CALL(client, 1,
-			    (xdrproc_t) xdr_int, (char *) &num,
-			    (xdrproc_t) xdr_int, (char *) &num, tv);
-			if (stat == RPC_SUCCESS) {
-				if (num != i + 100)
-					uprintf("unexpected reply %d\n", num);
-			} else {
-				uprintf("call failed, stat=%d\n", (int) stat);
-				break;
-			}
-		}
-		if (i == 142)
-			uprintf("call succeeded with %s\n", svc_names[svc]);
-	}
-
-	AUTH_DESTROY(auth);
-	CLNT_RELEASE(client);
-
-	return (0);
-}
-
-/*
- * RPCSEC_GSS server
- */
-static rpc_gss_principal_t server_acl = NULL;
-static bool_t server_new_context(struct svc_req *req, gss_cred_id_t deleg,
-    gss_ctx_id_t gss_context, rpc_gss_lock_t *lock, void **cookie);
-static void server_program_1(struct svc_req *rqstp, register SVCXPRT *transp);
-
-static int
-gsstest_4(struct thread *td)
-{
-	SVCPOOL *pool;
-	char principal[128 + 5];
-	const char **mechs;
-	static rpc_gss_callback_t cb;
-
-	memcpy(principal, "host@", 5);
-	getcredhostname(td->td_ucred, principal + 5, sizeof(principal) - 5);
-
-	mechs = rpc_gss_get_mechanisms();
-	while (*mechs) {
-		if (!rpc_gss_set_svc_name(principal, *mechs, GSS_C_INDEFINITE,
-			123456, 1)) {
-			rpc_gss_error_t e;
-
-			rpc_gss_get_error(&e);
-			printf("setting name for %s for %s failed: %d, %d\n",
-			    principal, *mechs,
-			    e.rpc_gss_error, e.system_error);
-		}
-		mechs++;
-	}
-
-	cb.program = 123456;
-	cb.version = 1;
-	cb.callback = server_new_context;
-	rpc_gss_set_callback(&cb);
-
-	pool = svcpool_create("gsstest", NULL);
-
-	svc_create(pool, server_program_1, 123456, 1, NULL);
-	svc_run(pool);
-
-	rpc_gss_clear_svc_name(123456, 1);
-	rpc_gss_clear_callback(&cb);
-
-	svcpool_destroy(pool);
-
-	return (0);
-}
-
-static void
-server_program_1(struct svc_req *rqstp, register SVCXPRT *transp)
-{
-	rpc_gss_rawcred_t *rcred;
-	rpc_gss_ucred_t *ucred;
-	int		i, num;
-
-	if (rqstp->rq_cred.oa_flavor != RPCSEC_GSS) {
-		svcerr_weakauth(rqstp);
-		return;
-	}		
-		
-	if (!rpc_gss_getcred(rqstp, &rcred, &ucred, NULL)) {
-		svcerr_systemerr(rqstp);
-		return;
-	}
-
-	printf("svc=%d, mech=%s, uid=%d, gid=%d, gids={",
-	    rcred->service, rcred->mechanism, ucred->uid, ucred->gid);
-	for (i = 0; i < ucred->gidlen; i++) {
-		if (i > 0) printf(",");
-		printf("%d", ucred->gidlist[i]);
-	}
-	printf("}\n");
-
-	switch (rqstp->rq_proc) {
-	case 0:
-		if (!svc_getargs(rqstp, (xdrproc_t) xdr_void, 0)) {
-			svcerr_decode(rqstp);
-			goto out;
-		}
-		if (!svc_sendreply(rqstp, (xdrproc_t) xdr_void, 0)) {
-			svcerr_systemerr(rqstp);
-		}
-		goto out;
-
-	case 1:
-		if (!svc_getargs(rqstp, (xdrproc_t) xdr_int,
-			(char *) &num)) {
-			svcerr_decode(rqstp);
-			goto out;
-		}
-		num += 100;
-		if (!svc_sendreply(rqstp, (xdrproc_t) xdr_int,
-			(char *) &num)) {
-			svcerr_systemerr(rqstp);
-		}
-		goto out;
-
-	default:
-		svcerr_noproc(rqstp);
-		goto out;
-	}
-
-out:
-	svc_freereq(rqstp);
-	return;
-}
-
-static void
-print_principal(rpc_gss_principal_t principal)
-{
-	int i, len, n;
-	uint8_t *p;
-
-	len = principal->len;
-	p = (uint8_t *) principal->name;
-	while (len > 0) {
-		n = len;
-		if (n > 16)
-			n = 16;
-		for (i = 0; i < n; i++)
-			printf("%02x ", p[i]);
-		for (; i < 16; i++)
-			printf("   ");
-		printf("|");
-		for (i = 0; i < n; i++)
-			printf("%c", isprint(p[i]) ? p[i] : '.');
-		printf("|\n");
-		len -= n;
-		p += n;
-	}
-}
-
-static bool_t
-server_new_context(__unused struct svc_req *req,
-    gss_cred_id_t deleg,
-    __unused gss_ctx_id_t gss_context,
-    rpc_gss_lock_t *lock,
-    __unused void **cookie)
-{
-	rpc_gss_rawcred_t *rcred = lock->raw_cred;
-	OM_uint32 junk;
-
-	printf("new security context version=%d, mech=%s, qop=%s:\n",
-	    rcred->version, rcred->mechanism, rcred->qop);
-	print_principal(rcred->client_principal);
-
-	if (server_acl) {
-		if (rcred->client_principal->len != server_acl->len
-		    || memcmp(rcred->client_principal->name, server_acl->name,
-			server_acl->len)) {
-			return (FALSE);
-		}
-	}
-	gss_release_cred(&junk, &deleg);
-
-	return (TRUE);
-}
-
-/*
- * Hook up a syscall for gssapi testing.
- */
-
-struct gsstest_args {
-        int a_op;
-	void *a_args;
-	void *a_res;
-};
-
-struct gsstest_2_args {
-	int step;		/* test step number */
-	gss_buffer_desc input_token; /* token from userland */
-	gss_buffer_desc output_token; /* buffer to receive reply token */
-};
-struct gsstest_2_res {
-	OM_uint32 maj_stat;	/* maj_stat from kernel */
-	OM_uint32 min_stat;	/* min_stat from kernel */
-	gss_buffer_desc output_token; /* reply token (using space from gsstest_2_args.output) */
-};
-
-static int
-gsstest(struct thread *td, struct gsstest_args *uap)
-{
-	int error;
-
-	switch (uap->a_op) {
-	case 1:
-                return (gsstest_1(td));
-
-	case 2: {
-		struct gsstest_2_args args;
-		struct gsstest_2_res res;
-		gss_buffer_desc input_token, output_token;
-		OM_uint32 junk;
-
-		error = copyin(uap->a_args, &args, sizeof(args));
-		if (error)
-			return (error);
-		input_token.length = args.input_token.length;
-		input_token.value = malloc(input_token.length, M_GSSAPI,
-		    M_WAITOK);
-		error = copyin(args.input_token.value, input_token.value,
-		    input_token.length);
-		if (error) {
-			gss_release_buffer(&junk, &input_token);
-			return (error);
-		}
-		output_token.length = 0;
-		output_token.value = NULL;
-		gsstest_2(td, args.step, &input_token,
-		    &res.maj_stat, &res.min_stat, &output_token);
-		gss_release_buffer(&junk, &input_token);
-		if (output_token.length > args.output_token.length) {
-			gss_release_buffer(&junk, &output_token);
-			return (EOVERFLOW);
-		}
-		res.output_token.length = output_token.length;
-		res.output_token.value = args.output_token.value;
-		error = copyout(output_token.value, res.output_token.value,
-		    output_token.length);
-		gss_release_buffer(&junk, &output_token);
-		if (error)
-			return (error);
-
-		return (copyout(&res, uap->a_res, sizeof(res)));
-		
-		break;
-	}
-	case 3:
-		return (gsstest_3(td));
-	case 4:
-		return (gsstest_4(td));
-	}
-
-        return (EINVAL);
-}
-
-/*
- * The `sysent' for the new syscall
- */
-static struct sysent gsstest_sysent = {
-        3,                      /* sy_narg */
-        (sy_call_t *) gsstest	/* sy_call */
-};
-
-/*
- * The offset in sysent where the syscall is allocated.
- */
-static int gsstest_offset = NO_SYSCALL;
-
-/*
- * The function called at load/unload.
- */
-
-static int
-gsstest_load(struct module *module, int cmd, void *arg)
-{
-        int error = 0;
-
-        switch (cmd) {
-        case MOD_LOAD :
-                break;
-        case MOD_UNLOAD :
-                break;
-        default :
-                error = EOPNOTSUPP;
-                break;
-        }
-        return error;
-}
-
-SYSCALL_MODULE(gsstest_syscall, &gsstest_offset, &gsstest_sysent,
-    gsstest_load, NULL);

From 3a0cdb2675e64460be17e640fd871907163342e8 Mon Sep 17 00:00:00 2001
From: Gleb Smirnoff <glebius@FreeBSD.org>
Date: Wed, 8 Jan 2025 20:00:12 -0800
Subject: [PATCH 074/143] rpc: clean kernel RPC internal headers of non-kernel
 declarations

The files svc.h and clnt.h derive from the same files that live in
/usr/include, however there is nothing really shared between the kernel
and libc RPC implementations.  The kernel side files are not installed and
there is no reason to pollute them with the old definitions.
---
 sys/rpc/clnt.h | 285 -------------------------------------------------
 sys/rpc/svc.h  | 273 ----------------------------------------------
 2 files changed, 558 deletions(-)

diff --git a/sys/rpc/clnt.h b/sys/rpc/clnt.h
index da02137397f1dd..d9fc372709cf1e 100644
--- a/sys/rpc/clnt.h
+++ b/sys/rpc/clnt.h
@@ -41,12 +41,8 @@
 #define _RPC_CLNT_H_
 #include <rpc/clnt_stat.h>
 #include <sys/cdefs.h>
-#ifdef _KERNEL
 #include <sys/refcount.h>
 #include <rpc/netconfig.h>
-#else
-#include <netconfig.h>
-#endif
 #include <sys/un.h>
 
 /*
@@ -90,7 +86,6 @@ struct rpc_err {
 #define	re_lb		ru.RE_lb
 };
 
-#ifdef _KERNEL
 /*
  * Functions of this type may be used to receive notification when RPC
  * calls have to be re-transmitted etc.
@@ -117,7 +112,6 @@ struct rpc_callextra {
 	struct rpc_timers *rc_timers;	  /* optional RTT timers */
 	struct rpc_err	rc_err;		/* detailed call status */
 };
-#endif
 
 /*
  * Client rpc handle.
@@ -125,7 +119,6 @@ struct rpc_callextra {
  * Client is responsible for initializing auth, see e.g. auth_none.c.
  */
 typedef struct __rpc_client {
-#ifdef _KERNEL
 	volatile u_int cl_refs;			/* reference count */
 	AUTH	*cl_auth;			/* authenticator */
 	const struct clnt_ops {
@@ -149,28 +142,6 @@ typedef struct __rpc_client {
 		bool_t          (*cl_control)(struct __rpc_client *, u_int,
 				    void *);
 	} *cl_ops;
-#else
-	AUTH	*cl_auth;			/* authenticator */
-	struct clnt_ops {
-		/* call remote procedure */
-		enum clnt_stat	(*cl_call)(struct __rpc_client *,
-		    rpcproc_t, xdrproc_t, void *, xdrproc_t,
-		    void *, struct timeval);
-		/* abort a call */
-		void		(*cl_abort)(struct __rpc_client *);
-		/* get specific error code */
-		void		(*cl_geterr)(struct __rpc_client *,
-					struct rpc_err *);
-		/* frees results */
-		bool_t		(*cl_freeres)(struct __rpc_client *,
-					xdrproc_t, void *);
-		/* destroy this structure */
-		void		(*cl_destroy)(struct __rpc_client *);
-		/* the ioctl() of rpc */
-		bool_t          (*cl_control)(struct __rpc_client *, u_int,
-				    void *);
-	} *cl_ops;
-#endif
 	void 			*cl_private;	/* private stuff */
 	char			*cl_netid;	/* network token */
 	char			*cl_tp;		/* device name */
@@ -198,7 +169,6 @@ typedef struct __rpc_client {
  *
  */
 
-#ifdef _KERNEL
 #define CLNT_ACQUIRE(rh)			\
 	refcount_acquire(&(rh)->cl_refs)
 #define CLNT_RELEASE(rh)			\
@@ -246,7 +216,6 @@ enum clnt_stat clnt_call_private(CLIENT *, struct rpc_callextra *, rpcproc_t,
 #define	CLNT_CALL_EXT(rh, ext, proc, xargs, argsp, xres, resp, secs)	\
 	clnt_call_private(rh, ext, proc, xargs,				\
 		argsp, xres, resp, secs)
-#endif
 
 /*
  * enum clnt_stat
@@ -259,21 +228,12 @@ enum clnt_stat clnt_call_private(CLIENT *, struct rpc_callextra *, rpcproc_t,
  *	void *resp;
  *	struct timeval timeout;
  */
-#ifdef _KERNEL
 #define	CLNT_CALL(rh, proc, xargs, argsp, xres, resp, secs)	\
 	clnt_call_private(rh, NULL, proc, xargs,		\
 		argsp, xres, resp, secs)
 #define	clnt_call(rh, proc, xargs, argsp, xres, resp, secs)	\
 	clnt_call_private(rh, NULL, proc, xargs,		\
 		argsp, xres, resp, secs)
-#else
-#define	CLNT_CALL(rh, proc, xargs, argsp, xres, resp, secs)		\
-	((*(rh)->cl_ops->cl_call)(rh, proc, xargs,	\
-		argsp, xres, resp, secs))
-#define	clnt_call(rh, proc, xargs, argsp, xres, resp, secs)		\
-	((*(rh)->cl_ops->cl_call)(rh, proc, xargs,	\
-		argsp, xres, resp, secs))
-#endif
 
 /*
  * void
@@ -339,7 +299,6 @@ enum clnt_stat clnt_call_private(CLIENT *, struct rpc_callextra *, rpcproc_t,
 #define CLSET_ASYNC		19
 #define CLSET_CONNECT		20	/* Use connect() for UDP. (int) */
 
-#ifdef _KERNEL
 /*
  * Kernel control operations. The default msleep string is "rpcrecv",
  * and sleeps are non-interruptible by default.
@@ -362,8 +321,6 @@ struct rpc_reconupcall {
 	void	*arg;
 };
 #define	CLSET_RECONUPCALL	33	/* Reconnect upcall */
-#endif
-
 
 /*
  * void
@@ -402,8 +359,6 @@ struct rpc_reconupcall {
  * belong to the nettype namespace (/etc/netconfig).
  */
 __BEGIN_DECLS
-#ifdef _KERNEL
-
 /*
  *	struct socket *so;			-- socket
  *	struct sockaddr *svcaddr;		-- servers address
@@ -440,156 +395,6 @@ extern CLIENT *clnt_vc_create(struct socket *so,
 extern CLIENT *clnt_reconnect_create(struct netconfig *nconf,
     struct sockaddr *svcaddr, rpcprog_t program, rpcvers_t version,
     size_t sendsz, size_t recvsz);
-
-#else
-
-extern CLIENT *clnt_create(const char *, const rpcprog_t, const rpcvers_t,
-			   const char *);
-/*
- *
- * 	const char *hostname;			-- hostname
- *	const rpcprog_t prog;			-- program number
- *	const rpcvers_t vers;			-- version number
- *	const char *nettype;			-- network type
- */
-
- /*
- * Generic client creation routine. Just like clnt_create(), except
- * it takes an additional timeout parameter.
- */
-extern CLIENT * clnt_create_timed(const char *, const rpcprog_t,
-	const rpcvers_t, const char *, const struct timeval *);
-/*
- *
- *	const char *hostname;			-- hostname
- *	const rpcprog_t prog;			-- program number
- *	const rpcvers_t vers;			-- version number
- *	const char *nettype;			-- network type
- *	const struct timeval *tp;		-- timeout
- */
-
-/*
- * Generic client creation routine. Supported protocols are which belong
- * to the nettype name space.
- */
-extern CLIENT *clnt_create_vers(const char *, const rpcprog_t, rpcvers_t *,
-				const rpcvers_t, const rpcvers_t,
-				const char *);
-/*
- *	const char *host;		-- hostname
- *	const rpcprog_t prog;		-- program number
- *	rpcvers_t *vers_out;		-- servers highest available version
- *	const rpcvers_t vers_low;	-- low version number
- *	const rpcvers_t vers_high;	-- high version number
- *	const char *nettype;		-- network type
- */
-
-/*
- * Generic client creation routine. Supported protocols are which belong
- * to the nettype name space.
- */
-extern CLIENT * clnt_create_vers_timed(const char *, const rpcprog_t,
-	rpcvers_t *, const rpcvers_t, const rpcvers_t, const char *,
-	const struct timeval *);
-/*
- *	const char *host;		-- hostname
- *	const rpcprog_t prog;		-- program number
- *	rpcvers_t *vers_out;		-- servers highest available version
- *	const rpcvers_t vers_low;	-- low version number
- *	const rpcvers_t vers_high;	-- high version number
- *	const char *nettype;		-- network type
- *	const struct timeval *tp	-- timeout
- */
-
-/*
- * Generic client creation routine. It takes a netconfig structure
- * instead of nettype
- */
-extern CLIENT *clnt_tp_create(const char *, const rpcprog_t,
-			      const rpcvers_t, const struct netconfig *);
-/*
- *	const char *hostname;			-- hostname
- *	const rpcprog_t prog;			-- program number
- *	const rpcvers_t vers;			-- version number
- *	const struct netconfig *netconf; 	-- network config structure
- */
-
-/*
- * Generic client creation routine. Just like clnt_tp_create(), except
- * it takes an additional timeout parameter.
- */
-extern CLIENT * clnt_tp_create_timed(const char *, const rpcprog_t,
-	const rpcvers_t, const struct netconfig *, const struct timeval *);
-/*
- *	const char *hostname;			-- hostname
- *	const rpcprog_t prog;			-- program number
- *	const rpcvers_t vers;			-- version number
- *	const struct netconfig *netconf; 	-- network config structure
- *	const struct timeval *tp		-- timeout
- */
-
-/*
- * Generic TLI create routine. Only provided for compatibility.
- */
-
-extern CLIENT *clnt_tli_create(const int, const struct netconfig *,
-			       struct netbuf *, const rpcprog_t,
-			       const rpcvers_t, const u_int, const u_int);
-/*
- *	const int fd;			-- fd
- *	const struct netconfig *nconf;	-- netconfig structure
- *	struct netbuf *svcaddr;		-- servers address
- *	const u_long prog;			-- program number
- *	const u_long vers;			-- version number
- *	const u_int sendsz;			-- send size
- *	const u_int recvsz;			-- recv size
- */
-
-/*
- * Low level clnt create routine for connectionful transports, e.g. tcp.
- */
-extern CLIENT *clnt_vc_create(const int, const struct netbuf *,
-			      const rpcprog_t, const rpcvers_t,
-			      u_int, u_int);
-/*
- * Added for compatibility to old rpc 4.0. Obsoleted by clnt_vc_create().
- */
-extern CLIENT *clntunix_create(struct sockaddr_un *,
-			       u_long, u_long, int *, u_int, u_int);
-/*
- *	const int fd;				-- open file descriptor
- *	const struct netbuf *svcaddr;		-- servers address
- *	const rpcprog_t prog;			-- program number
- *	const rpcvers_t vers;			-- version number
- *	const u_int sendsz;			-- buffer recv size
- *	const u_int recvsz;			-- buffer send size
- */
-
-/*
- * Low level clnt create routine for connectionless transports, e.g. udp.
- */
-extern CLIENT *clnt_dg_create(const int, const struct netbuf *,
-			      const rpcprog_t, const rpcvers_t,
-			      const u_int, const u_int);
-/*
- *	const int fd;				-- open file descriptor
- *	const struct netbuf *svcaddr;		-- servers address
- *	const rpcprog_t program;		-- program number
- *	const rpcvers_t version;		-- version number
- *	const u_int sendsz;			-- buffer recv size
- *	const u_int recvsz;			-- buffer send size
- */
-
-/*
- * Memory based rpc (for speed check and testing)
- * CLIENT *
- * clnt_raw_create(prog, vers)
- *	u_long prog;
- *	u_long vers;
- */
-extern CLIENT *clnt_raw_create(rpcprog_t, rpcvers_t);
-#endif
-
 __END_DECLS
 
 
@@ -626,96 +431,6 @@ struct rpc_createerr {
 	struct rpc_err cf_error; /* useful when cf_stat == RPC_PMAPFAILURE */
 };
 
-#ifdef _KERNEL
 extern struct rpc_createerr rpc_createerr;
-#else
-__BEGIN_DECLS
-extern struct rpc_createerr	*__rpc_createerr(void);
-__END_DECLS
-#define rpc_createerr		(*(__rpc_createerr()))
-#endif
-
-#ifndef _KERNEL
-/*
- * The simplified interface:
- * enum clnt_stat
- * rpc_call(host, prognum, versnum, procnum, inproc, in, outproc, out, nettype)
- *	const char *host;
- *	const rpcprog_t prognum;
- *	const rpcvers_t versnum;
- *	const rpcproc_t procnum;
- *	const xdrproc_t inproc, outproc;
- *	const char *in;
- *	char *out;
- *	const char *nettype;
- */
-__BEGIN_DECLS
-extern enum clnt_stat rpc_call(const char *, const rpcprog_t,
-			       const rpcvers_t, const rpcproc_t,
-			       const xdrproc_t, const char *,
-			       const xdrproc_t, char *, const char *);
-__END_DECLS
-
-/*
- * RPC broadcast interface
- * The call is broadcasted to all locally connected nets.
- *
- * extern enum clnt_stat
- * rpc_broadcast(prog, vers, proc, xargs, argsp, xresults, resultsp,
- *			eachresult, nettype)
- *	const rpcprog_t		prog;		-- program number
- *	const rpcvers_t		vers;		-- version number
- *	const rpcproc_t		proc;		-- procedure number
- *	const xdrproc_t	xargs;		-- xdr routine for args
- *	caddr_t		argsp;		-- pointer to args
- *	const xdrproc_t	xresults;	-- xdr routine for results
- *	caddr_t		resultsp;	-- pointer to results
- *	const resultproc_t	eachresult;	-- call with each result
- *	const char		*nettype;	-- Transport type
- *
- * For each valid response received, the procedure eachresult is called.
- * Its form is:
- *		done = eachresult(resp, raddr, nconf)
- *			bool_t done;
- *			caddr_t resp;
- *			struct netbuf *raddr;
- *			struct netconfig *nconf;
- * where resp points to the results of the call and raddr is the
- * address if the responder to the broadcast.  nconf is the transport
- * on which the response was received.
- *
- * extern enum clnt_stat
- * rpc_broadcast_exp(prog, vers, proc, xargs, argsp, xresults, resultsp,
- *			eachresult, inittime, waittime, nettype)
- *	const rpcprog_t		prog;		-- program number
- *	const rpcvers_t		vers;		-- version number
- *	const rpcproc_t		proc;		-- procedure number
- *	const xdrproc_t	xargs;		-- xdr routine for args
- *	caddr_t		argsp;		-- pointer to args
- *	const xdrproc_t	xresults;	-- xdr routine for results
- *	caddr_t		resultsp;	-- pointer to results
- *	const resultproc_t	eachresult;	-- call with each result
- *	const int 		inittime;	-- how long to wait initially
- *	const int 		waittime;	-- maximum time to wait
- *	const char		*nettype;	-- Transport type
- */
-
-typedef bool_t (*resultproc_t)(caddr_t, ...);
-
-__BEGIN_DECLS
-extern enum clnt_stat rpc_broadcast(const rpcprog_t, const rpcvers_t,
-				    const rpcproc_t, const xdrproc_t,
-				    caddr_t, const xdrproc_t, caddr_t,
-				    const resultproc_t, const char *);
-extern enum clnt_stat rpc_broadcast_exp(const rpcprog_t, const rpcvers_t,
-					const rpcproc_t, const xdrproc_t,
-					caddr_t, const xdrproc_t, caddr_t,
-					const resultproc_t, const int,
-					const int, const char *);
-__END_DECLS
-
-/* For backward compatibility */
-#include <rpc/clnt_soc.h>
-#endif
 
 #endif /* !_RPC_CLNT_H_ */
diff --git a/sys/rpc/svc.h b/sys/rpc/svc.h
index 43a388984c0070..d8a8d0139cc42d 100644
--- a/sys/rpc/svc.h
+++ b/sys/rpc/svc.h
@@ -40,14 +40,12 @@
 #define _RPC_SVC_H
 #include <sys/cdefs.h>
 
-#ifdef _KERNEL
 #include <sys/queue.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #include <sys/_sx.h>
 #include <sys/condvar.h>
 #include <sys/sysctl.h>
-#endif
 
 /*
  * This interface must manage two items concerning remote procedure calling:
@@ -95,7 +93,6 @@ struct __rpc_svcxprt;
 struct mbuf;
 
 struct xp_ops {
-#ifdef _KERNEL
 	/* receive incoming requests */
 	bool_t	(*xp_recv)(struct __rpc_svcxprt *, struct rpc_msg *,
 	    struct sockaddr **, struct mbuf **);
@@ -110,34 +107,11 @@ struct xp_ops {
 	void	(*xp_destroy)(struct __rpc_svcxprt *);
 	/* catch-all function */
 	bool_t  (*xp_control)(struct __rpc_svcxprt *, const u_int, void *);
-#else
-	/* receive incoming requests */
-	bool_t	(*xp_recv)(struct __rpc_svcxprt *, struct rpc_msg *);
-	/* get transport status */
-	enum xprt_stat (*xp_stat)(struct __rpc_svcxprt *);
-	/* get arguments */
-	bool_t	(*xp_getargs)(struct __rpc_svcxprt *, xdrproc_t, void *);
-	/* send reply */
-	bool_t	(*xp_reply)(struct __rpc_svcxprt *, struct rpc_msg *);
-	/* free mem allocated for args */
-	bool_t	(*xp_freeargs)(struct __rpc_svcxprt *, xdrproc_t, void *);
-	/* destroy this struct */
-	void	(*xp_destroy)(struct __rpc_svcxprt *);
-#endif
 };
 
-#ifndef _KERNEL
-struct xp_ops2 {
-	/* catch-all function */
-	bool_t  (*xp_control)(struct __rpc_svcxprt *, const u_int, void *);
-};
-#endif
-
-#ifdef _KERNEL
 struct __rpc_svcpool;
 struct __rpc_svcgroup;
 struct __rpc_svcthread;
-#endif
 
 /*
  * Server side transport handle. In the kernel, transports have a
@@ -151,7 +125,6 @@ struct __rpc_svcthread;
  *         end for callbacks).
  */
 typedef struct __rpc_svcxprt {
-#ifdef _KERNEL
 	volatile u_int	xp_refs;
 	struct sx	xp_lock;
 	struct __rpc_svcpool *xp_pool;  /* owning pool (see below) */
@@ -186,24 +159,6 @@ typedef struct __rpc_svcxprt {
 	uid_t		xp_uid;
 	gid_t		*xp_gidp;
 	int		xp_doneddp;
-#else
-	int		xp_fd;
-	u_short		xp_port;	 /* associated port number */
-	const struct xp_ops *xp_ops;
-	int		xp_addrlen;	 /* length of remote address */
-	struct sockaddr_in xp_raddr;	 /* remote addr. (backward ABI compat) */
-	/* XXX - fvdl stick this here for ABI backward compat reasons */
-	const struct xp_ops2 *xp_ops2;
-	char		*xp_tp;		 /* transport provider device name */
-	char		*xp_netid;	 /* network token */
-	struct netbuf	xp_ltaddr;	 /* local transport address */
-	struct netbuf	xp_rtaddr;	 /* remote transport address */
-	struct opaque_auth xp_verf;	 /* raw response verifier */
-	void		*xp_p1;		 /* private: for use by svc ops */
-	void		*xp_p2;		 /* private: for use by svc ops */
-	void		*xp_p3;		 /* private: for use by svc lib */
-	int		xp_type;	 /* transport type */
-#endif
 } SVCXPRT;
 
 /*
@@ -211,16 +166,9 @@ typedef struct __rpc_svcxprt {
  */
 typedef struct __rpc_svcauth {
 	const struct svc_auth_ops {
-#ifdef _KERNEL
 		int   (*svc_ah_wrap)(struct __rpc_svcauth *,  struct mbuf **);
 		int   (*svc_ah_unwrap)(struct __rpc_svcauth *, struct mbuf **);
 		void  (*svc_ah_release)(struct __rpc_svcauth *);
-#else
-		int   (*svc_ah_wrap)(struct __rpc_svcauth *, XDR *,
-		    xdrproc_t, caddr_t);
-		int   (*svc_ah_unwrap)(struct __rpc_svcauth *, XDR *,
-		    xdrproc_t, caddr_t);
-#endif
 	} *svc_ah_ops;
 	void *svc_ah_private;
 } SVCAUTH;
@@ -233,8 +181,6 @@ typedef struct __rpc_svcxprt_ext {
 	SVCAUTH		xp_auth;	/* interface to auth methods */
 } SVCXPRT_EXT;
 
-#ifdef _KERNEL
-
 /*
  * The services list
  * Each entry represents a set of procedures (an rpc program).
@@ -399,27 +345,6 @@ typedef struct __rpc_svcpool {
 	SVCGROUP	sp_groups[SVC_MAXGROUPS]; /* Thread/port groups. */
 } SVCPOOL;
 
-#else
-
-/*
- * Service request
- */
-struct svc_req {
-	uint32_t	rq_prog;	/* service program number */
-	uint32_t	rq_vers;	/* service protocol version */
-	uint32_t	rq_proc;	/* the desired procedure */
-	struct opaque_auth rq_cred;	/* raw creds from the wire */
-	void		*rq_clntcred;	/* read only cooked cred */
-	SVCXPRT		*rq_xprt;	/* associated transport */
-};
-
-/*
- *  Approved way of getting address of caller
- */
-#define svc_getrpccaller(x) (&(x)->xp_rtaddr)
-
-#endif
-
 /*
  * Operations defined on an SVCXPRT handle
  *
@@ -428,8 +353,6 @@ struct svc_req {
  * xdrproc_t		 xargs;
  * void *		 argsp;
  */
-#ifdef _KERNEL
-
 #define SVC_ACQUIRE(xprt)			\
 	refcount_acquire(&(xprt)->xp_refs)
 
@@ -456,43 +379,6 @@ struct svc_req {
 #define SVC_CONTROL(xprt, rq, in)			\
 	(*(xprt)->xp_ops->xp_control)((xprt), (rq), (in))
 
-#else
-
-#define SVC_RECV(xprt, msg)				\
-	(*(xprt)->xp_ops->xp_recv)((xprt), (msg))
-#define svc_recv(xprt, msg)				\
-	(*(xprt)->xp_ops->xp_recv)((xprt), (msg))
-
-#define SVC_STAT(xprt)					\
-	(*(xprt)->xp_ops->xp_stat)(xprt)
-#define svc_stat(xprt)					\
-	(*(xprt)->xp_ops->xp_stat)(xprt)
-
-#define SVC_GETARGS(xprt, xargs, argsp)			\
-	(*(xprt)->xp_ops->xp_getargs)((xprt), (xargs), (argsp))
-#define svc_getargs(xprt, xargs, argsp)			\
-	(*(xprt)->xp_ops->xp_getargs)((xprt), (xargs), (argsp))
-
-#define SVC_REPLY(xprt, msg)				\
-	(*(xprt)->xp_ops->xp_reply) ((xprt), (msg))
-#define svc_reply(xprt, msg)				\
-	(*(xprt)->xp_ops->xp_reply) ((xprt), (msg))
-
-#define SVC_FREEARGS(xprt, xargs, argsp)		\
-	(*(xprt)->xp_ops->xp_freeargs)((xprt), (xargs), (argsp))
-#define svc_freeargs(xprt, xargs, argsp)		\
-	(*(xprt)->xp_ops->xp_freeargs)((xprt), (xargs), (argsp))
-
-#define SVC_DESTROY(xprt)				\
-	(*(xprt)->xp_ops->xp_destroy)(xprt)
-#define svc_destroy(xprt)				\
-	(*(xprt)->xp_ops->xp_destroy)(xprt)
-
-#define SVC_CONTROL(xprt, rq, in)			\
-	(*(xprt)->xp_ops2->xp_control)((xprt), (rq), (in))
-
-#endif
-
 #define SVC_EXT(xprt)					\
 	((SVCXPRT_EXT *) xprt->xp_p3)
 
@@ -502,19 +388,12 @@ struct svc_req {
 /*
  * Operations defined on an SVCAUTH handle
  */
-#ifdef _KERNEL
 #define SVCAUTH_WRAP(auth, mp)		\
 	((auth)->svc_ah_ops->svc_ah_wrap(auth, mp))
 #define SVCAUTH_UNWRAP(auth, mp)	\
 	((auth)->svc_ah_ops->svc_ah_unwrap(auth, mp))
 #define SVCAUTH_RELEASE(auth)	\
 	((auth)->svc_ah_ops->svc_ah_release(auth))
-#else
-#define SVCAUTH_WRAP(auth, xdrs, xfunc, xwhere)		\
-	((auth)->svc_ah_ops->svc_ah_wrap(auth, xdrs, xfunc, xwhere))
-#define SVCAUTH_UNWRAP(auth, xdrs, xfunc, xwhere)	\
-	((auth)->svc_ah_ops->svc_ah_unwrap(auth, xdrs, xfunc, xwhere))
-#endif
 
 /*
  * Service registration
@@ -542,14 +421,9 @@ __END_DECLS
  */
 
 __BEGIN_DECLS
-#ifdef _KERNEL
 extern void	svc_unreg(SVCPOOL *, const rpcprog_t, const rpcvers_t);
-#else
-extern void	svc_unreg(const rpcprog_t, const rpcvers_t);
-#endif
 __END_DECLS
 
-#ifdef _KERNEL
 /*
  * Service connection loss registration
  *
@@ -573,7 +447,6 @@ __END_DECLS
 __BEGIN_DECLS
 extern void	svc_loss_unreg(SVCPOOL *, void (*)(SVCXPRT *));
 __END_DECLS
-#endif
 
 /*
  * Transport registration.
@@ -596,8 +469,6 @@ extern void	xprt_unregister(SVCXPRT *);
 extern void	__xprt_unregister_unlocked(SVCXPRT *);
 __END_DECLS
 
-#ifdef _KERNEL
-
 /*
  * Called when a transport has pending requests.
  */
@@ -608,8 +479,6 @@ extern void	xprt_inactive_locked(SVCXPRT *);
 extern void	xprt_inactive_self(SVCXPRT *);
 __END_DECLS
 
-#endif
-
 /*
  * When the service routine is called, it must first check to see if it
  * knows about the procedure;  if not, it should call svcerr_noproc
@@ -637,7 +506,6 @@ __END_DECLS
  */
 
 __BEGIN_DECLS
-#ifdef _KERNEL
 extern bool_t	svc_sendreply(struct svc_req *, xdrproc_t, void *);
 extern bool_t	svc_sendreply_mbuf(struct svc_req *, struct mbuf *);
 extern void	svcerr_decode(struct svc_req *);
@@ -647,16 +515,6 @@ extern void	svcerr_progvers(struct svc_req *, rpcvers_t, rpcvers_t);
 extern void	svcerr_auth(struct svc_req *, enum auth_stat);
 extern void	svcerr_noprog(struct svc_req *);
 extern void	svcerr_systemerr(struct svc_req *);
-#else
-extern bool_t	svc_sendreply(SVCXPRT *, xdrproc_t, void *);
-extern void	svcerr_decode(SVCXPRT *);
-extern void	svcerr_weakauth(SVCXPRT *);
-extern void	svcerr_noproc(SVCXPRT *);
-extern void	svcerr_progvers(SVCXPRT *, rpcvers_t, rpcvers_t);
-extern void	svcerr_auth(SVCXPRT *, enum auth_stat);
-extern void	svcerr_noprog(SVCXPRT *);
-extern void	svcerr_systemerr(SVCXPRT *);
-#endif
 extern int	rpc_reg(rpcprog_t, rpcvers_t, rpcproc_t,
 			char *(*)(char *), xdrproc_t, xdrproc_t,
 			char *);
@@ -673,20 +531,6 @@ __END_DECLS
  * "in-place" results of a select system call (see select, section 2).
  */
 
-#ifndef _KERNEL
-/*
- * Global keeper of rpc service descriptors in use
- * dynamic; must be inspected before each call to select
- */
-extern int svc_maxfd;
-#ifdef FD_SETSIZE
-extern fd_set svc_fdset;
-#define svc_fds svc_fdset.fds_bits[0]	/* compatibility */
-#else
-extern int svc_fds;
-#endif /* def FD_SETSIZE */
-#endif
-
 /*
  * a small program implemented by the svc_rpc implementation itself;
  * also see clnt.h for protocol numbers.
@@ -698,22 +542,11 @@ __END_DECLS
 __BEGIN_DECLS
 extern SVCXPRT *svc_xprt_alloc(void);
 extern void	svc_xprt_free(SVCXPRT *);
-#ifndef _KERNEL
-extern void	svc_getreq(int);
-extern void	svc_getreqset(fd_set *);
-extern void	svc_getreq_common(int);
-struct pollfd;
-extern void	svc_getreq_poll(struct pollfd *, int);
-extern void	svc_run(void);
-extern void	svc_exit(void);
-#else
 extern void	svc_run(SVCPOOL *);
 extern void	svc_exit(SVCPOOL *);
 extern bool_t	svc_getargs(struct svc_req *, xdrproc_t, void *);
 extern bool_t	svc_freeargs(struct svc_req *, xdrproc_t, void *);
 extern void	svc_freereq(struct svc_req *);
-
-#endif
 __END_DECLS
 
 /*
@@ -728,8 +561,6 @@ __END_DECLS
 
 __BEGIN_DECLS
 
-#ifdef _KERNEL
-
 /*
  * Create a new service pool.
  */
@@ -812,110 +643,6 @@ extern SVCXPRT *svc_tli_create(SVCPOOL *, const struct netconfig *,
  *      const size_t sendsz;             -- max sendsize
  *      const size_t recvsz;             -- max recvsize
  */
-
-#else /* !_KERNEL */
-
-/*
- * Transport independent svc_create routine.
- */
-extern int svc_create(void (*)(struct svc_req *, SVCXPRT *),
-			   const rpcprog_t, const rpcvers_t, const char *);
-/*
- *      void (*dispatch)();             -- dispatch routine
- *      const rpcprog_t prognum;        -- program number
- *      const rpcvers_t versnum;        -- version number
- *      const char *nettype;            -- network type
- */
-
-
-/*
- * Generic server creation routine. It takes a netconfig structure
- * instead of a nettype.
- */
-
-extern SVCXPRT *svc_tp_create(void (*)(struct svc_req *, SVCXPRT *),
-				   const rpcprog_t, const rpcvers_t,
-				   const struct netconfig *);
-        /*
-         * void (*dispatch)();            -- dispatch routine
-         * const rpcprog_t prognum;       -- program number
-         * const rpcvers_t versnum;       -- version number
-         * const struct netconfig *nconf; -- netconfig structure
-         */
-
-/*
- * Generic TLI create routine
- */
-extern SVCXPRT *svc_tli_create(const int, const struct netconfig *,
-			       const struct t_bind *, const u_int,
-			       const u_int);
-/*
- *      const int fd;                   -- connection end point
- *      const struct netconfig *nconf;  -- netconfig structure for network
- *      const struct t_bind *bindaddr;  -- local bind address
- *      const u_int sendsz;             -- max sendsize
- *      const u_int recvsz;             -- max recvsize
- */
-
-/*
- * Connectionless and connectionful create routines
- */
-
-extern SVCXPRT *svc_vc_create(const int, const u_int, const u_int);
-/*
- *      const int fd;                           -- open connection end point
- *      const u_int sendsize;                   -- max send size
- *      const u_int recvsize;                   -- max recv size
- */
-
-/*
- * Added for compatibility to old rpc 4.0. Obsoleted by svc_vc_create().
- */
-extern SVCXPRT *svcunix_create(int, u_int, u_int, char *);
-
-extern SVCXPRT *svc_dg_create(const int, const u_int, const u_int);
-        /*
-         * const int fd;                                -- open connection
-         * const u_int sendsize;                        -- max send size
-         * const u_int recvsize;                        -- max recv size
-         */
-
-
-/*
- * the routine takes any *open* connection
- * descriptor as its first input and is used for open connections.
- */
-extern SVCXPRT *svc_fd_create(const int, const u_int, const u_int);
-/*
- *      const int fd;                           -- open connection end point
- *      const u_int sendsize;                   -- max send size
- *      const u_int recvsize;                   -- max recv size
- */
-
-/*
- * Added for compatibility to old rpc 4.0. Obsoleted by svc_fd_create().
- */
-extern SVCXPRT *svcunixfd_create(int, u_int, u_int);
-
-/*
- * Memory based rpc (for speed check and testing)
- */
-extern SVCXPRT *svc_raw_create(void);
-
-/*
- * svc_dg_enable_cache() enables the cache on dg transports.
- */
-int svc_dg_enablecache(SVCXPRT *, const u_int);
-
-int __rpc_get_local_uid(SVCXPRT *_transp, uid_t *_uid);
-
-#endif	/* !_KERNEL */
-
 __END_DECLS
 
-#ifndef _KERNEL
-/* for backward compatibility */
-#include <rpc/svc_soc.h>
-#endif
-
 #endif /* !_RPC_SVC_H */

From ebb36fcaae4dfd3720e43f8e6a9613e5d84227e0 Mon Sep 17 00:00:00 2001
From: Gleb Smirnoff <glebius@FreeBSD.org>
Date: Wed, 8 Jan 2025 20:00:12 -0800
Subject: [PATCH 075/143] rpc: remove svc_create(), it is not used

---
 sys/rpc/svc.h         | 13 --------
 sys/rpc/svc_generic.c | 70 -------------------------------------------
 2 files changed, 83 deletions(-)

diff --git a/sys/rpc/svc.h b/sys/rpc/svc.h
index d8a8d0139cc42d..92755a1984883f 100644
--- a/sys/rpc/svc.h
+++ b/sys/rpc/svc.h
@@ -578,19 +578,6 @@ extern void svcpool_destroy(SVCPOOL *pool);
  */
 extern void svcpool_close(SVCPOOL *pool);
 
-/*
- * Transport independent svc_create routine.
- */
-extern int svc_create(SVCPOOL *, void (*)(struct svc_req *, SVCXPRT *),
-    const rpcprog_t, const rpcvers_t, const char *);
-/*
- *      void (*dispatch)();             -- dispatch routine
- *      const rpcprog_t prognum;        -- program number
- *      const rpcvers_t versnum;        -- version number
- *      const char *nettype;            -- network type
- */
-
-
 /*
  * Generic server creation routine. It takes a netconfig structure
  * instead of a nettype.
diff --git a/sys/rpc/svc_generic.c b/sys/rpc/svc_generic.c
index 6fb43dc5c9406f..12c96eca27c871 100644
--- a/sys/rpc/svc_generic.c
+++ b/sys/rpc/svc_generic.c
@@ -64,76 +64,6 @@
 
 extern int __svc_vc_setflag(SVCXPRT *, int);
 
-/*
- * The highest level interface for server creation.
- * It tries for all the nettokens in that particular class of token
- * and returns the number of handles it can create and/or find.
- *
- * It creates a link list of all the handles it could create.
- * If svc_create() is called multiple times, it uses the handle
- * created earlier instead of creating a new handle every time.
- */
-int
-svc_create(
-	SVCPOOL *pool,
-	void (*dispatch)(struct svc_req *, SVCXPRT *),
-	rpcprog_t prognum,		/* Program number */
-	rpcvers_t versnum,		/* Version number */
-	const char *nettype)		/* Networktype token */
-{
-	int g, num = 0;
-	SVCGROUP *grp;
-	SVCXPRT *xprt;
-	struct netconfig *nconf;
-	void *handle;
-
-	if ((handle = __rpc_setconf(nettype)) == NULL) {
-		printf("svc_create: unknown protocol");
-		return (0);
-	}
-	while ((nconf = __rpc_getconf(handle)) != NULL) {
-		for (g = 0; g < SVC_MAXGROUPS; g++) {
-			grp = &pool->sp_groups[g];
-			mtx_lock(&grp->sg_lock);
-			TAILQ_FOREACH(xprt, &grp->sg_xlist, xp_link) {
-				if (strcmp(xprt->xp_netid, nconf->nc_netid))
-					continue;
-				/* Found an old one, use it */
-				mtx_unlock(&grp->sg_lock);
-				(void) rpcb_unset(prognum, versnum, nconf);
-				if (svc_reg(xprt, prognum, versnum,
-					dispatch, nconf) == FALSE) {
-					printf(
-		"svc_create: could not register prog %u vers %u on %s\n",
-					(unsigned)prognum, (unsigned)versnum,
-					 nconf->nc_netid);
-					mtx_lock(&grp->sg_lock);
-				} else {
-					num++;
-					mtx_lock(&grp->sg_lock);
-					break;
-				}
-			}
-			mtx_unlock(&grp->sg_lock);
-		}
-		if (xprt == NULL) {
-			/* It was not found. Now create a new one */
-			xprt = svc_tp_create(pool, dispatch, prognum, versnum,
-			    NULL, nconf);
-			if (xprt) {
-				num++;
-				SVC_RELEASE(xprt);
-			}
-		}
-	}
-	__rpc_endconf(handle);
-	/*
-	 * In case of num == 0; the error messages are generated by the
-	 * underlying layers; and hence not needed here.
-	 */
-	return (num);
-}
-
 /*
  * The high level interface to svc_tli_create().
  * It tries to create a server for "nconf" and registers the service

From 8e8f8d86e305fe1e90fcfc64c1958e61b359f4f4 Mon Sep 17 00:00:00 2001
From: Gleb Smirnoff <glebius@FreeBSD.org>
Date: Wed, 8 Jan 2025 20:00:12 -0800
Subject: [PATCH 076/143] rpcbind: remove extraneous check for nconf not being
 unix(4)

We are already inside an if block with exactly the same predicate.
---
 usr.sbin/rpcbind/rpcbind.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/usr.sbin/rpcbind/rpcbind.c b/usr.sbin/rpcbind/rpcbind.c
index 44adae366bede3..a836afd24009f9 100644
--- a/usr.sbin/rpcbind/rpcbind.c
+++ b/usr.sbin/rpcbind/rpcbind.c
@@ -416,18 +416,14 @@ init_transport(struct netconfig *nconf)
 		 */
 		if (strcmp("*", hosts[nhostsbak]) == 0)
 		    hosts[nhostsbak] = NULL;
-		if ((strcmp(nconf->nc_netid, "local") != 0) &&
-		    (strcmp(nconf->nc_netid, "unix") != 0)) {
-		    if ((aicode = getaddrinfo(hosts[nhostsbak],
-			servname, &hints, &res)) != 0) {
-			syslog(LOG_ERR,
-			    "cannot get local address for %s: %s",
+		if ((aicode = getaddrinfo(hosts[nhostsbak], servname, &hints,
+		    &res)) != 0) {
+			syslog(LOG_ERR, "cannot get local address for %s: %s",
 			    nconf->nc_netid, gai_strerror(aicode));
 			continue;
-		    }
-		    addrlen = res->ai_addrlen;
-		    sa = (struct sockaddr *)res->ai_addr;
 		}
+		addrlen = res->ai_addrlen;
+		sa = (struct sockaddr *)res->ai_addr;
 		oldmask = umask(S_IXUSR|S_IXGRP|S_IXOTH);
 		if (bind(fd, sa, addrlen) != 0) {
 		    syslog(LOG_ERR, "cannot bind %s on %s: %m",

From e7fbf52a3e38c4bc4249e6541fe7e42ecc119656 Mon Sep 17 00:00:00 2001
From: Michael Tuexen <tuexen@FreeBSD.org>
Date: Thu, 9 Jan 2025 06:27:05 +0100
Subject: [PATCH 077/143] TCP BBR: remove dead code

No functional change intended.

Reviewed by:		Peter Lei, rrs (earlier version)
CID:			1523802
MFC after:		1 week
Sponsored by:		Netflix, Inc.
Differential Revision:	https://reviews.freebsd.org/D48341
---
 sys/netinet/tcp_stacks/bbr.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index 4600088bd1a119..17a0744961ce83 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -6781,8 +6781,6 @@ bbr_update_rtt(struct tcpcb *tp, struct tcp_bbr *bbr,
 			t = cts - rsm->r_tim_lastsent[0];
 		else
 			t = 1;
-		if ((int)t <= 0)
-			t = 1;
 		bbr->r_ctl.rc_last_rtt = t;
 		bbr_update_bbr_info(bbr, rsm, t, cts, to->to_tsecr, 0,
 				    BBR_RTT_BY_EXACTMATCH, rsm->r_tim_lastsent[0], ack_type, to);
@@ -6823,8 +6821,6 @@ bbr_update_rtt(struct tcpcb *tp, struct tcp_bbr *bbr,
 					t = cts - rsm->r_tim_lastsent[i];
 				else
 					t = 1;
-				if ((int)t <= 0)
-					t = 1;
 				bbr->r_ctl.rc_last_rtt = t;
 				bbr_update_bbr_info(bbr, rsm, t, cts, to->to_tsecr, uts, BBR_RTT_BY_TSMATCHING,
 						    rsm->r_tim_lastsent[i], ack_type, to);

From 1b4e1171315398decb1ad3fceffcacf29cff218b Mon Sep 17 00:00:00 2001
From: Emmanuel Vadot <manu@FreeBSD.org>
Date: Tue, 7 Jan 2025 09:57:57 +0100
Subject: [PATCH 078/143] loader: Fix orb position

Fix the orb position to be aligned with the menu

Differential Revision:	https://reviews.freebsd.org/D48353
Reviewed by:	imp, tsoome
Sponsored by:	Beckhoff Automation GmbH & Co. KG
---
 stand/lua/drawer.lua    | 4 ++--
 stand/lua/gfx-orb.lua   | 2 +-
 stand/lua/gfx-orbbw.lua | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/stand/lua/drawer.lua b/stand/lua/drawer.lua
index e55702ffee6c52..2d04e29ac46234 100644
--- a/stand/lua/drawer.lua
+++ b/stand/lua/drawer.lua
@@ -470,9 +470,9 @@ logodefs = {
 }
 
 brand_position = {x = 2, y = 1}
-logo_position = {x = 46, y = 4}
+logo_position = {x = 40, y = 10}
 menu_position = {x = 5, y = 10}
-frame_size = {w = 42, h = 14}
+frame_size = {w = 39, h = 14}
 default_shift = {x = 0, y = 0}
 shift = default_shift
 
diff --git a/stand/lua/gfx-orb.lua b/stand/lua/gfx-orb.lua
index 00f4aeb3bcebd1..cd834a2d6b8eca 100644
--- a/stand/lua/gfx-orb.lua
+++ b/stand/lua/gfx-orb.lua
@@ -45,7 +45,7 @@ return {
 		    "         .---.....----.\027[m",
 		},
 		requires_color = true,
-		shift = {x = 2, y = 3},
+		shift = {x = 2, y = -1},
 		image = "/boot/images/freebsd-logo-rev.png",
 		image_rl = 15
 	}
diff --git a/stand/lua/gfx-orbbw.lua b/stand/lua/gfx-orbbw.lua
index 93ffd2366196a9..a97174a6a5a4b7 100644
--- a/stand/lua/gfx-orbbw.lua
+++ b/stand/lua/gfx-orbbw.lua
@@ -44,6 +44,6 @@ return {
 		    "      .--             `--.",
 		    "         .---.....----.",
 		},
-		shift = {x = 2, y = 4},
+		shift = {x = 2, y = -1},
 	}
 }

From ee233742a5697f64d0f1d722b5e73ff2c5998c62 Mon Sep 17 00:00:00 2001
From: Emmanuel Vadot <manu@FreeBSD.org>
Date: Tue, 7 Jan 2025 10:34:35 +0100
Subject: [PATCH 079/143] loader: Rework kernel menu section

With pkgbase we can have long kernel names, so create a new section
for the kernel name.
Do not show the "default" text: we already show the "1 of X" part at
the end of the line and the default kernel is always number 1, so it's
a bit redundant.

Differential Revision:	https://reviews.freebsd.org/D48354
Reviewed by:	imp, tsoome
Sponsored by:	Beckhoff Automation GmbH & Co. KG
---
 stand/lua/menu.lua | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/stand/lua/menu.lua b/stand/lua/menu.lua
index 2d92be3b7c6e27..7c36b6c8d3c8ec 100644
--- a/stand/lua/menu.lua
+++ b/stand/lua/menu.lua
@@ -255,9 +255,16 @@ menu.welcome = {
 			},
 			{
 				entry_type = core.MENU_SEPARATOR,
-				name = "Options:",
+				name = "Kernel:",
 			},
 			menu_entries.kernel_options,
+			{
+				entry_type = core.MENU_SEPARATOR,
+			},
+			{
+				entry_type = core.MENU_SEPARATOR,
+				name = "Options:",
+			},
 			menu_entries.boot_options,
 			menu_entries.zpool_checkpoints,
 			menu_entries.boot_envs,
@@ -332,22 +339,19 @@ menu.welcome = {
 			items = core.kernelList,
 			name = function(idx, choice, all_choices)
 				if #all_choices == 0 then
-					return "Kernel: "
+					return ""
 				end
 
-				local is_default = (idx == 1)
-				local kernel_name = ""
+				local kernel_name
 				local name_color
-				if is_default then
+				if idx == 1 then
 					name_color = color.escapefg(color.GREEN)
-					kernel_name = "default/"
 				else
 					name_color = color.escapefg(color.CYAN)
 				end
-				kernel_name = kernel_name .. name_color ..
-				    choice .. color.resetfg()
-				return color.highlight("K") .. "ernel: " ..
-				    kernel_name .. " (" .. idx .. " of " ..
+				kernel_name = name_color .. choice ..
+				    color.resetfg()
+				return kernel_name .. " (" .. idx .. " of " ..
 				    #all_choices .. ")"
 			end,
 			func = function(_, choice, _)

From a8d9bd3fa5855fe7583ed05946296ab6b9937d69 Mon Sep 17 00:00:00 2001
From: Baptiste Daroussin <bapt@FreeBSD.org>
Date: Wed, 8 Jan 2025 12:13:54 +0100
Subject: [PATCH 080/143] bintrans(1): qp switch to getopt_long

In preparation for more arguments, switch bintrans qp argument parsing
to getopt_long. While here, make the decoding argument -d|--decode
for compatibility with base64 encoding/decoding.

MFC After:	1 week
Reviewed by:	pstef
Differential Revision:	https://reviews.freebsd.org/D48380
---
 usr.bin/bintrans/bintrans.1 |  4 +--
 usr.bin/bintrans/qp.c       | 61 +++++++++++++++++++++----------------
 2 files changed, 36 insertions(+), 29 deletions(-)

diff --git a/usr.bin/bintrans/bintrans.1 b/usr.bin/bintrans/bintrans.1
index 3376ecd332edcc..4177a5c6b9ebec 100644
--- a/usr.bin/bintrans/bintrans.1
+++ b/usr.bin/bintrans/bintrans.1
@@ -25,7 +25,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd January 23, 2024
+.Dd January 8, 2025
 .Dt BINTRANS 1
 .Os
 .Sh NAME
@@ -230,7 +230,7 @@ through a dedicated program:
 is a quoted-printable converter
 and accepts the following options:
 .Bl -tag -width indent
-.It Fl u
+.It Fl d
 Decode.
 .It Fl o Ar output_file
 Output to
diff --git a/usr.bin/bintrans/qp.c b/usr.bin/bintrans/qp.c
index c2c9dfa7a224b7..3bff47945acf9e 100644
--- a/usr.bin/bintrans/qp.c
+++ b/usr.bin/bintrans/qp.c
@@ -26,6 +26,7 @@
  */
 
 #include <ctype.h>
+#include <getopt.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <string.h>
@@ -151,44 +152,50 @@ static void
 usage(void)
 {
 	fprintf(stderr,
-	   "usage: bintrans qp [-u] [-o outputfile] [file name]\n");
+	   "usage: bintrans qp [-d] [-o outputfile] [file name]\n");
 }
 
 int
 main_quotedprintable(int argc, char *argv[])
 {
-	int i;
+	int ch;
 	bool encode = true;
 	FILE *fp = stdin;
 	FILE *fpo = stdout;
 
-	for (i = 1; i < argc; ++i) {
-		if (argv[i][0] == '-') {
-			switch (argv[i][1]) {
-			case 'o':
-				if (++i >= argc) {
-					fprintf(stderr, "qp: -o requires a file name.\n");
-					exit(EXIT_FAILURE);
-				}
-				fpo = fopen(argv[i], "w");
-				if (fpo == NULL) {
-					perror(argv[i]);
-					exit(EXIT_FAILURE);
-				}
-				break;
-			case 'u':
-				encode = false;
-				break;
-			default:
-				usage();
-				exit(EXIT_FAILURE);
-			}
-		} else {
-			fp = fopen(argv[i], "r");
-			if (fp == NULL) {
-				perror(argv[i]);
+	static const struct option opts[] =
+	{
+		{ "decode", no_argument,		NULL, 'd'},
+		{ "output", required_argument,		NULL, 'o'},
+		{NULL,		no_argument,		NULL, 0}
+	};
+
+	while ((ch = getopt_long(argc, argv, "do:u", opts, NULL)) != -1) {
+		switch(ch) {
+		case 'o':
+			fpo = fopen(optarg, "w");
+			if (fpo == NULL) {
+				perror(optarg);
 				exit(EXIT_FAILURE);
 			}
+			break;
+		case 'u':
+			/* FALLTHROUGH for backward compatibility */
+		case 'd':
+			encode = false;
+			break;
+		default:
+			usage();
+			exit(EXIT_FAILURE);
+		}
+	};
+	argc -= optind;
+	argv += optind;
+	if (argc > 0) {
+		fp = fopen(argv[0], "r");
+		if (fp == NULL) {
+			perror(argv[0]);
+			exit(EXIT_FAILURE);
 		}
 	}
 	qp(fp, fpo, encode);

From beab8b1ddf86a88e0605562f2cc1d6a26c68604f Mon Sep 17 00:00:00 2001
From: Baptiste Daroussin <bapt@FreeBSD.org>
Date: Wed, 8 Jan 2025 13:39:30 +0100
Subject: [PATCH 081/143] bintrans(1): RFC2047 variant of quoted print

MFC After:	1 week
Reviewed by:	pstef
Differential Revision:	https://reviews.freebsd.org/D48381
---
 usr.bin/bintrans/bintrans.1 |  2 ++
 usr.bin/bintrans/qp.c       | 32 +++++++++++++++++++++++---------
 2 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/usr.bin/bintrans/bintrans.1 b/usr.bin/bintrans/bintrans.1
index 4177a5c6b9ebec..02571067c8f0c0 100644
--- a/usr.bin/bintrans/bintrans.1
+++ b/usr.bin/bintrans/bintrans.1
@@ -236,6 +236,8 @@ Decode.
 Output to
 .Ar output_file
 instead of standard output.
+.It Fl r
+Encode/Decode in RFC2047 specific variant.
 .El
 .Sh EXAMPLES
 The following example packages up a source tree, compresses it,
diff --git a/usr.bin/bintrans/qp.c b/usr.bin/bintrans/qp.c
index 3bff47945acf9e..862db437f4e066 100644
--- a/usr.bin/bintrans/qp.c
+++ b/usr.bin/bintrans/qp.c
@@ -51,7 +51,7 @@ decode_char(const char *s)
 
 
 static void
-decode_quoted_printable(const char *body, FILE *fpo)
+decode_quoted_printable(const char *body, FILE *fpo, bool rfc2047)
 {
 	while (*body != '\0') {
 		switch (*body) {
@@ -80,6 +80,12 @@ decode_quoted_printable(const char *body, FILE *fpo)
 			fputc(decode_char(body), fpo);
 			body += 2;
 			break;
+		case '_':
+			if (rfc2047) {
+				fputc(0x20, fpo);
+				break;
+			}
+			/* FALLTHROUGH */
 		default:
 			fputc(*body, fpo);
 			break;
@@ -89,7 +95,7 @@ decode_quoted_printable(const char *body, FILE *fpo)
 }
 
 static void
-encode_quoted_printable(const char *body, FILE *fpo)
+encode_quoted_printable(const char *body, FILE *fpo, bool rfc2047)
 {
 	const char *end = body + strlen(body);
 	size_t linelen = 0;
@@ -111,7 +117,10 @@ encode_quoted_printable(const char *body, FILE *fpo)
 			if ((*body == ' ' || *body == '\t') &&
 			    body + 1 < end &&
 			    (body[1] != '\n' && body[1] != '\r')) {
-				fputc(*body, fpo);
+				if (*body == 0x20 && rfc2047)
+					fputc('_', fpo);
+				else
+					fputc(*body, fpo);
 				prev = *body;
 			} else {
 				fprintf(fpo, "=%02X", (unsigned char)*body);
@@ -135,16 +144,16 @@ encode_quoted_printable(const char *body, FILE *fpo)
 }
 
 static void
-qp(FILE *fp, FILE *fpo, bool encode)
+qp(FILE *fp, FILE *fpo, bool encode, bool rfc2047)
 {
 	char *line = NULL;
 	size_t linecap = 0;
-	void (*codec)(const char *line, FILE *f);
+	void (*codec)(const char *line, FILE *f, bool rfc2047);
 
 	codec = encode ? encode_quoted_printable : decode_quoted_printable ;
 
 	while (getline(&line, &linecap, fp) > 0)
-		codec(line, fpo);
+		codec(line, fpo, rfc2047);
 	free(line);
 }
 
@@ -152,7 +161,7 @@ static void
 usage(void)
 {
 	fprintf(stderr,
-	   "usage: bintrans qp [-d] [-o outputfile] [file name]\n");
+	   "usage: bintrans qp [-d] [-r] [-o outputfile] [file name]\n");
 }
 
 int
@@ -160,6 +169,7 @@ main_quotedprintable(int argc, char *argv[])
 {
 	int ch;
 	bool encode = true;
+	bool rfc2047 = false;
 	FILE *fp = stdin;
 	FILE *fpo = stdout;
 
@@ -167,10 +177,11 @@ main_quotedprintable(int argc, char *argv[])
 	{
 		{ "decode", no_argument,		NULL, 'd'},
 		{ "output", required_argument,		NULL, 'o'},
+		{ "rfc2047", no_argument,		NULL, 'r'},
 		{NULL,		no_argument,		NULL, 0}
 	};
 
-	while ((ch = getopt_long(argc, argv, "do:u", opts, NULL)) != -1) {
+	while ((ch = getopt_long(argc, argv, "do:ru", opts, NULL)) != -1) {
 		switch(ch) {
 		case 'o':
 			fpo = fopen(optarg, "w");
@@ -184,6 +195,9 @@ main_quotedprintable(int argc, char *argv[])
 		case 'd':
 			encode = false;
 			break;
+		case 'r':
+			rfc2047 = true;
+			break;
 		default:
 			usage();
 			exit(EXIT_FAILURE);
@@ -198,7 +212,7 @@ main_quotedprintable(int argc, char *argv[])
 			exit(EXIT_FAILURE);
 		}
 	}
-	qp(fp, fpo, encode);
+	qp(fp, fpo, encode, rfc2047);
 
 	return (EXIT_SUCCESS);
 }

From 4413d9f3775118c4d8c082a404dd7eb04fe9636a Mon Sep 17 00:00:00 2001
From: Baptiste Daroussin <bapt@FreeBSD.org>
Date: Thu, 9 Jan 2025 10:47:22 +0100
Subject: [PATCH 082/143] usb_vendors: update to 2024.12.04

---
 share/misc/usb_vendors | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/share/misc/usb_vendors b/share/misc/usb_vendors
index 41b367d1a0d6bb..3304de282c1853 100644
--- a/share/misc/usb_vendors
+++ b/share/misc/usb_vendors
@@ -9,8 +9,8 @@
 #	The latest version can be obtained from
 #		http://www.linux-usb.org/usb.ids
 #
-# Version: 2024.07.04
-# Date:    2024-07-04 20:34:02
+# Version: 2024.12.04
+# Date:    2024-12-04 20:34:02
 #
 
 # Vendors, devices and interfaces. Please keep sorted.
@@ -16876,7 +16876,7 @@
 	0256  Schwalm & Tate LLC pISO Raspberry Pi Hat
 	053a  Hackerspace San Salvador HSSV SAMR21-Mote
 	0cbd  Andrzej Szombierski kuku.eu.org keyboard
-	0d32  ODrive Robotics ODrive v3
+	0d32  ODrive Robotics ODrive
 	1001  InterBiometrics Hub
 	1002  InterBiometrics Relais
 	1003  InterBiometrics IBSecureCam-P

From 2f82bf3521f955c0ef9cc0019b7f86c13020660c Mon Sep 17 00:00:00 2001
From: Baptiste Daroussin <bapt@FreeBSD.org>
Date: Thu, 9 Jan 2025 10:47:59 +0100
Subject: [PATCH 083/143] pci_vendors: update to 2024.11.25

---
 share/misc/pci_vendors | 602 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 509 insertions(+), 93 deletions(-)

diff --git a/share/misc/pci_vendors b/share/misc/pci_vendors
index 968338dd109999..0eebacf92d410d 100644
--- a/share/misc/pci_vendors
+++ b/share/misc/pci_vendors
@@ -1,8 +1,8 @@
 #
 #	List of PCI ID's
 #
-#	Version: 2024.09.20
-#	Date:    2024-09-20 03:15:02
+#	Version: 2024.11.25
+#	Date:    2024-11-25 03:15:02
 #
 #	Maintained by Albert Pool, Martin Mares, and other volunteers from
 #	the PCI ID Project at https://pci-ids.ucw.cz/.
@@ -46,13 +46,20 @@
 	7a10  Hyper Transport Bridge Controller
 	7a14  EHCI USB Controller
 	7a15  Vivante GPU (Graphics Processing Unit)
+	7a18  SATA 3 AHCI Controller
 	7a19  PCI-to-PCI Bridge
+	7a1b  SPI Controller
 	7a24  OHCI USB Controller
 # Found on 7A2000 PCH
 	7a25  LG100 GPU
 	7a29  PCI-to-PCI Bridge
+	7a34  xHCI USB Controller
 # Found on 7A2000 PCH
 	7a36  Display Controller
+	7a39  PCIe x1 Root Port
+	7a49  PCIe x4 Root Port
+	7a59  PCIe x8 Root Port
+	7a69  PCIe x16 Root Port
 0018  Fn-Link Technology Limited
 	6252  6252CPUB 802.11ax PCIe Wireless Network Adapter
 001c  PEAK-System Technik GmbH
@@ -3992,6 +3999,7 @@
 # Reference
 		1002 0e3a  Radeon RX 6950 XT
 		1849 5230  Navi 21 [ASRock OC Forumla Radeon RX 6950XT]
+		1849 5238  Navi 21 [ASRock Radeon RX 6950 XT Phantom Gaming OC]
 		1da2 441d  Navi 21 [Sapphire Nitro+ Radeon RX 6950 XT]
 		1eae 6950  Navi 21 [XFX Speedster MERC319 Radeon RX 6950 XT]
 	73ab  Navi 21 Pro-XLA [Radeon Pro W6800X/Radeon Pro W6800X Duo]
@@ -4055,11 +4063,13 @@
 	744c  Navi 31 [Radeon RX 7900 XT/7900 XTX/7900 GRE/7900M]
 		1002 0e3b  RX 7900 XTX / RX 7900 GRE [XFX]
 		1043 0506  TUF Gaming Radeon RX 7900 XTX OC
+		148c 2425  HELLHOUND RX 7900 GRE
 		1849 5304  Radeon RX 7900 XTX
 		1da2 471e  PULSE RX 7900 XTX
 		1da2 475e  PULSE RX 7900 GRE
 		1da2 e471  NITRO+ RX 7900 XTX Vapor-X
 		1eae 7901  RX-79XMERCB9 [SPEEDSTER MERC 310 RX 7900 XTX]
+		1eae 790a  RX-79GMERCBR [XFX RX 7900 GRE]
 	745e  Navi 31 [Radeon Pro W7800]
 	7460  Navi32 GL-XL [AMD Radeon PRO V710]
 	7461  Navi 32 [AMD Radeon PRO V710]
@@ -4075,7 +4085,11 @@
 	7499  Navi 33 [Radeon RX 7400/7300/Pro W7400]
 	74a0  Aqua Vanjaram [Instinct MI300A]
 	74a1  Aqua Vanjaram [Instinct MI300X]
+	74a2  Aqua Vanjaram [Instinct MI308X]
+	74a5  Aqua Vanjaram [Instinct MI325X]
+	74a9  Aqua Vanjaram [Instinct MI300X HF]
 	74b5  Aqua Vanjaram [Instinct MI300X VF]
+	74bd  Aqua Vanjaram [Instinct MI300X HF]
 	7833  RS350 Host Bridge
 	7834  RS350 [Radeon 9100 PRO/XT IGP]
 	7835  RS350M [Mobility Radeon 9000 IGP]
@@ -4434,7 +4448,6 @@
 	99a2  Trinity 2 [Radeon HD 7420G]
 	99a4  Trinity 2 [Radeon HD 7400G]
 	aa00  R600 HDMI Audio [Radeon HD 2900 GT/PRO/XT]
-	aa01  RV635 HDMI Audio [Radeon HD 3650/3730/3750]
 	aa08  RV630 HDMI Audio [Radeon HD 2600 PRO/XT / HD 3610]
 	aa10  RV610 HDMI Audio [Radeon HD 2350 PRO / 2400 PRO/XT / HD 3410]
 		174b aa10  Radeon HD 2400 PRO
@@ -5007,6 +5020,14 @@
 	1202  Family 10h Processor DRAM Controller
 	1203  Family 10h Processor Miscellaneous Control
 	1204  Family 10h Processor Link Control
+	12c0  Turin Data Fabric; Function 0
+	12c1  Turin Data Fabric; Function 1
+	12c2  Turin Data Fabric; Function 2
+	12c3  Turin Data Fabric; Function 3
+	12c4  Turin Data Fabric; Function 4
+	12c5  Turin Data Fabric; Function 5
+	12c6  Turin Data Fabric; Function 6
+	12c7  Turin Data Fabric; Function 7
 	1300  Family 11h Processor HyperTransport Configuration
 	1301  Family 11h Processor Address Map
 	1302  Family 11h Processor DRAM Controller
@@ -5130,6 +5151,7 @@
 	1480  Starship/Matisse Root Complex
 		1462 7c37  X570-A PRO motherboard
 		15d9 1b95  H12SSL-i
+		1849 1480  ROME2D32LM3
 	1481  Starship/Matisse IOMMU
 	1482  Starship/Matisse PCIe Dummy Host Bridge
 	1483  Starship/Matisse GPP Bridge
@@ -5163,18 +5185,72 @@
 	149c  Matisse USB 3.0 Host Controller
 		1462 7c37  X570-A PRO motherboard
 	149d  Vangogh CVIP
+	149e  Genoa/Bergamo IOMMU
+	149f  Genoa/Bergamo Dummy Host Bridge
+	14a4  Genoa/Bergamo Root Complex
+	14a5  Genoa/Bergamo GPP Bridge
+	14a6  Genoa/Bergamo RCEC
+	14a7  Genoa/Bergamo Internal PCIe GPP Bridge to Bus [D:B]
+	14aa  Genoa/Bergamo GPP Bridge
+	14ab  Genoa/Bergamo GPP Bridge
+	14ac  Genoa/Bergamo Dummy Function
+	14ad  Genoa/Bergamo Data Fabric; Function 0
+	14ae  Genoa/Bergamo Data Fabric; Function 1
+	14af  Genoa/Bergamo Data Fabric; Function 2
+	14b0  Genoa/Bergamo Data Fabric; Function 3
+	14b1  Genoa/Bergamo Data Fabric; Function 4
+	14b2  Genoa/Bergamo Data Fabric; Function 5
+	14b3  Genoa/Bergamo Data Fabric; Function 6
+	14b4  Genoa/Bergamo Data Fabric; Function 7
 	14b5  Family 17h-19h PCIe Root Complex
 	14b6  Family 17h-19h IOMMU
 	14b7  Family 17h-19h PCIe Dummy Host Bridge
 	14b8  Family 17h-19h PCIe GPP Bridge
 	14b9  Family 17h-19h Internal PCIe GPP Bridge
 	14ba  Family 17h-19h PCIe GPP Bridge
+	14c1  Secondary vNTB
 # Server device
 	14ca  Genoa CCP/PSP 4.0 Device
 	14cd  Family 19h USB4/Thunderbolt PCIe tunnel
-	14de  Phoenix PCIe Dummy Function
+	14d8  Raphael/Granite Ridge Root Complex
+	14d9  Raphael/Granite Ridge IOMMU
+	14da  Raphael/Granite Ridge Dummy Host Bridge
+	14db  Raphael/Granite Ridge GPP Bridge
+	14dc  SDXI
+	14dd  Raphael/Granite Ridge Internal GPP Bridge to Bus [C:A]
+	14de  Raphael/Granite Ridge PCIe Dummy Function
+	14e0  Raphael/Granite Ridge Data Fabric; Function 0
+	14e1  Raphael/Granite Ridge Data Fabric; Function 1
+	14e2  Raphael/Granite Ridge Data Fabric; Function 2
+	14e3  Raphael/Granite Ridge Data Fabric; Function 3
+	14e4  Raphael/Granite Ridge Data Fabric; Function 4
+	14e5  Raphael/Granite Ridge Data Fabric; Function 5
+	14e6  Raphael/Granite Ridge Data Fabric; Function 6
+	14e7  Raphael/Granite Ridge Data Fabric; Function 7
+	14e8  Phoenix Root Complex
+	14e9  Phoenix IOMMU
+	14ea  Phoenix Dummy Host Bridge
+	14eb  Phoenix Internal GPP Bridge to Bus [C:A]
+	14ec  Phoenix Dummy Function
+	14ed  Phoenix GPP Bridge
+	14ee  Phoenix GPP Bridge
 	14ef  Family 19h USB4/Thunderbolt PCIe tunnel
+	14f0  Phoenix Data Fabric; Function 0
+	14f1  Phoenix Data Fabric; Function 1
+	14f2  Phoenix Data Fabric; Function 2
+	14f3  Phoenix Data Fabric; Function 3
+	14f4  Phoenix Data Fabric; Function 4
+	14f5  Phoenix Data Fabric; Function 5
+	14f6  Phoenix Data Fabric; Function 6
+	14f7  Phoenix Data Fabric; Function 7
 	1502  AMD IPU Device
+	1507  Strix Root Complex
+	1508  Strix IOMMU
+	1509  Strix Dummy Host Bridge
+	150a  Strix PCIe USB4 Bridge
+	150b  Strix GPP Bridge
+	150c  Strix Internal GPP Bridge to Bus [C:A]
+	150d  Strix PCIe Dummy function
 	1510  Family 14h Processor Root Complex
 		174b 1001  PURE Fusion Mini
 	1512  Family 14h Processor Root Port
@@ -5194,12 +5270,11 @@
 	1537  Kabini/Mullins PSP-Platform Security Processor
 	1538  Family 16h Processor Function 0
 	1539  Kabini P2P Bridge for PCIe Ports[4:0]
-# AMD EPYC Turin CPU
-	153a  Family 1Ah (Models 00h-0Fh) Root Complex
-# AMD EPYC Turin CPU
-	153b  Family 1Ah (Models 00h-0Fh) IOMMU
-# AMD EPYC Turin CPU
-	153d  Family 1Ah (Models 00h-0Fh) PCIe Dummy Host Bridge
+	153a  Turin Root Complex
+	153b  Turin IOMMU
+	153c  Turin RCEC
+	153d  Turin PCIe Dummy Host Bridge
+	153e  Turin GPP Bridge
 	1540  Kryptos/Cato/Garfield/Garfield+/Arlene/Pooky HT Configuration
 	1541  Kryptos/Cato/Garfield/Garfield+/Arlene/Pooky Address Maps
 	1542  Kryptos/Cato/Garfield/Garfield+/Arlene/Pooky DRAM Configuration
@@ -5216,10 +5291,10 @@
 	154f  Anubis Audio Processor
 	1550  Garfield+/Arlene/Pooky/Anubis SPLL Configuration
 	1553  Arlene/Pooky P2P Bridge for PCIE (3:0)
-# AMD EPYC Turin CPU
-	1555  Family 1Ah (Models 00h-0Fh) Internal PCIe GPP Bridge
-# AMD EPYC Turin CPU
-	1556  Family 1Ah (Models 00h-0Fh) PCIe Dummy Function
+	1554  Turin GPP Bridge
+	1555  Turin Internal PCIe GPP Bridge to Bus [D:C]
+	1556  Turin PCIe Dummy Function
+	1557  Turin USB 3.1 xHCI
 	155b  Anubis Root Complex
 	155c  Anubis IOMMU
 	155d  Anubis UMI PCIe Dummy Bridge
@@ -5228,6 +5303,7 @@
 	1566  Family 16h (Models 30h-3fh) Processor Root Complex
 	1567  Mullins IOMMU
 	156b  Family 16h (Models 30h-3fh) Host Bridge
+	156e  Turin CCP/ASP
 	1570  Family 15h (Models 60h-6fh) Processor Function 0
 	1571  Family 15h (Models 60h-6fh) Processor Function 1
 	1572  Family 15h (Models 60h-6fh) Processor Function 2
@@ -5266,11 +5342,13 @@
 	15b3  Stoney Miscellaneous Configuration
 	15b4  Stoney PM Configuration
 	15b5  Stoney NB Performance Monitor
+	15b6  Raphael/Granite Ridge USB 3.1 xHCI
+	15b7  Raphael/Granite Ridge USB 3.1 xHCI
 	15bc  Stoney PCIe [GFX,GPP] Bridge [4:0]
 	15be  Stoney Audio Processor
 	15c4  Phoenix USB4/Thunderbolt NHI controller #1
 	15c5  Phoenix USB4/Thunderbolt NHI controller #2
-	15c7  Family 19h (Model 74h) CCP/PSP 3.0 Device
+	15c7  Phoenix CCP/PSP 3.0 Device
 	15d0  Raven/Raven2 Root Complex
 		103c 8615  Pavilion Laptop 15-cw1xxx
 		1043 876b  PRIME B450M-A Motherboard
@@ -5309,7 +5387,7 @@
 	15e2  ACP/ACP3X/ACP6x Audio Coprocessor
 		17aa 5124  ThinkPad E595
 		ea50 ce19  mCOM10-L1900
-	15e3  Family 17h/19h HD Audio Controller
+	15e3  Family 17h/19h/1ah HD Audio Controller
 		103c 8615  Pavilion Laptop 15-cw1xxx
 		103c 8b17  ProBook 445 G9/455 G9
 		1043 86c7  PRIME B450M-A Motherboard
@@ -5402,6 +5480,8 @@
 	1647  VanGogh PCIe GPP Bridge
 	1648  VanGogh Internal PCIe GPP Bridge to Bus
 	1649  Family 19h PSP/CCP
+	164a  Sensor Fusion Hub
+	164b  Non-Sensor Fusion Hub
 	164f  Milan IOMMU
 	1650  Milan Data Fabric; Function 0
 	1651  Milan Data Fabric; Function 1
@@ -5437,6 +5517,14 @@
 	167e  Rembrandt Data Fabric: Device 18h; Function 5
 	167f  Rembrandt Data Fabric: Device 18h; Function 6
 	1680  Rembrandt Data Fabric: Device 18h; Function 7
+	16f8  Strix Data Fabric; Function 0
+	16f9  Strix Data Fabric; Function 1
+	16fa  Strix Data Fabric; Function 2
+	16fb  Strix Data Fabric; Function 3
+	16fc  Strix Data Fabric; Function 4
+	16fd  Strix Data Fabric; Function 5
+	16fe  Strix Data Fabric; Function 6
+	16ff  Strix Data Fabric; Function 7
 	1700  Family 12h/14h Processor Function 0
 	1701  Family 12h/14h Processor Function 1
 	1702  Family 12h/14h Processor Function 2
@@ -5454,6 +5542,8 @@
 	1716  Family 12h/14h Processor Function 5
 	1718  Family 12h/14h Processor Function 6
 	1719  Family 12h/14h Processor Function 7
+	17e0  Strix CCP/ASP
+	17f0  Strix Neural Processing Unit
 	2000  79C97x [PCnet32 LANCE]
 		1014 2000  NetFinity 10/100 Fast Ethernet
 		1022 2000  PCnet - Fast 79C971
@@ -5808,7 +5898,8 @@
 	5225  M5225
 	5229  M5229
 	5235  M5235
-	5237  M5237 PCI USB Host Controller
+	5237  OHCI USB Controller
+	5239  EHCI USB Controller
 	5240  EIDE Controller
 	5241  PCMCIA Bridge
 	5242  General Purpose Controller
@@ -6731,28 +6822,28 @@
 	c066  3010S Ultra3 Dual Channel
 1045  OPTi Inc.
 	a0f8  82C750 [Vendetta] USB Controller
-	c101  92C264
+	c101  82C264 GUI Accelerator
 	c178  92C178
 	c556  82X556 [Viper]
 	c557  82C557 [Viper-M]
 	c558  82C558 [Viper-M ISA+IDE]
-	c567  82C750 [Vendetta], device 0
-	c568  82C750 [Vendetta], device 1
+	c567  82C750 [Vendetta] Host Bridge
+	c568  82C750 [Vendetta] ISA Bridge
 	c569  82C579 [Viper XPress+ Chipset]
-	c621  82C621 [Viper-M/N+]
+	c621  82C621A PCI IDE Controller
 	c700  82C700 [FireStar]
-	c701  82C701 [FireStar Plus]
-	c814  82C814 [Firebridge 1]
+	c701  82C700 [FireStar] Host Bridge
+	c814  82C814 [FireBridge II] Docking Station Controller
 	c822  82C822
-	c824  82C824
-	c825  82C825 [Firebridge 2]
+	c824  82C824 [FireFox] 32-Bit PC Card Controller
+	c825  82C825 [FireBridge II] Docking Station Controller
 	c832  82C832
-	c861  82C861 OHCI USB Host
+	c861  82C861/2/3 [FireLink] PCI-USB Host Bridge
 	c881  82C881 [FireLink] 1394 OHCI Link Controller
 	c895  82C895
-	c935  EV1935 ECTIVA MachOne PCIAudio
-	d568  82C825 [Firebridge 2]
-	d721  IDE [FireStar]
+	c935  82C935 [MachOne] Integrated PCI Audio Processor
+	d568  82C700 [FireStar] PCI IDE Controller
+	d721  82C700 [FireStar] PCI IDE Controller
 1046  IPC Corporation, Ltd.
 1047  Genoa Systems Corp
 1048  Elsa AG
@@ -7151,12 +7242,13 @@
 	0001  W83769F
 	0033  W89C33D 802.11 a/b/g BB/MAC
 	0105  W82C105
+	0628  W83628F/W83629D PCI to ISA Bridge Set
 	0840  W89C840
 		1050 0001  W89C840 Ethernet Adapter
 		1050 0840  W89C840 Ethernet Adapter
 	0940  W89C940
-	5a5a  W89C940F
-	6692  W6692
+	5a5a  W89C940 Twisted-pair Ether-LAN Controller With PCI Interface [ELANC-PCI]
+	6692  W6692 PCI ISDN S/T-Controller
 		1043 1702  ISDN Adapter (PCI Bus, D, W)
 		1043 1703  ISDN Adapter (PCI Bus, DV, W)
 		1043 1707  ISDN Adapter (PCI Bus, DV, W)
@@ -7165,6 +7257,7 @@
 		144f 1707  ISDN Adapter (PCI Bus, DV, W)
 	9921  W99200F MPEG-1 Video Encoder
 	9922  W99200F/W9922PF MPEG-1/2 Video Encoder
+	9960  W9960CF Video Codec
 	9970  W9970CF
 1051  Anigma, Inc.
 1052  ?Young Micro Systems
@@ -9163,13 +9256,13 @@
 	5842  2051 ISA bridge
 10ab  Digicom
 10ac  Honeywell IAC
-10ad  Symphony Labs
+10ad  Winbond Electronics Corp / Symphony Labs
 	0001  W83769F
 	0003  SL82C103
 	0005  SL82C105
 	0103  SL82c103
-	0105  SL82c105
-	0565  W83C553F/W83C554F
+	0105  SL82C105/W83C55xF Bus Master IDE
+	0565  W83C553F/554F ISA bridge
 10ae  Cornerstone Technology
 10af  Micro Computer Systems Inc
 10b0  CardExpert Technology
@@ -13009,7 +13102,7 @@
 	2182  TU116 [GeForce GTX 1660 Ti]
 	2183  TU116
 	2184  TU116 [GeForce GTX 1660]
-	2187  TU116 [GeForce GTX 1660 SUPER]
+	2187  TU116 [GeForce GTX 1650 SUPER]
 	2188  TU116 [GeForce GTX 1650]
 	2189  TU116 [CMP 30HX]
 	2191  TU116M [GeForce GTX 1660 Ti Mobile]
@@ -13047,6 +13140,7 @@
 	223f  GA102GL
 	228b  GA104 High Definition Audio Controller
 	228e  GA106 High Definition Audio Controller
+	2291  GA107 High Definition Audio Controller
 	2296  Tegra PCIe Endpoint Virtual Network
 	22a3  GH100 [H100 NVSwitch]
 	22ba  AD102 High Definition Audio Controller
@@ -13059,6 +13153,7 @@
 	2322  GH100 [H800 PCIe]
 	2324  GH100 [H800]
 	2329  GH100 [H20]
+	232c  GH100 [H20 HBM3e]
 	2330  GH100 [H100 SXM5 80GB]
 	2331  GH100 [H100 PCIe]
 	2335  GH100 [H200 SXM 141GB]
@@ -13072,6 +13167,7 @@
 	2342  GH100 [GH200 120GB / 480GB]
 	2343  GH100
 	2345  GH100 [GH100-88K-A1]
+	2348  GH100 [GH200 144G HBM3e]
 	237f  GH100 [Skinny Joe]
 	23b0  GH100
 	23f0  GH100
@@ -13150,7 +13246,7 @@
 	25a9  GA107M [GeForce RTX 2050]
 	25aa  GA107M [GeForce MX570 A]
 	25ab  GA107M [GeForce RTX 3050 4GB Laptop GPU]
-	25ac  GN20-P0-R-K2 [GeForce RTX 3050 6GB Laptop GPU]
+	25ac  GA107BM / GN20-P0-R-K2 [GeForce RTX 3050 6GB Laptop GPU]
 	25ad  GA107 [GeForce RTX 2050]
 	25af  GA107 [GeForce RTX 3050 Engineering Sample]
 	25b0  GA107GL [RTX A1000]
@@ -13167,7 +13263,7 @@
 	25e0  GA107BM [GeForce RTX 3050 Ti Mobile]
 	25e2  GA107BM [GeForce RTX 3050 Mobile]
 	25e5  GA107BM [GeForce RTX 3050 Mobile]
-	25ec  GN20-P0-R-K2 [GeForce RTX 3050 6GB Laptop GPU]
+	25ec  GA107BM / GN20-P0-R-K2 [GeForce RTX 3050 6GB Laptop GPU]
 	25ed  GA107 [GeForce RTX 2050]
 	25f9  GA107 [RTX A1000 Embedded GPU ]
 	25fa  GA107 [RTX A2000 Embedded GPU]
@@ -13190,9 +13286,9 @@
 	2704  AD103 [GeForce RTX 4080]
 	2705  AD103 [GeForce RTX 4070 Ti SUPER]
 	2709  AD103 [GeForce RTX 4070]
-	2717  GN21-X11 [GeForce RTX 4090 Laptop GPU]
+	2717  AD103M / GN21-X11 [GeForce RTX 4090 Laptop GPU]
 	2730  AD103GLM [RTX 5000 Ada Generation Laptop GPU]
-	2757  GN21-X11 [GeForce RTX 4090 Laptop GPU]
+	2757  AD103M / GN21-X11 [GeForce RTX 4090 Laptop GPU]
 	2770  AD103GLM [RTX 5000 Ada Generation Embedded GPU]
 	2782  AD104 [GeForce RTX 4070 Ti]
 	2783  AD104 [GeForce RTX 4070 SUPER]
@@ -13230,8 +13326,24 @@
 	28e0  AD107M [GeForce RTX 4060 Max-Q / Mobile]
 	28e1  AD107M [GeForce RTX 4050 Max-Q / Mobile]
 	28f8  AD107GLM [RTX 2000 Ada Generation Embedded GPU]
-	2900  GB100
-	2940  GB100
+	2900  GB100 [Reserved Dev ID A]
+	2940  GB100 [Reserved Dev ID B]
+	2941  GB100 [GB200 SKU]
+	2980  GB102
+	29c0  GB102
+	2c18  GB203M / GN22 [GeForce RTX 5090 Max-Q / Mobile]
+	2c19  GB203M / GN22 [GeForce RTX 5080 Max-Q / Mobile]
+	2c2c  GB6-256(N22W-ES-A1)
+	2c58  GB203M / GN22-X11 [GeForce RTX 5090 Max-Q / Mobile]
+	2c59  GB203M / GN22-X9 [GeForce RTX 5080 Max-Q / Mobile]
+	2d18  AD108M [GeForce RTX 5070 Max-Q / Mobile]
+	2d19  AD108M [GeForce RTX 5060 Max-Q / Mobile]
+	2d58  AD108M [GeForce RTX 5070 Max-Q / Mobile]
+	2d59  AD108M [GeForce RTX 5060 Max-Q / Mobile]
+	2d98  AD108M [GeForce RTX 5050 Max-Q / Mobile]
+	2dd8  AD108M [GeForce RTX 5050 Max-Q / Mobile]
+	2f18  AD108M [GeForce RTX 5070 Ti Max-Q / Mobile]
+	2f58  AD108M [GeForce RTX 5070 Ti Max-Q / Mobile]
 10df  Emulex Corporation
 	0720  OneConnect NIC (Skyhawk)
 		103c 1934  FlexFabric 20Gb 2-port 650M Adapter
@@ -13333,6 +13445,7 @@
 	f500  LPe37000/LPe38000 Series 32Gb/64Gb Fibre Channel Adapter
 		1014 06c1  PCIe4 4-Port 32Gb Fibre Channel Adapter for POWER (FC EN1L/EN1M; CCIN 2CFC)
 		1014 06c2  PCIe4 2-Port 64Gb Fibre Channel Adapter for POWER (FC EN1N/EN1P; CCIN 2CFD)
+	f600  LPe37100S/LPe38100S Series 32Gb/64Gb Fibre Channel Adapter
 	f700  LP7000 Fibre Channel Host Adapter
 	f701  LP7000 Fibre Channel Host Adapter Alternate ID (JX1:2-3, JX2:1-2)
 	f800  LP8000 Fibre Channel Host Adapter
@@ -13671,6 +13784,7 @@
 	8813  RTL8813AE 802.11ac PCIe Wireless Network Adapter
 	8821  RTL8821AE 802.11ac PCIe Wireless Network Adapter
 	8852  RTL8852AE 802.11ax PCIe Wireless Network Adapter
+	8922  RTL8922AE 802.11be PCIe Wireless Network Adapter
 	a85a  RTL8852AE WiFi 6 802.11ax PCIe Adapter
 	b520  RTL8852BE-VT PCIe 802.11ax Wireless Network Controller
 	b723  RTL8723BE PCIe Wireless Network Adapter
@@ -14046,6 +14160,7 @@
 	0410  VX900 Series Host Bridge: Host Control
 	0415  VT6415 PATA IDE Host Controller
 		1043 838f  Motherboard
+	0419  VN1000 Host Bridge
 	0501  VT8501 [Apollo MVP4]
 	0505  VT82C505
 # Shares chip with :0576. The VT82C576M has :1571 instead of :0561.
@@ -14137,6 +14252,7 @@
 	1364  CN896/VN896/P4M900 Host Bridge
 	1409  VX855/VX875 Error Reporting
 	1410  VX900 Series Error Reporting
+	1419  VN1000 Host Bridge
 	1571  VT82C576M/VT82C586
 	1595  VT82C595/97 [Apollo VP2/97]
 	1732  VT1732 [Envy24 II] PCI Multi-Channel Audio Controller
@@ -14162,11 +14278,12 @@
 	2364  CN896/VN896/P4M900 Host Bridge
 	2409  VX855/VX875 Host Bus Control
 	2410  VX900 Series CPU Bus Controller
+	2419  VN1000 Host Bridge
 	287a  VT8251 PCI to PCI Bridge
 	287b  VT8251 Host Bridge
 	287c  VT8251 PCIE Root Port
 	287d  VT8251 PCIE Root Port
-	287e  VT8237/8251 Ultra VLINK Controller
+	287e  VT8237/8251/8261 Ultra VLINK Controller
 	3022  CLE266
 	3038  VT82xx/62xx/VX700/8x0/900 UHCI USB 1.1 Controller
 		0925 1234  onboard UHCI USB 1.1 Controller
@@ -14331,7 +14448,7 @@
 	3116  VT8375 [KM266/KL266] Host Bridge
 		1297 f641  FX41 motherboard
 	3118  CN400/PM800/PM880/PN800/PN880 [S3 UniChrome Pro]
-	3119  VT6120/VT6121/VT6122 Gigabit Ethernet Adapter
+	3119  VT6120/VT6121/VT6122/VT6130 Gigabit Ethernet Adapter
 	3122  VT8623 [Apollo CLE266] integrated CastleRock graphics
 	3123  VT8623 [Apollo CLE266]
 	3128  VT8753 [P4X266 AGP]
@@ -14432,12 +14549,14 @@
 	3372  VT8237S PCI to ISA Bridge
 	337a  VT8237A PCI to PCI Bridge
 	337b  VT8237A Host Bridge
+	3402  VT8261 PCI to ISA Bridge
 	3403  VT6315 Series Firewire Controller
 		1043 8374  M5A88-V EVO
 		1043 8384  P8P67 Deluxe Motherboard
 	3409  VX855/VX875 DRAM Bus Control
 	3410  VX900 Series DRAM Bus Control
 		19da a179  ZBOX nano VD01
+	3419  VN1000 Host Bridge
 	3432  VL800/801 xHCI USB 3.0 Controller
 	3456  VX11 Standard Host Bridge
 	345b  VX11 Miscellaneous Bus
@@ -14466,6 +14585,7 @@
 	4409  VX855/VX875 Power Management Control
 	4410  VX900 Series Power Management and Chip Testing Control
 		19da a179  ZBOX nano VD01
+	4419  VN1000 Host Bridge
 	5030  VT82C596 ACPI [Apollo PRO]
 	5122  VX855/VX875 Chrome 9 HCM Integrated Graphics
 	5208  PT890 I/O APIC Interrupt Controller
@@ -14483,6 +14603,7 @@
 	5372  VT8237/8251 Serial ATA Controller
 	5409  VX855/VX875 APIC and Central Traffic Control
 	5410  VX900 Series APIC and Central Traffic Control
+	5419  VN1000 I/O APIC Interrupt Controller
 	6100  VT85C100A [Rhine II]
 	6122  VN1000 Graphics [Chrome 520 IGP]
 	6287  SATA RAID Controller
@@ -14493,6 +14614,7 @@
 	6409  VX855/VX875 Scratch Registers
 	6410  VX900 Series Scratch Registers
 		19da a179  ZBOX nano VD01
+	6419  VN1000 Host Bridge
 	7122  VX900 Graphics [Chrome9 HD]
 	7204  K8M800 Host Bridge
 	7205  KM400/KN400/P4M800 [S3 UniChrome]
@@ -14519,6 +14641,7 @@
 	7409  VX855/VX875 North-South Module Interface Control
 	7410  VX900 Series North-South Module Interface Control
 		19da a179  ZBOX nano VD01
+	7419  VN1000 Host Bridge
 	8231  VT8231 [PCI-to-ISA Bridge]
 	8235  VT8235 ACPI
 	8305  VT8363/8365 [KT133/KM133 AGP]
@@ -14543,12 +14666,14 @@
 	8a26  KL133/KL133A/KM133/KM133A [S3 ProSavage]
 	8d01  PN133/PN133T [S3 Twister]
 	8d04  KM266/P4M266/P4M266A/P4N266 [S3 ProSavageDDR]
+	9000  VT8261 IDE Controller [StorX IDE Controller - 9000]
 	9001  VX900 Series Serial-ATA Controller
+	9040  VT8261 SATA Controller [StorX RAID Controller - 9040]
 	9082  Standard AHCI 1.0 SATA Controller
 	9140  HDMI Audio Device
 	9201  USB3.0 Controller
 	9380  Ncore Coprocessor for Centaur CNS
-	9530  VX800/820/900 Series Secure Digital Memory Card Controller
+	9530  VX800/820/900/VT8261 Series Secure Digital Memory Card Controller
 	95d0  VX800/820/900 Series SDIO Host Controller
 	a208  PT890 PCI to PCI Bridge Controller
 	a238  K8T890 PCI to PCI Bridge Controller
@@ -14557,6 +14682,7 @@
 	a364  CN896/VN896/P4M900 PCI to PCI Bridge Controller
 	a409  VX855/VX875/VX900 Series USB Device Controller
 	a410  VX900 Series PCI Express Root Port 0
+	a419  VN1000 PCI to PCI Bridge
 	b091  VT8633 [Apollo Pro266 AGP]
 	b099  VT8366/A/7 [Apollo KT266/A/333 AGP]
 	b101  VT8653 AGP Bridge
@@ -14572,6 +14698,7 @@
 	b213  VPX/VPX2 I/O APIC Interrupt Controller
 	b353  VX855/VX875/VX900 PCI to PCI Bridge
 	b410  VX900 Series PCI Express Root Port 1
+	b419  VN1000 Host Bridge
 	b999  [K8T890 North / VT8237 South] PCI Bridge
 	c208  PT890 PCI to PCI Bridge Controller
 	c238  K8T890 PCI to PCI Bridge Controller
@@ -14581,22 +14708,26 @@
 	c364  CN896/VN896/P4M900 PCI to PCI Bridge Controller
 	c409  VX855/VX875 EIDE Controller
 	c410  VX900 Series PCI Express Root Port 2
+	c419  VN1000 PCI to PCI Bridge
 	d104  VT8237R USB UDCI Controller
 	d208  PT890 PCI to PCI Bridge Controller
 	d213  VPX/VPX2 PCI to PCI Bridge Controller
 	d238  K8T890 PCI to PCI Bridge Controller
 	d340  PT900 PCI to PCI Bridge Controller
 	d410  VX900 Series PCI Express Root Port 3
+	d419  VN1000 PCI to PCI Bridge
 	e208  PT890 PCI to PCI Bridge Controller
 	e238  K8T890 PCI to PCI Bridge Controller
 	e340  PT900 PCI to PCI Bridge Controller
 	e353  VX800/820-Series PCI-Express Root Port 0
 	e410  VX900 Series PCI Express Physical Layer Electrical Sub-block
+	e419  VN1000 PCI to PCI Bridge
 	f208  PT890 PCI to PCI Bridge Controller
 	f238  K8T890 PCI to PCI Bridge Controller
 	f340  PT900 PCI to PCI Bridge Controller
 	f353  VX800/820-Series PCI-Express Root Port 1
 	f410  VX900 Series PCI UART Port 0-3
+	f419  VN1000 PCI to PCI Bridge
 1107  Stratus Computers
 	0576  VIA VT82C570MV [Apollo] (Wrong vendor ID!)
 1108  Proteon, Inc.
@@ -17016,9 +17147,7 @@
 123b  Seeq Technology, Inc.
 123c  Century Systems, Inc.
 123d  Engineering Design Team, Inc.
-	0000  EasyConnect 8/32
-	0002  EasyConnect 8/64
-	0003  EasyIO
+	0000  PCI 11W
 	0047  PCIe4 CDa
 	004b  PCIe4 CDa 16
 	009d  VisionLink F1
@@ -17132,7 +17261,9 @@
 		1028 0085  ES1968 Maestro-2 PCI
 		1033 8051  ES1968 Maestro-2 Audiodrive
 	1969  ES1938/ES1946/ES1969 Solo-1 Audiodrive
+		1014 0162  16 Bit PCI Audio Adapter (37L4457)
 		1014 0166  ES1969 SOLO-1 AudioDrive on IBM Aptiva Mainboard
+		121f 8800  eDio Hi-Live SC1938
 		125d 8888  Solo-1 Audio Adapter
 		125d 8898  ES1938S TTSOLO1-SL [TerraTec 128i PCI]
 		153b 111b  Terratec 128i PCI
@@ -18379,6 +18510,8 @@
 	0207  GLN180PEX GPS/GLONASS receiver (PCI Express)
 	0208  GPS180AMC GPS Receiver (PCI Express / MicroTCA / AdvancedMC)
 	0209  GNS181PEX GPS/Galileo/GLONASS/BEIDOU receiver (PCI Express)
+	020a  GPS183PEX GPS Receiver (PCI Express)
+	020b  GNS183PEX GPS/Galileo/GLONASS/BEIDOU receiver (PCI Express)
 	0301  TCR510PCI IRIG Timecode Reader
 	0302  TCR167PCI IRIG Timecode Reader
 	0303  TCR511PCI IRIG Timecode Reader
@@ -20018,6 +20151,7 @@
 		1028 215a  DC NVMe PM9A3 RI U.2 960GB
 		1028 215b  DC NVMe PM9A3 RI U.2 1.92TB
 		1028 215c  DC NVMe PM9A3 RI U.2 3.84TB
+		1028 215d  Dell DC NVMe PM9A3 RI U.2 7.68TB
 		1028 2166  DC NVMe PM9A3 RI 110M.2 960GB
 		1028 2167  DC NVMe PM9A3 RI 110M.2 1.92TB
 		1028 2168  DC NVMe PM9A3 RI 80M.2 480GB
@@ -20154,6 +20288,45 @@
 		1028 225e  NVMe FIPS PM1745 MU U.2 12.8TB
 		1028 225f  NVMe PM1745 MU U.2 12.8TB
 	a900  NVMe SSD Controller PM9DXa
+		1028 230f  DC NVMe PM9D3a RI 80M.2 480GB ISE
+		1028 2310  DC NVMe PM9D3a RI 80M.2 960GB ISE
+		1028 2311  DC NVMe PM9D3a RI 80M.2 1.92TB ISE
+		1028 2341  DC NVMe PM9D3a RI U.2 960GB
+		1028 2342  DC NVMe PM9D3a RI U.2 1.92TB
+		1028 2343  DC NVMe PM9D3a RI U.2 3.84TB
+		1028 2344  DC NVMe PM9D3a RI U.2 7.68TB
+		1028 2345  DC NVMe PM9D3a RI U.2 15.36TB
+		1028 2346  DC NVMe FIPS PM9D3a RI U.2 960GB
+		1028 2347  DC NVMe FIPS PM9D3a RI U.2 1.92TB
+		1028 2348  DC NVMe FIPS PM9D3a RI U.2 3.84TB
+		1028 2349  DC NVMe FIPS PM9D3a RI U.2 7.68TB
+		1028 234a  DC NVMe FIPS PM9D3a RI U.2 15.36TB
+		1028 234d  DC NVMe PM9D3a RI E3s 1.92TB
+		1028 234e  DC NVMe PM9D3a RI E3s 3.84TB
+		1028 234f  DC NVMe PM9D3a RI E3s 7.68TB
+		1028 2350  DC NVMe PM9D3a RI E3s 15.36TB
+		1028 2351  DC NVMe FIPS PM9D3a RI E3s 1.92TB
+		1028 2352  DC NVMe FIPS PM9D3a RI E3s 3.84TB
+		1028 2353  DC NVMe FIPS PM9D3a RI E3s 7.68TB
+		1028 2354  DC NVMe FIPS PM9D3a RI E3s 15.36TB
+		1028 2355  DC NVMe PM9D5a MU U.2 800GB
+		1028 2356  DC NVMe PM9D5a MU U.2 1.6TB
+		1028 2357  DC NVMe PM9D5a MU U.2 3.2TB
+		1028 2358  DC NVMe PM9D5a MU U.2 6.4TB
+		1028 2359  DC NVMe PM9D5a MU E3.s 1.6TB
+		1028 235a  DC NVMe PM9D5a MU E3.s 3.2TB
+		1028 235b  DC NVMe PM9D5a MU E3.s 6.4TB
+	aa00  NVMe SSD Controller BM1743
+		1028 2312  NVMe FIPS BM1743 QLC U.2 15.36TB
+		1028 2313  NVMe FIPS BM1743 QLC U.2 30.72TB
+		1028 2314  NVMe FIPS BM1743 QLC U.2 61.44TB
+		1028 2315  NVMe BM1743 QLC U.2 15.36TB
+		1028 2316  NVMe BM1743 QLC U.2 30.72TB
+		1028 2317  NVMe BM1743 QLC U.2 61.44TB
+		1028 2364  NVMe BM1743 QLC U.2 122.88TB
+		1028 2366  MZ3MO15THCLCAD3
+		1028 2367  MZ3MO30THCLFAD3
+	ac00  NVMe SSD Controller PM175x
 	ecec  Exynos 8895 PCIe Root Complex
 144e  OLITEC
 144f  Askey Computer Corp.
@@ -20353,6 +20526,7 @@
 14c3  MEDIATEK Corp.
 	0608  MT7921K (RZ608) Wi-Fi 6E 80MHz
 	0616  MT7922 802.11ax PCI Express Wireless Network Adapter
+	4d75  T700 5G Modem [5G Solution 5000]
 	7603  MT7603E 802.11bgn PCI Express Wireless Network Adapter
 	7612  MT7612E 802.11acbgn PCI Express Wireless Network Adapter
 	7615  MT7615E 802.11ac PCI Express Wireless Network Adapter
@@ -20360,12 +20534,16 @@
 	7650  MT7650 802.11ac
 # MT7612E too?
 	7662  MT7662E 802.11ac PCI Express Wireless Network Adapter
+	7663  MT7663 802.11ac PCI Express Wireless Network Adapter
 	7915  MT7915E 802.11ax PCI Express Wireless Network Adapter
 	7916  MT7905D/MT7975
 # WiFi 6E capable
 	7922  MT7922 802.11ax PCI Express Wireless Network Adapter
 		1a3b 5300  ASUS PCE-AXE59BT
 	7961  MT7921 802.11ax PCI Express Wireless Network Adapter
+	7988  MT7988 PCIe Host Bridge [Filogic 880]
+	7990  MT7996 802.11be PCI Express Wireless Network Adapter (Port 0)
+	7991  MT7996 802.11be PCI Express Wireless Network Adapter (Port 1)
 	8650  MT7650 Bluetooth
 14c4  IWASAKI Information Systems Co Ltd
 14c5  Automation Products AB
@@ -20992,7 +21170,20 @@
 		14e4 d142  NetXtreme-E P425D BCM57504 4x25G SFP28 PCIE
 		1590 0420  HPE Ethernet 25/50Gb 2-port 6310C Adapter
 	1752  BCM57502 NetXtreme-E 10Gb/25Gb/40Gb/50Gb Ethernet
-	1760  BCM57608 10Gb/25Gb/50Gb/100Gb/200Gb/400Gb Ethernet
+	1760  BCM57608 25Gb/50Gb/100Gb/200Gb/400Gb Ethernet
+		14e4 9110  BCM57608 1x400G PCIe Ethernet NIC
+		14e4 9120  BCM57608 2x200G PCIe Ethernet NIC
+		14e4 9121  BCM57608 2x100G PCIe Ethernet NIC
+		14e4 9125  BCM57608 2x200G PCIe Ethernet NIC
+		14e4 9126  BCM57608 2x100G PCIe Ethernet NIC
+		14e4 9140  BCM57608 1x400G QSFP-DD PCIe Ethernet NIC
+		14e4 9310  BCM57608 1x400G QSFP-DD OCP Ethernet NIC
+		14e4 9311  BCM57608 1x400G OCP Ethernet NIC
+		14e4 9312  BCM57608 1x200G OCP Ethernet NIC
+		14e4 9320  BCM57608 2x200G OCP Ethernet NIC
+		14e4 9325  BCM57608 2x200G OCP Ethernet NIC
+		14e4 9326  BCM57608 2x100G OCP Ethernet NIC
+		14e4 9340  BCM57608 4x100G OCP Ethernet NIC
 		14e4 d125  BCM57608 2x200G PCIe Ethernet NIC
 	1800  BCM57502 NetXtreme-E Ethernet Partition
 	1801  BCM57504 NetXtreme-E Ethernet Partition
@@ -21010,6 +21201,7 @@
 		14e4 df24  BCM57508 NetXtreme-E NGM2100D 2x100G KR Mezz Ethernet Virtual Function
 	1809  BCM5750X NetXtreme-E RDMA Virtual Function
 		14e4 df24  BCM57508 NetXtreme-E NGM2100D 2x100G KR Mezz RDMA Virtual Function
+	1819  BCM5760X Ethernet Virtual Function
 	2711  BCM2711 PCIe Bridge
 	2712  BCM2712 PCIe Bridge
 	3352  BCM3352
@@ -21983,6 +22175,7 @@
 	6893  3U OpenVPX Multi-function I/O Board [Model 68C3]
 15ad  VMware
 	0405  SVGA II Adapter
+	0406  SVGA II Adapter (Fusion)
 	0710  SVGA Adapter
 	0720  VMXNET Ethernet Controller
 	0740  Virtual Machine Communication Interface
@@ -22061,7 +22254,7 @@
 	0271  Spectrum-5 RMA
 	0274  Spectrum-6 in Flash Recovery Mode
 	0275  Spectrum-6 RMA
-	0277  Spectrum-4TOR RMA
+	0277  Spectrum-6 Tile
 	0278  Quantum-4 in Flash Recovery Mode
 	0279  Quantum-4 RMA
 	027a  Eros Chiplet
@@ -22075,6 +22268,7 @@
 # Flash recovery
 	0288  Arcus2
 	0289  Arcus2 RMA
+	0290  SagittaZ
 	1002  MT25400 Family [ConnectX-2 Virtual Function]
 	1003  MT27500 Family [ConnectX-3]
 		1014 04b5  PCIe3 40GbE RoCE Converged Host Bus Adapter for Power
@@ -22151,6 +22345,7 @@
 		193d 1083  NIC-ETH640F-3S-2P
 # NIC-ETH540F-3S-2P OCP3.0 2x10G Card
 		193d 1084  NIC-ETH540F-3S-2P
+		1e81 0c10  25GbE dual-port SFP28, PCIe3.0 x8 [3SC10]
 	1016  MT27710 Family [ConnectX-4 Lx Virtual Function]
 	1017  MT27800 Family [ConnectX-5]
 		15b3 0006  ConnectX-5 EN network interface card, 100GbE single-port QSFP28, PCIe3.0 x16, tall bracket; MCX515A-CCAT
@@ -22169,9 +22364,14 @@
 	101b  MT28908 Family [ConnectX-6]
 	101c  MT28908 Family [ConnectX-6 Virtual Function]
 	101d  MT2892 Family [ConnectX-6 Dx]
+		193d 1055  NIC-ETH1040F-LP-2P QSFP56 2x100GbE PCIe Network Adapter
 	101e  ConnectX Family mlx5Gen Virtual Function
 	101f  MT2894 Family [ConnectX-6 Lx]
 		193d 1035  NIC-ETH641F-LP-2P SFP28 2x25GbE PCIe Network Adapter
+		1bd4 00ac  O252MCX6Lx
+		1bd4 00ae  S252MCX6Lx
+		1ff9 00ad  ENFM6251-SP2
+		1ff9 00af  ENPM6251-SP2
 	1020  MT28860
 	1021  MT2910 Family [ConnectX-7]
 	1023  CX8 Family [ConnectX-8]
@@ -22316,7 +22516,7 @@
 	5006  SanDisk Extreme Pro / WD Black SN750 / PC SN730 / Red SN700 NVMe SSD
 	5007  IX SN530 NVMe SSD (DRAM-less)
 	5008  PC SN530 NVMe SSD (DRAM-less)
-	5009  SanDisk Ultra 3D / WD Blue SN550 NVMe SSD
+	5009  SanDisk Ultra 3D / WD PC SN530, IX SN530, Blue SN550 NVMe SSD (DRAM-less)
 		15b7 5009  WD Blue SN550 NVMe SSD
 	500b  PC SN530 NVMe SSD
 		1414 500b  Xbox Series X
@@ -22339,6 +22539,7 @@
 	5036  WD PC SN5000S M.2 2280 NVMe SSD (DRAM-less)
 	5041  WD Blue SN580 NVMe SSD (DRAM-less)
 	5042  WD Black SN770M NVMe SSD (DRAM-less)
+	5046  SanDisk Extreme NVMe SSD (DRAM-less)
 15b8  ADDI-DATA GmbH
 	1001  APCI1516 SP controller (16 digi outputs)
 	1003  APCI1032 SP controller (32 digi inputs w/ opto coupler)
@@ -23436,6 +23637,7 @@
 	8084  GL880 USB 2.0 EHCI controller
 	9750  GL9750 SD Host Controller
 	9755  GL9755 SD Host Controller
+	9767  GL9767 SD Host Controller
 	e763  GL9763E eMMC Controller
 17aa  Lenovo
 	0003  LENSE20256GMSP34MEAT2TA
@@ -23930,6 +24132,8 @@
 # Sitecom HFC-S based ISDN controller card DC-105v2
 	3069  DC-105v2 ISDN controller
 18d4  Celestica
+# OCP-TAP
+	1007  Time Card
 18d8  Dialogue Technology Corp.
 18dd  Artimi Inc
 	4c6f  Artimi RTMI-100 UWB adapter
@@ -24676,6 +24880,7 @@
 1a03  ASPEED Technology, Inc.
 	1150  AST1150 PCI-to-PCI Bridge
 	2000  ASPEED Graphics Family
+		15d9 0821  X10DRW-i
 		15d9 0832  X10SRL-F
 		15d9 1b95  H12SSL-i
 1a05  deltaww
@@ -24826,6 +25031,7 @@
 1ad7  Spectracom Corporation
 	8000  TSync-PCIe Time Code Processor
 	9100  TPRO-PCI-66U Timecode Reader/Generator
+	a000  OCP-TAP [ARTCard]
 1ade  Spin Master Ltd.
 	1501  Swipetech barcode scanner
 	3038  PCIe Video Bridge
@@ -25457,6 +25663,7 @@
 	0022  FD788
 	0023  FD722-M2
 	0024  FD722 with bypass
+	0025  FD922
 1c28  Lite-On IT Corp. / Plextor
 	0122  M6e PCI Express SSD [Marvell 88SS9183]
 # previously Fiberblaze
@@ -25656,6 +25863,12 @@
 		1c5f 5437  NVMe SSD PBlaze6 6647 3200G 2.5" U.2(dual port)
 		1c5f 5441  NVMe SSD PBlaze6 6547 6400G 2.5" U.2
 		1c5f 5447  NVMe SSD PBlaze6 6647 6400G 2.5" U.2(dual port)
+	0027  PBlaze7 7A40/7A46 NVMe SSD
+		1c5f 1421  NVMe SSD PBlaze7 7A40 1920G 2.5" U.2
+		1c5f 1431  NVMe SSD PBlaze7 7A40 3840G 2.5" U.2
+		1c5f 1441  NVMe SSD PBlaze7 7A40 7680G 2.5" U.2
+		1c5f 5431  NVMe SSD PBlaze7 7A46 3200G 2.5" U.2
+		1c5f 5441  NVMe SSD PBlaze7 7A46 6400G 2.5" U.2
 	003d  PBlaze5 920/926
 		1c5f 0a30  NVMe SSD PBlaze5 920 3840G AIC
 		1c5f 0a31  NVMe SSD PBlaze5 920 3840G 2.5" U.2
@@ -25679,9 +25892,12 @@
 		1c5f 4b61  NVMe SSD PBlaze6 6936 25600GB 2.5" U.3
 	003f  PBlaze7 7940/7946 NVMe SSD
 		1c5f 0431  NVMe SSD PBlaze7 7940 3840G 2.5" U.2
+		1c5f 0441  NVMe SSD PBlaze7 7940 7680G 2.5" U.2
+		1c5f 0451  NVMe SSD PBlaze7 7940 15360G 2.5" U.2
 		1c5f 0c31  NVMe SSD PBlaze7 7940 3840G 2.5" U.2
 		1c5f 0c41  NVMe SSD PBlaze7 7940 7680G 2.5" U.2
 		1c5f 0c51  NVMe SSD PBlaze7 7940 15360G 2.5" U.2
+		1c5f 0c61  NVMe SSD PBlaze7 7940 30720G 2.5" U.2
 		1c5f 1430  NVMe SSD PBlaze7 7940 3840G AIC
 		1c5f 1431  NVMe SSD PBlaze7 7940 3840G 2.5" U.2
 		1c5f 1435  NVMe SSD PBlaze7 7940 3840G E1.S
@@ -25690,6 +25906,9 @@
 		1c5f 1445  NVMe SSD PBlaze7 7940 7680G E1.S
 		1c5f 1450  NVMe SSD PBlaze7 7940 15360G AIC
 		1c5f 1451  NVMe SSD PBlaze7 7940 15360G 2.5" U.2
+		1c5f 4431  NVMe SSD PBlaze7 7946 3200G 2.5" U.2
+		1c5f 4441  NVMe SSD PBlaze7 7946 6400G 2.5" U.2
+		1c5f 4451  NVMe SSD PBlaze7 7946 12800G 2.5" U.2
 		1c5f 4c31  NVMe SSD PBlaze7 7946 3200G 2.5" U.2
 		1c5f 4c41  NVMe SSD PBlaze7 7946 6400G 2.5" U.2
 		1c5f 4c51  NVMe SSD PBlaze7 7946 12800G 2.5" U.2
@@ -25775,6 +25994,7 @@
 	627a  LEGEND 800 NVMe SSD (DRAM-less)
 # 500GB
 	628a  LEGEND 800 NVMe SSD (DRAM-less)
+	642a  XPG GAMMIX S50 CORE NVMe SSD (DRAM-less)
 	8201  XPG SX8200 Pro PCIe Gen3x4 M.2 2280 Solid State Drive
 1cc4  Shenzhen Unionmemory Information System Ltd.
 	1203  NVMe SSD Controller UHXXXa series
@@ -25808,6 +26028,8 @@
 	6a03  RPETJ512MKP1QDQ PCIe 4.0 NVMe SSD 512GB (DRAM-less)
 	6a13  RPJYJ512MKN1QWQ PCIe 4.0 NVMe SSD 512GB (DRAM-less)
 	6a14  RPEYJ1T24MKN2QWY PCIe 4.0 NVMe SSD 1024GB (DRAM-less)
+	6b13  RPJYJ512MLR1QWY PCIe 4.0 NVMe SSD 512GB (DRAM-less)
+	6b14  RPJYJ1T24MLR1HWY PCIe 4.0 NVMe SSD 1024GB (DRAM-less)
 	8030  NVMe SSD Controller UH8X2X/UH7X2X series
 		1cc4 1122  NVMe SSD UH812a U.2 1.92TB
 		1cc4 1123  NVMe SSD UH812a U.2 3.84TB
@@ -25894,6 +26116,7 @@
 	efa0  Elastic Fabric Adapter (EFA)
 	efa1  Elastic Fabric Adapter (EFA)
 	efa2  Elastic Fabric Adapter (EFA)
+	efa3  Elastic Fabric Adapter (EFA)
 1d17  Zhaoxin
 	070f  ZX-100 PCI Express Root Port
 	0710  ZX-100/ZX-200 PCI Express Root Port
@@ -26106,6 +26329,8 @@
 	1028  AR-P2P-ATR [P2P Actor Function]
 	1029  AR-P2P-UTL [P2P Utility Function]
 	102a  AR-TK242-FX2 [4x100GbE Gen5 Packet Capture-Replay Device]
+	102b  AR-ARKV-FX1 [Arkville 128B DPDK Data Mover for Versal/CPM5]
+	102c  AR-TK242-V80 [Gen5 PCAP Processor]
 	4200  A5PL-E1-10GETI [10 GbE Ethernet Traffic Instrument]
 1d72  Xiaomi
 1d78  DERA Storage
@@ -26255,6 +26480,7 @@
 	0010  Networking DOM Engine
 	0011  IO Bridge
 	0013  Host Network Interface
+	0400  Time Card
 1da1  Teko Telecom S.r.l.
 1da2  Sapphire Technology Limited
 	475d  Radeon RX 7800 XT [PULSE]
@@ -26270,6 +26496,7 @@
 	1010  HL-2000 AI Training Accelerator [Gaudi secured]
 # PCIe accelerator card for Deep Learning training tasks
 	1020  Gaudi2 AI Training Accelerator
+	1060  Gaudi3 AI Training Accelerator
 1da8  Corigine, Inc.
 	3800  Network Flow Processor 3800
 	3803  Network Flow Processor 3800 Virtual Function
@@ -26333,6 +26560,7 @@
 		1dbe 5007  Dongting-N3 DC SSD U.2 12800GB
 		1dbe 5008  Dongting-N3 DC SSD U.2 15360GB
 		1dbe 5009  Dongting-N3 DC SSD U.2 25600GB
+		1dbe 5010  Dongting-N3 DC SSD U.2 30720GB
 	5669  NVMe SSD Controller IG5669 [Tacoma]
 1dbf  Guizhou Huaxintong Semiconductor Technology Co., Ltd
 	0401  StarDragon4800 PCI Express Root Port
@@ -26793,6 +27021,7 @@
 		1028 223d  Ent NVMe CM7 U.2 MU 3.2TB
 		1028 223e  Ent NVMe CM7 U.2 MU 1.6TB
 	002a  Exceria Plus G3 NVMe SSD (DRAM-less)
+	002b  NVMe SSD Controller CD8P
 	002c  NVMe SSD Controller CD8P EDSFF
 		1028 22bf  DC NVMe CD8P E3.S 15.36TB
 		1028 22c0  DC NVMe CD8P E3.S 7.68TB
@@ -26848,8 +27077,14 @@
 1e3b  DapuStor Corporation
 	0600  NVMe SSD Controller DP600
 		1e3b 0006  Enterprise NVMe SSD U.2 7.68TB (J5000)
+		1e3b 000c  Enterprise NVMe SSD U.2 30.72TB (J5060)
+		1e3b 000d  Enterprise NVMe SSD U.2 61.44TB (J5060)
+		1e3b 000e  Enterprise NVMe SSD U.2 30.72TB (J5060D)
+		1e3b 000f  Enterprise NVMe SSD U.2 61.44TB (J5060D)
 		1e3b 0010  Enterprise NVMe SSD U.2 3.84TB (R5102)
 		1e3b 0013  Enterprise NVMe SSD U.2 3.20TB (R5302)
+		1e3b 0027  Enterprise NVMe SSD U.2 61.44TB (J5060)
+		1e3b 0028  Enterprise NVMe SSD U.2 61.44TB (J5060D)
 		1e3b 0030  Enterprise NVMe SSD U.2 3.84TB (J5100)
 		1e3b 0031  Enterprise NVMe SSD U.2 7.68TB (J5100)
 		1e3b 0032  Enterprise NVMe SSD U.2 15.36TB (J5100)
@@ -26905,7 +27140,7 @@
 		1e3b 00ea  Enterprise NVMe SSD U.2 3.20TB (J5301D)
 		1e3b 00eb  Enterprise NVMe SSD U.2 6.40TB (J5301D)
 		1e3b 00ec  Enterprise NVMe SSD U.2 30.72TB (J5101)
-		1e3b 00ed  NVMe SSD U.2 30.72TB (R5101)
+		1e3b 00ed  Enterprise NVMe SSD U.2 30.72TB (R5101)
 		1e3b 00ee  Enterprise NVMe SSD U.2 15.36B (J5101)
 		1e3b 00ef  Enterprise NVMe SSD U.2 12.80TB (J5301)
 		1e3b 00f0  Enterprise NVMe SSD U.2 0.40TB (X2900)
@@ -26914,7 +27149,7 @@
 		1e3b 00f3  Enterprise NVMe SSD U.2 3.20TB (X2900)
 		1e3b 00f5  Enterprise NVMe SSD U.2 0.40TB (X2900P)
 		1e3b 00f6  Enterprise NVMe SSD U.2 0.80TB (X2900P)
-	0800  DP800
+	0800  NVMe SSD Controller DP800
 		1e3b 0001  Enterprise NVMe SSD U.2 3.84TB(R6100)
 		1e3b 0007  Enterprise NVMe SSD U.2 15.36TB (R6100)
 		1e3b 000a  Enterprise NVMe SSD U.2 3.20TB (R6300)
@@ -26962,6 +27197,12 @@
 		1e3b 0082  Enterprise NVMe SSD U.2 7.68TB (H5100)
 		1e3b 0084  Enterprise NVMe SSD U.2 3.2TB (H5300)
 		1e3b 0085  Enterprise NVMe SSD U.2 6.4TB (H5300)
+	3001  Ethernet Controller DN200 for 10GbE SFP+
+		1e3b 3001  Ethernet Network Adapter DN200-X1V for 10GbE SFP+ 2-port
+	3002  Ethernet Controller DN200 Series Virtual Function
+	300c  Ethernet RAID Combo Controller DN200C for 1GbE
+		1e3b 300c  Ethernet RAID Combo Adapter DN200C-G2V for 1GbE 4-port
+	300d  Ethernet RAID Combo Controller DN200C Series Virtual Function
 1e3d  Burlywood, Inc
 1e43  MaxLinear Inc
 	8904  MxL8904
@@ -27078,6 +27319,7 @@
 1eac  Quectel Wireless Solutions Co., Ltd.
 	1001  EM120R-GL LTE Modem
 	1002  EM160R-GL LTE Modem
+	2001  EM120R-GL
 1eae  XFX Limited
 1eb0  Shenzhen Electrical Appliances CO.
 	1901  NVMe SSD Controller (DRAM-less)
@@ -27177,7 +27419,7 @@
 		1ee1 000b  Airglow A430 NVMe SSD U.2 4.8TB
 		1ee1 0012  Airglow Z400 NVMe ZNS SSD U.2 5.76TB
 1ee4  PETAIO INC
-	1180  P8118 NVMe SSD Series
+	1180  PETA8118 NVMe SSD Series
 		1ee4 0015  NVMe SSD U.2 1.92TB (P8118E)
 		1ee4 0016  NVMe SSD U.2 3.84TB (P8118E)
 		1ee4 0017  NVMe SSD U.2 7.68TB (P8118E)
@@ -27196,6 +27438,72 @@
 		1ee4 0225  NVMe SSD U.2 1.6TB (P8118X)
 		1ee4 0226  NVMe SSD U.2 3.2TB (P8118X)
 		1ee4 0227  NVMe SSD U.2 6.4TB (P8118X)
+		1ee4 1013  NVMe SSD M.2 480GB (P8118E)
+		1ee4 1014  NVMe SSD M.2 960GB (P8118E)
+		1ee4 1015  NVMe SSD M.2 1.92TB (P8118E)
+		1ee4 1016  NVMe SSD M.2 3.84TB (P8118E)
+		1ee4 1023  NVMe SSD M.2 400GB (P8118E)
+		1ee4 1024  NVMe SSD M.2 800GB (P8118E)
+		1ee4 1025  NVMe SSD M.2 1.6TB (P8118E)
+		1ee4 1026  NVMe SSD M.2 3.2TB (P8118E)
+		1ee4 1113  NVMe SSD M.2 480GB (P8118Z)
+		1ee4 1114  NVMe SSD M.2 960GB (P8118Z)
+		1ee4 1115  NVMe SSD M.2 1.92TB (P8118Z)
+		1ee4 1116  NVMe SSD M.2 3.84TB (P8118Z)
+		1ee4 1123  NVMe SSD M.2 400GB (P8118Z)
+		1ee4 1124  NVMe SSD M.2 800GB (P8118Z)
+		1ee4 1125  NVMe SSD M.2 1.6TB (P8118Z)
+		1ee4 1126  NVMe SSD M.2 3.2TB (P8118Z)
+		1ee4 1213  NVMe SSD M.2 480GB (P8118X)
+		1ee4 1214  NVMe SSD M.2 960GB (P8118X)
+		1ee4 1215  NVMe SSD M.2 1.92TB (P8118X)
+		1ee4 1216  NVMe SSD M.2 3.84TB (P8118X)
+		1ee4 1223  NVMe SSD M.2 400GB (P8118X)
+		1ee4 1224  NVMe SSD M.2 800GB (P8118X)
+		1ee4 1225  NVMe SSD M.2 1.6TB (P8118X)
+		1ee4 1226  NVMe SSD M.2 3.2TB (P8118X)
+		1ee4 2015  NVMe SSD E1.S 1.92TB (P8118E)
+		1ee4 2016  NVMe SSD E1.S 3.84TB (P8118E)
+		1ee4 2017  NVMe SSD E1.S 7.68TB (P8118E)
+		1ee4 2025  NVMe SSD E1.S 1.6TB (P8118E)
+		1ee4 2026  NVMe SSD E1.S 3.2TB (P8118E)
+		1ee4 2027  NVMe SSD E1.S 6.4TB (P8118E)
+		1ee4 2115  NVMe SSD E1.S 1.92TB (P8118Z)
+		1ee4 2116  NVMe SSD E1.S 3.84TB (P8118Z)
+		1ee4 2117  NVMe SSD E1.S 7.68TB (P8118Z)
+		1ee4 2125  NVMe SSD E1.S 1.6TB (P8118Z)
+		1ee4 2126  NVMe SSD E1.S 3.2TB (P8118Z)
+		1ee4 2127  NVMe SSD E1.S 6.4TB (P8118Z)
+		1ee4 2215  NVMe SSD E1.S 1.92TB (P8118X)
+		1ee4 2216  NVMe SSD E1.S 3.84TB (P8118X)
+		1ee4 2217  NVMe SSD E1.S 7.68TB (P8118X)
+		1ee4 2225  NVMe SSD E1.S 1.6TB (P8118X)
+		1ee4 2226  NVMe SSD E1.S 3.2TB (P8118X)
+		1ee4 2227  NVMe SSD E1.S 6.4TB (P8118X)
+		1ee4 3013  NVMe SSD AIC 480GB (P8118E)
+		1ee4 3014  NVMe SSD AIC 960GB (P8118E)
+		1ee4 3015  NVMe SSD AIC 1.92TB (P8118E)
+		1ee4 3016  NVMe SSD AIC 3.84TB (P8118E)
+		1ee4 3017  NVMe SSD AIC 7.68TB (P8118E)
+		1ee4 3025  NVMe SSD AIC 1.6TB (P8118E)
+		1ee4 3026  NVMe SSD AIC 3.2TB (P8118E)
+		1ee4 3027  NVMe SSD AIC 6.4TB (P8118E)
+		1ee4 3113  NVMe SSD AIC 480GB (P8118Z)
+		1ee4 3114  NVMe SSD AIC 960GB (P8118Z)
+		1ee4 3115  NVMe SSD AIC 1.92TB (P8118Z)
+		1ee4 3116  NVMe SSD AIC 3.84TB (P8118Z)
+		1ee4 3117  NVMe SSD AIC 7.68TB (P8118Z)
+		1ee4 3125  NVMe SSD AIC 1.6TB (P8118Z)
+		1ee4 3126  NVMe SSD AIC 3.2TB (P8118Z)
+		1ee4 3127  NVMe SSD AIC 6.4TB (P8118Z)
+		1ee4 3213  NVMe SSD AIC 480GB (P8118X)
+		1ee4 3214  NVMe SSD AIC 960GB (P8118X)
+		1ee4 3215  NVMe SSD AIC 1.92TB (P8118X)
+		1ee4 3216  NVMe SSD AIC 3.84TB (P8118X)
+		1ee4 3217  NVMe SSD AIC 7.68TB (P8118X)
+		1ee4 3225  NVMe SSD AIC 1.6TB (P8118X)
+		1ee4 3226  NVMe SSD AIC 3.2TB (P8118X)
+		1ee4 3227  NVMe SSD AIC 6.4TB (P8118X)
 		1ee4 abcd  NVMe SSD U.2
 1ee9  SUSE LLC
 1eec  Viscore Technologies Ltd
@@ -27357,8 +27665,19 @@
 	5236  PCIe 4 INNOGRIT based NVMe SSD
 	5765  PCIe 3 NVMe SSD (DRAM-less)
 1f44  VVDN Technologies Private Limited
-# YUSUR Technology Co., Ltd.
-1f47  YUSUR Tech
+1f47  YUSUR Technology Co., Ltd.
+	1001  FLEXFLOW-2200T Ethernet Controller
+		1f47 0001  FLEXFLOW-2200T Ethernet 10G 2P
+		1f47 0002  FLEXFLOW-2200T Ethernet 25G 2P
+		1f47 0003  FLEXFLOW-2200T Ethernet 40G 2P
+		1f47 0004  FLEXFLOW-2200T Ethernet 100G 1P
+		1f47 0005  FLEXFLOW-2200T Ethernet 100G 2P
+		1f47 0006  FLEXFLOW-2200T Ethernet 10G 2P
+		1f47 0007  FLEXFLOW-2200T Ethernet 25G 2P
+		1f47 0008  FLEXFLOW-2200T Ethernet 40G 2P
+		1f47 0009  FLEXFLOW-2200T Ethernet 100G 1P
+		1f47 000a  FLEXFLOW-2200T Ethernet 100G 2P
+	1003  FLEXFLOW-2200T Ethernet Controller MGMT Function
 # Network Accelerating Card
 	2018  DPU Card
 # Network Accelerating Card
@@ -27490,6 +27809,9 @@
 1fd4  SUNIX Co., Ltd.
 	0001  Matrix multiport serial adapter
 	1999  Multiport serial controller
+1fdd  Wuqi Microelectronics Co., Ltd.
+	0001  WQ9201 802.11ax PCIe Wireless Network Adapter
+	1001  WQ9301 802.11ax PCIe Wireless Access Points
 1fde  Kratos Defense & Security Solutions, Inc.
 	1125  OpenEdge 1125P
 	2500  OpenEdge 2500P
@@ -27497,7 +27819,8 @@
 	1010  AWM 1
 	2000  AWM 2
 	2010  AWM 2-M
-1fe1  Beijing Eswin Computing Technology Co., Ltd.
+1fe1  Beijing ESWIN Computing Technology Co., Ltd.
+	2030  EIC7700 Root Complex
 1fe4  HippStor Technology
 	1600  HP600 Series NVMe SSD
 		1fe4 0075  Enterprise NVMe SSD U.2 3.84TB(HP610)
@@ -27505,6 +27828,7 @@
 		1fe4 0077  Enterprise NVMe SSD U.2 6.40TB(HP630)
 		1fe4 0078  Enterprise NVMe SSD U.2 3.20TB(HP630)
 1fe9  MemryX
+	0100  MX3
 # LinkData Technology (Tianjin) Co., LTD
 1ff2  Linkdata
 	10a1  NIC1160 Ethernet Controller Family
@@ -27573,32 +27897,32 @@
 	006d  HS610
 2646  Kingston Technology Company, Inc.
 	0010  HyperX Predator PCIe AHCI SSD
-	2262  KC2000/KC2500 NVMe SSD SM2262EN
-	2263  A2000 NVMe SSD SM2263EN
+	2262  KC2000/KC2500 NVMe SSD [SM2262EN]
+	2263  A2000 NVMe SSD [SM2263EN]
 	5008  A1000/U-SNS8154P3 x2 NVMe SSD
-	500a  DC1000B NVMe SSD E12DC
-	500b  DC1000M NVMe SSD SM2270
+	500a  DC1000B NVMe SSD [E12DC]
+	500b  DC1000M NVMe SSD [SM2270]
 	500c  OM8PCP Design-In PCIe 3 NVMe SSD (DRAM-less)
 	500d  OM3PDP3 NVMe SSD
-	500e  NV1 NVMe SSD E13T (DRAM-less)
-	500f  NV1 NVMe SSD SM2263XT (DRAM-less)
+	500e  NV1 NVMe SSD [E13T] (DRAM-less)
+	500f  NV1 NVMe SSD [SM2263XT] (DRAM-less)
 	5010  OM8SBP NVMe PCIe SSD (DRAM-less)
-	5012  DC1500M NVMe SSD SM2270
-	5013  KC3000/FURY Renegade NVMe SSD E18
+	5012  DC1500M NVMe SSD [SM2270]
+	5013  KC3000/FURY Renegade NVMe SSD [E18]
 	5014  OM8SEP4 Design-In PCIe 4 NVMe SSD (TLC) (DRAM-less)
 	5016  OM3PGP4 NVMe SSD
-	5017  NV2 NVMe SSD SM2267XT (DRAM-less)
-	5019  NV2 NVMe SSD E21T (DRAM-less)
+	5017  NV2 NVMe SSD [SM2267XT] (DRAM-less)
+	5019  NV2 NVMe SSD [E21T] (DRAM-less)
 # 128GB
 	501a  OM8PGP4 Design-In PCIe 4 NVMe SSD (TLC) (DRAM-less)
 	501b  OM8PGP4 NVMe PCIe SSD (DRAM-less)
-	501c  NV2 NVMe SSD E19T (DRAM-less)
-	501d  NV2 NVMe SSD TC2200 (DRAM-less)
-	501f  FURY Renegade NVMe SSD with heatsink
+	501c  NV2 NVMe SSD [E19T] (DRAM-less)
+	501d  NV2 NVMe SSD [TC2200] (DRAM-less)
+	501f  FURY Renegade NVMe SSD + Heatsink [E18]
 	5021  OM8SEP4 Design-In PCIe 4 NVMe SSD (QLC) (DRAM-less)
 	5022  OM8PGP4 Design-In PCIe 4 NVMe SSD (QLC) (DRAM-less)
-	5023  NV2 NVMe SSD SM2269XT (DRAM-less)
-	5024  DC2000B NVMe SSD E18DC
+	5023  NV2 NVMe SSD [SM2269XT] (DRAM-less)
+	5024  DC2000B NVMe SSD [E18DC]
 	5025  NV3 NVMe SSD TC2201 (DRAM-less)
 	5026  NV3 NVMe SSD E21T (DRAM-less)
 	5027  NV3 NVMe SSD E27T (DRAM-less)
@@ -28344,6 +28668,7 @@
 # Wrong ID used in subsystem ID of AsusTek PCI-USB2 PCI card.
 807d  Asustek Computer, Inc.
 8080  Chengdu Storeswift Technology Co., Ltd.
+	4016  CX4016A NVMe SSD Controller
 8086  Intel Corporation
 	0007  82379AB
 	0008  Extended Express System Support Controller
@@ -28644,6 +28969,7 @@
 	0685  Z490 Chipset LPC/eSPI Controller
 	0687  Q470 Chipset LPC/eSPI Controller
 	068d  Comet Lake LPC Controller
+	068e  WM490 Chipset LPC/eSPI Controller
 	06a3  Comet Lake PCH SMBus Controller
 	06a4  Comet Lake PCH SPI Controller
 	06a8  Comet Lake PCH Serial IO UART Host Controller #0
@@ -28652,9 +28978,11 @@
 	06ab  Comet Lake PCH Serial IO SPI Controller #1
 	06ac  Comet Lake PCI Express Root Port #21
 	06b0  Comet Lake PCI Express Root Port #9
+	06b8  Comet Lake PCIe Root Port #1
 	06ba  Comet Lake PCI Express Root Port #1
 	06bb  Comet Lake PCI Express Root Port #4
 	06bd  Comet Lake PCIe Port #6
+	06be  Comet Lake PCIe Root Port #7
 	06bf  Comet Lake PCIe Port #8
 	06c0  Comet Lake PCI Express Root Port #17
 	06c8  Comet Lake PCH cAVS
@@ -28678,6 +29006,7 @@
 		8086 42a4  Dual Band Wi-Fi 5(802.11ac) Wireless-AC 9462 80MHz 1x1 [Jefferson Peak]
 	06f9  Comet Lake PCH Thermal Controller
 	06fb  Comet Lake PCH Serial IO SPI Controller #2
+	06fc  Comet Lake PCH Integrated Sensor Solution
 	0700  CE Media Processor A/V Bridge
 	0701  CE Media Processor NAND Flash Controller
 	0703  CE Media Processor Media Control Unit 1
@@ -29716,6 +30045,7 @@
 		8086 10a6  PRO/1000 PF Quad Port Server Adapter
 	10a6  82599EB 10-Gigabit Dummy Function
 	10a7  82575EB Gigabit Network Connection
+		15d9 10a7  X10DRW-i
 		8086 10a8  82575EB Gigabit Riser Card
 	10a9  82575EB Gigabit Backplane Connection
 	10b0  82573L PRO/1000 PL Network Connection
@@ -29878,6 +30208,7 @@
 		1bd4 002f  10G SFP+ DP EP102Fi4A Adapter
 		1bd4 0032  10G SFP+ DP EP102Fi4 Adapter
 		1bd4 0067  F102I82599
+		1f3f 0a00  Dual-port 10-Gigabit SFI/SFP+ Network Connection
 		4c52 1024  LR-LINK LRES9804BF Quad-port 10Gb Ethernet Server Adapter
 		4c52 3002  LRES3002PF Dual-port 10Gb Ethernet Server Adapter for OCP
 		4c52 3012  LRES3012PF Dual-port 10Gb Ethernet Server Adapter for OCP
@@ -30233,6 +30564,7 @@
 		1137 023e  1GigE I350 LOM
 		15d9 0000  AOC-SGP-i4
 		15d9 0652  Dual Port i350 GbE MicroLP [AOC-CGP-i2]
+		15d9 1521  X10DRW-i
 		17aa 1074  ThinkServer I350-T4 AnyFabric
 		17aa 4005  I350 Gigabit Network Connection
 		18d4 0c07  I350 1Gb 2-port RJ45 OCP Mezz Card MOP41-I-1GT2
@@ -30754,8 +31086,8 @@
 		1028 09be  Latitude 7410
 	15ec  JHL7540 Thunderbolt 3 USB Controller [Titan Ridge 4C 2018]
 		1028 09be  Latitude 7410
-	15ef  JHL7540 Thunderbolt 3 Bridge [Titan Ridge DD 2018]
-	15f0  JHL7540 Thunderbolt 3 USB Controller [Titan Ridge DD 2018]
+	15ef  JHL7440 Thunderbolt 3 Bridge [Titan Ridge DD 2018]
+	15f0  JHL7440 Thunderbolt 3 USB Controller [Titan Ridge DD 2018]
 	15f2  Ethernet Controller I225-LM
 		4c52 2031  LRES2031PT Single-port 2.5Gb Ethernet Network Adapter
 		8086 0001  Ethernet Network Adapter I225-T1
@@ -30772,6 +31104,7 @@
 	15fc  Ethernet Connection (13) I219-V
 	15ff  Ethernet Controller X710 for 10GBASE-T
 		1014 0000  PCIe3 4-port 10GbE Base-T Adapter
+		108e 0000  Quad Port 10GBase-T Adapter - CP
 		108e 7b1f  Quad Port 10GBase-T Adapter - CP
 		1137 0000  X710TLG GbE RJ45 PCIe NIC
 		1137 02c1  X710T2LG 2x10 GbE RJ45 PCIe NIC
@@ -34175,6 +34508,7 @@
 	2e95  4 Series Chipset HECI Controller
 	2e96  4 Series Chipset PT IDER Controller
 	2f00  Xeon E7 v3/Xeon E5 v3/Core i7 DMI2
+		15d9 0821  X10DRW-i
 	2f01  Xeon E7 v3/Xeon E5 v3/Core i7 PCI Express Root Port 0
 	2f02  Xeon E7 v3/Xeon E5 v3/Core i7 PCI Express Root Port 1
 	2f03  Xeon E7 v3/Xeon E5 v3/Core i7 PCI Express Root Port 1
@@ -34203,28 +34537,48 @@
 	2f1b  Xeon E7 v3/Xeon E5 v3/Core i7 IIO Debug
 	2f1c  Xeon E7 v3/Xeon E5 v3/Core i7 IIO Debug
 	2f1d  Xeon E7 v3/Xeon E5 v3/Core i7 PCIe Ring Interface
+		15d9 0821  X10DRW-i
 	2f1e  Xeon E7 v3/Xeon E5 v3/Core i7 Scratchpad & Semaphore Registers
+		15d9 0821  X10DRW-i
 	2f1f  Xeon E7 v3/Xeon E5 v3/Core i7 Scratchpad & Semaphore Registers
+		15d9 0821  X10DRW-i
 	2f20  Xeon E7 v3/Xeon E5 v3/Core i7 DMA Channel 0
+		15d9 0821  X10DRW-i
 	2f21  Xeon E7 v3/Xeon E5 v3/Core i7 DMA Channel 1
+		15d9 0821  X10DRW-i
 	2f22  Xeon E7 v3/Xeon E5 v3/Core i7 DMA Channel 2
+		15d9 0821  X10DRW-i
 	2f23  Xeon E7 v3/Xeon E5 v3/Core i7 DMA Channel 3
+		15d9 0821  X10DRW-i
 	2f24  Xeon E7 v3/Xeon E5 v3/Core i7 DMA Channel 4
+		15d9 0821  X10DRW-i
 	2f25  Xeon E7 v3/Xeon E5 v3/Core i7 DMA Channel 5
+		15d9 0821  X10DRW-i
 	2f26  Xeon E7 v3/Xeon E5 v3/Core i7 DMA Channel 6
+		15d9 0821  X10DRW-i
 	2f27  Xeon E7 v3/Xeon E5 v3/Core i7 DMA Channel 7
+		15d9 0821  X10DRW-i
 	2f28  Xeon E7 v3/Xeon E5 v3/Core i7 Address Map, VTd_Misc, System Management
+		15d9 0821  X10DRW-i
 	2f29  Xeon E7 v3/Xeon E5 v3/Core i7 Hot Plug
+		15d9 0821  X10DRW-i
 	2f2a  Xeon E7 v3/Xeon E5 v3/Core i7 RAS, Control Status and Global Errors
+		15d9 0821  X10DRW-i
 	2f2c  Xeon E7 v3/Xeon E5 v3/Core i7 I/O APIC
+		15d9 0821  X10DRW-i
 	2f2e  Xeon E7 v3/Xeon E5 v3/Core i7 RAID 5/6
 	2f2f  Xeon E7 v3/Xeon E5 v3/Core i7 RAID 5/6
 	2f30  Xeon E7 v3/Xeon E5 v3/Core i7 Home Agent 0
+		15d9 0821  X10DRW-i
 	2f32  Xeon E7 v3/Xeon E5 v3/Core i7 QPI Link 0
+		15d9 0821  X10DRW-i
 	2f33  Xeon E7 v3/Xeon E5 v3/Core i7 QPI Link 1
 	2f34  Xeon E7 v3/Xeon E5 v3/Core i7 PCIe Ring Interface
+		15d9 0821  X10DRW-i
 	2f36  Xeon E7 v3/Xeon E5 v3/Core i7 R3 QPI Link 0 & 1 Monitoring
+		15d9 0821  X10DRW-i
 	2f37  Xeon E7 v3/Xeon E5 v3/Core i7 R3 QPI Link 0 & 1 Monitoring
+		15d9 0821  X10DRW-i
 	2f38  Xeon E7 v3/Xeon E5 v3/Core i7 Home Agent 1
 	2f39  Xeon E7 v3/Xeon E5 v3/Core i7 I/O Performance Monitoring
 	2f3a  Xeon E7 v3/Xeon E5 v3/Core i7 QPI Link 2
@@ -34250,10 +34604,14 @@
 	2f78  Xeon E7 v3/Xeon E5 v3/Core i7 Home Agent 1 Debug
 	2f79  Xeon E7 v3/Xeon E5 v3/Core i7 Integrated Memory Controller 1 Target Address, Thermal & RAS Registers
 	2f7d  Xeon E7 v3/Xeon E5 v3/Core i7 Scratchpad & Semaphore Registers
+		15d9 0821  X10DRW-i
 	2f7e  Xeon E7 v3/Xeon E5 v3/Core i7 E3 QPI Link Debug
 	2f80  Xeon E7 v3/Xeon E5 v3/Core i7 QPI Link 0
+		15d9 0821  X10DRW-i
 	2f81  Xeon E7 v3/Xeon E5 v3/Core i7 R3 QPI Link 0 & 1 Monitoring
+		15d9 0821  X10DRW-i
 	2f83  Xeon E7 v3/Xeon E5 v3/Core i7 QPI Link 0
+		15d9 0821  X10DRW-i
 	2f85  Xeon E7 v3/Xeon E5 v3/Core i7 QPI Link 0 Debug
 	2f86  Xeon E7 v3/Xeon E5 v3/Core i7 QPI Link 0 Debug
 	2f87  Xeon E7 v3/Xeon E5 v3/Core i7 QPI Link 0 Debug
@@ -34268,11 +34626,16 @@
 	2f9a  Xeon E7 v3/Xeon E5 v3/Core i7 Power Control Unit
 	2f9c  Xeon E7 v3/Xeon E5 v3/Core i7 Power Control Unit
 	2fa0  Xeon E7 v3/Xeon E5 v3/Core i7 Home Agent 0
+		15d9 0821  X10DRW-i
 	2fa8  Xeon E7 v3/Xeon E5 v3/Core i7 Integrated Memory Controller 0 Target Address, Thermal & RAS Registers
 	2faa  Xeon E7 v3/Xeon E5 v3/Core i7 Integrated Memory Controller 0 Channel Target Address Decoder
+		15d9 0821  X10DRW-i
 	2fab  Xeon E7 v3/Xeon E5 v3/Core i7 Integrated Memory Controller 0 Channel Target Address Decoder
+		15d9 0821  X10DRW-i
 	2fac  Xeon E7 v3/Xeon E5 v3/Core i7 Integrated Memory Controller 0 Channel Target Address Decoder
+		15d9 0821  X10DRW-i
 	2fad  Xeon E7 v3/Xeon E5 v3/Core i7 Integrated Memory Controller 0 Channel Target Address Decoder
+		15d9 0821  X10DRW-i
 	2fae  Xeon E7 v3/Xeon E5 v3/Core i7 DDRIO Channel 0/1 Broadcast
 	2faf  Xeon E7 v3/Xeon E5 v3/Core i7 DDRIO Global Broadcast
 	2fb0  Xeon E7 v3/Xeon E5 v3/Core i7 Integrated Memory Controller 0 Channel 0 Thermal Control
@@ -34306,13 +34669,21 @@
 	2fd6  Xeon E7 v3/Xeon E5 v3/Core i7 Integrated Memory Controller 1 Channel 2 ERROR Registers
 	2fd7  Xeon E7 v3/Xeon E5 v3/Core i7 Integrated Memory Controller 1 Channel 3 ERROR Registers
 	2fe0  Xeon E7 v3/Xeon E5 v3/Core i7 Unicast Registers
+		15d9 0821  X10DRW-i
 	2fe1  Xeon E7 v3/Xeon E5 v3/Core i7 Unicast Registers
+		15d9 0821  X10DRW-i
 	2fe2  Xeon E7 v3/Xeon E5 v3/Core i7 Unicast Registers
+		15d9 0821  X10DRW-i
 	2fe3  Xeon E7 v3/Xeon E5 v3/Core i7 Unicast Registers
+		15d9 0821  X10DRW-i
 	2fe4  Xeon E7 v3/Xeon E5 v3/Core i7 Unicast Registers
+		15d9 0821  X10DRW-i
 	2fe5  Xeon E7 v3/Xeon E5 v3/Core i7 Unicast Registers
+		15d9 0821  X10DRW-i
 	2fe6  Xeon E7 v3/Xeon E5 v3/Core i7 Unicast Registers
+		15d9 0821  X10DRW-i
 	2fe7  Xeon E7 v3/Xeon E5 v3/Core i7 Unicast Registers
+		15d9 0821  X10DRW-i
 	2fe8  Xeon E7 v3/Xeon E5 v3/Core i7 Unicast Registers
 	2fe9  Xeon E7 v3/Xeon E5 v3/Core i7 Unicast Registers
 	2fea  Xeon E7 v3/Xeon E5 v3/Core i7 Unicast Registers
@@ -34334,8 +34705,11 @@
 	2ffa  Xeon E7 v3/Xeon E5 v3/Core i7 Buffered Ring Agent
 	2ffb  Xeon E7 v3/Xeon E5 v3/Core i7 Buffered Ring Agent
 	2ffc  Xeon E7 v3/Xeon E5 v3/Core i7 System Address Decoder & Broadcast Registers
+		15d9 0821  X10DRW-i
 	2ffd  Xeon E7 v3/Xeon E5 v3/Core i7 System Address Decoder & Broadcast Registers
+		15d9 0821  X10DRW-i
 	2ffe  Xeon E7 v3/Xeon E5 v3/Core i7 System Address Decoder & Broadcast Registers
+		15d9 0821  X10DRW-i
 	3101  Killer E3100X 2.5 Gigabit Ethernet Controller
 	3140  Easel/Monette Hill Image Processor [Pixel Visual Core]
 	3165  Wireless 3165
@@ -35295,11 +35669,13 @@
 	43ba  Tiger Lake-H PCIe Root Port #3
 	43bb  Tiger Lake-H PCIe Root Port #4
 	43bc  Tiger Lake-H PCI Express Root Port #5
+	43be  11th Gen Core Processor PCIe Root Port #7
 	43c0  Tiger Lake-H PCIe Root Port #17
 	43c7  Tiger Lake-H PCIe Root Port #24
 	43c8  Tiger Lake-H HD Audio Controller
 	43d3  Tiger Lake SATA AHCI Controller
 	43e0  Tiger Lake-H Management Engine Interface
+	43e3  Tiger Lake AMT SOL Redirection
 	43e8  Tiger Lake-H Serial IO I2C Controller #0
 	43e9  Tiger Lake-H Serial IO I2C Controller #1
 	43ed  Tiger Lake-H USB 3.2 Gen 2x1 xHCI Host Controller
@@ -35591,6 +35967,10 @@
 	54b1  Alder Lake-N PCI Express Root Port #10
 	54b2  Alder Lake-N PCI Express Root Port #11
 	54b3  Alder Lake-N PCI Express Root Port #12
+	54b8  Alder Lake-N PCI Express Root Port #1
+	54b9  Alder Lake-N PCI Express Root Port #2
+	54ba  Alder Lake-N PCI Express Root Port #3
+	54be  Alder Lake-N PCI Express Root Port #7
 	54c8  Alder Lake-N PCH High Definition Audio Controller
 	54d3  Alder Lake-N SATA AHCI Controller
 	54e0  Alder Lake-N PCH HECI Controller
@@ -35623,12 +36003,12 @@
 	56b1  DG2 [Arc Pro A40/A50]
 	56b2  DG2 [Arc Pro A60M]
 	56b3  DG2 [Arc Pro A60]
-	56ba  DG2 [Intel Graphics]
-	56bb  DG2 [Intel Graphics]
-	56bc  DG2 [Intel Graphics]
-	56bd  DG2 [Intel Graphics]
-	56be  DG2 [Arc Graphics A750E]
-	56bf  DG2 [Arc Graphics A580E]
+	56ba  DG2 [Arc A380E]
+	56bb  DG2 [Arc A310E]
+	56bc  DG2 [Arc A370E]
+	56bd  DG2 [Arc A350E]
+	56be  DG2 [Arc A750E]
+	56bf  DG2 [Arc A580E]
 	56c0  ATS-M [Data Center GPU Flex 170]
 	56c1  ATS-M [Data Center GPU Flex 140]
 	56c2  ATS-M [Data Center GPU Flex 170V]
@@ -35644,6 +36024,7 @@
 	579c  Ethernet Connection E825-C for backplane
 	579d  Ethernet Connection E825-C for QSFP
 	579e  Ethernet Connection E825-C for SFP
+	579f  Ethernet Connection E825-C 10GbE
 	57a4  Thunderbolt Bridge [Barlow Ridge Hub 40G 2023]
 	57a5  Thunderbolt USB Controller [Barlow Ridge Hub 40G 2023]
 	57ad  E610 Virtual Function
@@ -35688,6 +36069,7 @@
 		17aa 2248  ThinkPad T570
 		17aa 224f  ThinkPad X1 Carbon 5th Gen
 	5917  UHD Graphics 620
+		17aa 225d  ThinkPad T480 (20L5)
 		17aa 225e  ThinkPad T480
 	5918  Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers
 	591b  HD Graphics 630
@@ -36020,6 +36402,18 @@
 	7601  82372FB PIIX5 IDE
 	7602  82372FB PIIX5 USB
 	7603  82372FB PIIX5 SMBus
+	7725  Arrow Lake-H [PCH Serial IO UART Host Controller]
+	7726  Arrow Lake-H [PCH Serial IO UART Host Controller]
+	7727  Arrow Lake-H [LPC/eSPI Controller]
+	7730  Arrow Lake-H [LPC/eSPI Controller]
+	7746  Arrow Lake-H [LPC/eSPI Controller]
+	7750  Arrow Lake-H [Serial IO I2C Host Controller]
+	7751  Arrow Lake-H [Serial IO I2C Host Controller]
+	7752  Arrow Lake-H [PCH Serial IO UART Host Controller]
+	7778  Arrow Lake-H [Serial IO I2C Host Controller]
+	7779  Arrow Lake-H [Serial IO I2C Host Controller]
+	777a  Arrow Lake-H [Serial IO I2C Host Controller]
+	777b  Arrow Lake-H [Serial IO I2C Host Controller]
 	7800  82740 (i740) AGP Graphics Accelerator
 		003d 0008  Starfighter AGP
 		003d 000b  Starfighter AGP
@@ -36073,12 +36467,15 @@
 	7acf  Alder Lake-S PCH Serial IO I2C Controller #3
 	7ad0  Alder Lake-S HD Audio Controller
 	7ae0  Alder Lake-S PCH USB 3.2 Gen 2x2 XHCI Controller
+	7ae1  Alder Lake-S PCH USB 3.2 Gen 1x1 xDCI Controller
 	7ae2  Alder Lake-S PCH SATA Controller [AHCI Mode]
 	7ae8  Alder Lake-S PCH HECI Controller #1
+	7aeb  Alder Lake-S Keyboard and Text (KT) Redirection
 	7af0  Alder Lake-S PCH CNVi WiFi
 		8086 0034  Wireless-AC 9560
 		8086 0070  Wi-Fi 6 AX201 160MHz
 		8086 0094  Wi-Fi 6 AX201 160MHz
+	7af8  Alder Lake-S Integrated Sensor Hub
 	7afc  Alder Lake-S PCH Serial IO I2C Controller #4
 	7afd  Alder Lake-S PCH Serial IO I2C Controller #5
 	7d03  Meteor Lake-P Dynamic Tuning Technology
@@ -36092,7 +36489,7 @@
 	7d51  Arrow Lake-P [Intel Graphics]
 	7d55  Meteor Lake-P [Intel Arc Graphics]
 	7d60  Meteor Lake-M [Intel Graphics]
-	7d67  Arrow Lake-U [Intel Graphics]
+	7d67  Arrow Lake-S [Intel Graphics]
 	7dd1  Arrow Lake-P [Intel Graphics]
 	7dd5  Meteor Lake-P [Intel Graphics]
 	7e01  Meteor Lake-P LPC/eSPI Controller
@@ -36387,6 +36784,7 @@
 	8cc6  H97 Chipset LPC Controller
 	8d00  C610/X99 series chipset 4-port SATA Controller [IDE mode]
 	8d02  C610/X99 series chipset 6-Port SATA Controller [AHCI mode]
+		15d9 0821  X10DRW-i
 	8d04  C610/X99 series chipset SATA Controller [RAID mode]
 	8d06  C610/X99 series chipset SATA Controller [RAID mode]
 		17aa 1031  ThinkServer RAID 110i
@@ -36411,19 +36809,26 @@
 	8d20  C610/X99 series chipset HD Audio Controller
 	8d21  C610/X99 series chipset HD Audio Controller
 	8d22  C610/X99 series chipset SMBus Controller
+		15d9 0821  X10DRW-i
 		15d9 0832  X10SRL-F
 	8d24  C610/X99 series chipset Thermal Subsystem
+		15d9 0821  X10DRW-i
 	8d26  C610/X99 series chipset USB Enhanced Host Controller #1
+		15d9 0821  X10DRW-i
 		15d9 0832  X10SRL-F
 	8d2d  C610/X99 series chipset USB Enhanced Host Controller #2
+		15d9 0821  X10DRW-i
 		15d9 0832  X10SRL-F
 	8d31  C610/X99 series chipset USB xHCI Host Controller
+		15d9 0821  X10DRW-i
 		15d9 0832  X10SRL-F
 	8d33  C610/X99 series chipset LAN Controller
 	8d34  C610/X99 series chipset NAND Controller
 	8d3a  C610/X99 series chipset MEI Controller #1
+		15d9 0821  X10DRW-i
 		15d9 0832  X10SRL-F
 	8d3b  C610/X99 series chipset MEI Controller #2
+		15d9 0821  X10DRW-i
 		15d9 0832  X10SRL-F
 	8d3c  C610/X99 series chipset IDE-r Controller
 	8d3d  C610/X99 series chipset KT Controller
@@ -36432,6 +36837,7 @@
 	8d42  C610/X99 series chipset LPC Controller
 	8d43  C610/X99 series chipset LPC Controller
 	8d44  C610/X99 series chipset LPC Controller
+		15d9 0821  X10DRW-i
 		15d9 0832  X10SRL-F
 	8d45  C610/X99 series chipset LPC Controller
 	8d46  C610/X99 series chipset LPC Controller
@@ -36446,11 +36852,13 @@
 	8d4f  C610/X99 series chipset LPC Controller
 	8d60  C610/X99 series chipset sSATA Controller [IDE mode]
 	8d62  C610/X99 series chipset sSATA Controller [AHCI mode]
+		15d9 0821  X10DRW-i
 	8d64  C610/X99 series chipset sSATA Controller [RAID mode]
 	8d66  C610/X99 series chipset sSATA Controller [RAID mode]
 	8d68  C610/X99 series chipset sSATA Controller [IDE mode]
 	8d6e  C610/X99 series chipset sSATA Controller [RAID mode]
 	8d7c  C610/X99 series chipset SPSR
+		15d9 0821  X10DRW-i
 		15d9 0832  X10SRL-F
 	8d7d  C610/X99 series chipset MS SMBus 0
 	8d7e  C610/X99 series chipset MS SMBus 1
@@ -36467,6 +36875,7 @@
 	9841  Lakefield GT1.5 [UHD Graphics]
 	9a01  11th Gen Core Processor PCIe Controller #1
 	9a03  TigerLake-LP Dynamic Tuning Processor Participant
+	9a07  11th Gen Core Processor PCIe Controller #2
 	9a09  11th Gen Core Processor PCIe Controller
 	9a0b  Volume Management Device NVMe RAID Controller
 	9a0d  Tigerlake Telemetry Aggregator Driver
@@ -37206,6 +37615,7 @@
 	a74d  Raptor Lake PCIe 4.0 Graphics Port
 	a74f  GNA Scoring Accelerator module
 		1028 0c06  Precision 3580
+	a75d  Raptor Lake IPU
 	a76d  Raptor Lake-P Thunderbolt 4 NHI #1
 	a76e  Raptor Lake-P Thunderbolt 4 PCI Express Root Port #0
 	a77d  Raptor Lake Crashlog and Telemetry
@@ -37260,6 +37670,14 @@
 	ad0b  Volume Management Device NVMe RAID Controller Intel Corporation
 	ad1d  Arrow Lake NPU
 	b03e  Panther Lake NPU
+	b080  Panther Lake [Intel Graphics]
+	b081  Panther Lake [Intel Graphics]
+	b082  Panther Lake [Intel Graphics]
+	b083  Panther Lake [Intel Graphics]
+	b08f  Panther Lake [Intel Graphics]
+	b090  Panther Lake [Intel Graphics]
+	b0a0  Panther Lake [Intel Graphics]
+	b0b0  Panther Lake [Intel Graphics]
 	b152  21152 PCI-to-PCI Bridge
 		8086 b152  21152 PCI-to-PCI Bridge
 # observed, and documented in Intel revision note; new mask of 1011:0026
@@ -37271,6 +37689,7 @@
 		4c53 1050  CT7 mainboard
 		4c53 1051  CE7 mainboard
 		e4bf 1000  CC8-1-BLUES
+	b640  Arrow Lake-H [Intel Graphics]
 	d130  Core Processor DMI
 		15d9 0605  X8SIL
 	d131  Core Processor DMI
@@ -37386,6 +37805,10 @@
 		8088 2000  Ethernet Network Adaptor RP2000 for 10GbE SFP+
 		8088 2300  Ethernet Network Adaptor RP2000-A03 for 10GbE SFP+
 		8088 2400  Ethernet Network Adaptor RP2000-A04 for 10GbE SFP+
+	5025  Ethernet Controller WX5025 for 25GbE SFP28
+		8088 1000  Dual-Port Ethernet Network Adapter FF5025-DDATACXX
+	5125  Ethernet Controller WX5025AL for 25GbE SFP28
+		8088 3000  Dual-Port Ethernet Network Adapter FF5025-DDATAIXX
 80ee  InnoTek Systemberatung GmbH
 	beef  VirtualBox Graphics Adapter
 	cafe  VirtualBox Guest Service
@@ -37402,6 +37825,7 @@
 		8510 0007  GB2062-PCIe-C40
 		8510 0008  CQ2040-MXM-M60
 		8510 0009  GB2062-PCIe-C20
+		8510 000b  GB2062-PCIe-HIEILP42
 		8510 000c  CQ2040-PUB
 		8510 0201  GB2062-PUB-DDR
 # nee ScaleMP
@@ -37837,6 +38261,7 @@
 		1d49 0621  ThinkSystem RAID 9350-8i 2GB Flash PCIe 12Gb Internal Adapter
 		1d49 0622  ThinkSystem RAID 9350-16i 4GB Flash PCIe 12Gb Adapter
 		1d49 0623  ThinkSystem RAID 9350-16i 4GB Flash PCIe 12Gb Internal Adapter
+		1f3f 0610  3S610-8i, SAS/SATA 12Gb HBA
 		9005 0608  SmartRAID 3162-8i /e
 		9005 0800  SmartRAID 3154-8i
 		9005 0801  SmartRAID 3152-8i
@@ -38022,6 +38447,8 @@
 	1501  STAR1500C NVMe SSD
 	1502  STAR1500E NVMe SSD
 	1504  STAR1500L NVMe SSD
+# NVMe Gen5 Controller 16ch
+	1516  STAR1516 PCIe NVMe SSD Controller
 	2000  STAR2000 NVMe Controller
 	2001  STAR2000E NVMe SSD
 	2002  STAR2000C NVMe SSD
@@ -38126,7 +38553,9 @@ c0a9  Micron/Crucial Technology
 	5412  P5 NVMe PCIe SSD[SlashP5]
 	5415  T500 NVMe PCIe SSD
 	5419  T700 NVMe PCIe SSD
+	5420  P3 NVMe PCIe SSD (DRAM-less)
 	5421  P3 Plus NVMe PCIe SSD (DRAM-less)
+	5426  P310 NVMe PCIe SSD (DRAM-less)
 	542b  T705 NVMe PCIe SSD
 c0de  Motorola
 c0fe  Motion Engineering, Inc.
@@ -38203,31 +38632,18 @@ d209  Ultimarc
 	15a2  SpinTrak
 	1601  AimTrak
 d20c  Chengdu BeiZhongWangXin Technology Co., Ltd.
-	5010  NE5000 Ethernet Controller
 	5011  NE5000 Ethernet Controller
 		d20c e120  N5 Series 2-port 10GbE Network Adapter
 		d20c e140  N5 Series 4-port 10GbE Network Adapter
 		d20c e220  N5 Series 2-port 25GbE Network Adapter
 		d20c e221  N5S Series 2-port 25GbE Network Adapter
 		d20c e22c  N5 Series 2-port 25GbE Network Adapter for OCP
-		d20c e22d  N5S Series 2-port 25GbE Network Adapter for OCP
-	6010  NE6000 Ethernet Controller
 	6011  NE6000 Ethernet Controller
 		d20c a001  N6S Series Network Adapter
-		d20c a141  N6S Series 4-port 10GbE Network Adapter
-		d20c a221  N6S Series 2-port 25GbE Network Adapter
-		d20c a241  N6S Series 4-port 25GbE Network Adapter
-		d20c a421  N6S Series 2-port 40GbE Network Adapter
-		d20c aa21  N6S Series 2-port 100GbE Network Adapter
-		d20c d221  N6S Series 2-port 25GbE Network Adapter with DPI
-		d20c da21  N6S Series 2-port 100GbE Network Adapter with DPI
 		d20c e221  N6S Series 2-port 25GbE Network Adapter
 		d20c e281  N6S Series 8-port 25GbE Network Adapter
 		d20c e421  N6S Series 2-port 40GbE Network Adapter
-		d20c ea20  N6 Series 2-port 100GbE Network Adapter
 		d20c ea21  N6S Series 2-port 100GbE Network Adapter
-		d20c ea2c  N6 Series 2-port 100GbE Network Adapter for OCP
-		d20c ea2d  N6S Series 2-port 100GbE Network Adapter for OCP
 d4d4  Dy4 Systems Inc
 	0601  PCI Mezzanine Card
 d531  I+ME ACTIA GmbH

From 19a6bc9f51e5c5705a2b396b0da61e6536acb4cb Mon Sep 17 00:00:00 2001
From: Renato Botelho <garga@FreeBSD.org>
Date: Thu, 9 Jan 2025 09:16:10 -0300
Subject: [PATCH 084/143] fwget: Silence log() when -q is used

Summary:
Silence log function when -q parameter is used to prevent undesired output

PR:		283939
Reviewed By:	manu
Differential Revision:	https://reviews.freebsd.org/D48391
Sponsored by:	Rubicon Communications, LLC ("Netgate")
---
 usr.sbin/fwget/fwget.sh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/usr.sbin/fwget/fwget.sh b/usr.sbin/fwget/fwget.sh
index 3e2181e53b7361..d87cd03aa139a4 100755
--- a/usr.sbin/fwget/fwget.sh
+++ b/usr.sbin/fwget/fwget.sh
@@ -47,7 +47,9 @@ EOF
 
 log()
 {
-	echo "$@"
+	if [ "${QUIET}" != "y" ]; then
+		echo "$@"
+	fi
 }
 
 log_verbose()

From 7c94d515db900401a339cd26861856c8fefb3086 Mon Sep 17 00:00:00 2001
From: David Bright <dab@FreeBSD.org>
Date: Sun, 5 Jan 2025 11:24:13 -0600
Subject: [PATCH 085/143] aio_kqueue_test: Fix CID 1558429

Fix a Coverity error in the aio_kqueue_test that could theoretically
(but probably not realistically) cause overindexing an array.

Differential Revision: https://reviews.freebsd.org/D48328
Reviewed by: asomers, vangyzen
Sponsored by: Dell Technologies
---
 tests/sys/aio/aio_kqueue_test.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/sys/aio/aio_kqueue_test.c b/tests/sys/aio/aio_kqueue_test.c
index c2478a9d05b363..5e5cb40d07525c 100644
--- a/tests/sys/aio/aio_kqueue_test.c
+++ b/tests/sys/aio/aio_kqueue_test.c
@@ -35,6 +35,7 @@
 #include <sys/types.h>
 #include <sys/event.h>
 #include <sys/time.h>
+#include <assert.h>
 #include <aio.h>
 #include <err.h>
 #include <errno.h>
@@ -192,6 +193,7 @@ main (int argc, char *argv[])
 
 			for (j = 0; j < max_queue_per_proc && iocb[j] != kq_iocb;
 			   j++) ;
+			assert(j < max_queue_per_proc);
 #ifdef DEBUG
 			printf("kq_iocb %p\n", kq_iocb);
 

From 4a46ece6c6a90f18effbfae7ddef79b41ef43eec Mon Sep 17 00:00:00 2001
From: Mark Johnston <markj@FreeBSD.org>
Date: Thu, 9 Jan 2025 14:49:34 +0000
Subject: [PATCH 086/143] vmm: Fix error handling in vmm_handler()

In commit a97f683fe3c4 I didn't add code to remove the vmmctl device
when vmm.ko is unloaded, so it would persist and prevent vmm.ko from
being re-loaded.

Extend vmmdev_cleanup() to destroy the vmmctl cdev.  Also call
vmmdev_cleanup() if vmm_init() fails.

Reviewed by:	corvink, andrew
Fixes:		a97f683fe3c4 ("vmm: Add a device file interface for creating and destroying VMs")
Differential Revision:	https://reviews.freebsd.org/D48269
---
 sys/amd64/vmm/vmm.c   |  2 ++
 sys/arm64/vmm/vmm.c   | 11 ++++++++---
 sys/dev/vmm/vmm_dev.c | 34 +++++++++++++++++++---------------
 sys/riscv/vmm/vmm.c   | 11 ++++++++---
 4 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index d05d979a531a2b..aa13d506ac6af2 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -467,6 +467,8 @@ vmm_handler(module_t mod, int what, void *arg)
 			error = vmm_init();
 			if (error == 0)
 				vmm_initialized = 1;
+			else
+				(void)vmmdev_cleanup();
 		} else {
 			error = ENXIO;
 		}
diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c
index 808df5e599ace3..77c565e3726441 100644
--- a/sys/arm64/vmm/vmm.c
+++ b/sys/arm64/vmm/vmm.c
@@ -361,21 +361,26 @@ vmm_handler(module_t mod, int what, void *arg)
 
 	switch (what) {
 	case MOD_LOAD:
-		/* TODO: if (vmm_is_hw_supported()) { */
 		error = vmmdev_init();
 		if (error != 0)
 			break;
 		error = vmm_init();
 		if (error == 0)
 			vmm_initialized = true;
+		else
+			(void)vmmdev_cleanup();
 		break;
 	case MOD_UNLOAD:
-		/* TODO: if (vmm_is_hw_supported()) { */
 		error = vmmdev_cleanup();
 		if (error == 0 && vmm_initialized) {
 			error = vmmops_modcleanup();
-			if (error)
+			if (error) {
+				/*
+				 * Something bad happened - prevent new
+				 * VMs from being created
+				 */
 				vmm_initialized = false;
+			}
 		}
 		break;
 	default:
diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c
index 4ab99f92f72a3c..27c960c8ef2eba 100644
--- a/sys/dev/vmm/vmm_dev.c
+++ b/sys/dev/vmm/vmm_dev.c
@@ -979,6 +979,7 @@ vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 	return (error);
 }
 
+static struct cdev *vmmctl_cdev;
 static struct cdevsw vmmctlsw = {
 	.d_name		= "vmmctl",
 	.d_version	= D_VERSION,
@@ -989,31 +990,34 @@ static struct cdevsw vmmctlsw = {
 int
 vmmdev_init(void)
 {
-	struct cdev *cdev;
 	int error;
 
-	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmctlsw, NULL,
+	sx_xlock(&vmmdev_mtx);
+	error = make_dev_p(MAKEDEV_CHECKNAME, &vmmctl_cdev, &vmmctlsw, NULL,
 	    UID_ROOT, GID_WHEEL, 0600, "vmmctl");
-	if (error)
-		return (error);
-
-	pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
-	    "Allow use of vmm in a jail.");
+	if (error == 0)
+		pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
+		    "Allow use of vmm in a jail.");
+	sx_xunlock(&vmmdev_mtx);
 
-	return (0);
+	return (error);
 }
 
 int
 vmmdev_cleanup(void)
 {
-	int error;
-
-	if (SLIST_EMPTY(&head))
-		error = 0;
-	else
-		error = EBUSY;
+	sx_xlock(&vmmdev_mtx);
+	if (!SLIST_EMPTY(&head)) {
+		sx_xunlock(&vmmdev_mtx);
+		return (EBUSY);
+	}
+	if (vmmctl_cdev != NULL) {
+		destroy_dev(vmmctl_cdev);
+		vmmctl_cdev = NULL;
+	}
+	sx_xunlock(&vmmdev_mtx);
 
-	return (error);
+	return (0);
 }
 
 static int
diff --git a/sys/riscv/vmm/vmm.c b/sys/riscv/vmm/vmm.c
index f7cbfc1dfea580..96871fc88453c7 100644
--- a/sys/riscv/vmm/vmm.c
+++ b/sys/riscv/vmm/vmm.c
@@ -259,21 +259,26 @@ vmm_handler(module_t mod, int what, void *arg)
 
 	switch (what) {
 	case MOD_LOAD:
-		/* TODO: check if has_hyp here? */
 		error = vmmdev_init();
 		if (error != 0)
 			break;
 		error = vmm_init();
 		if (error == 0)
 			vmm_initialized = true;
+		else
+			(void)vmmdev_cleanup();
 		break;
 	case MOD_UNLOAD:
-		/* TODO: check if has_hyp here? */
 		error = vmmdev_cleanup();
 		if (error == 0 && vmm_initialized) {
 			error = vmmops_modcleanup();
-			if (error)
+			if (error) {
+				/*
+				 * Something bad happened - prevent new
+				 * VMs from being created
+				 */
 				vmm_initialized = false;
+			}
 		}
 		break;
 	default:

From fe1165df4b776b14b21a04d2ef3fc4c46740c2f5 Mon Sep 17 00:00:00 2001
From: Mark Johnston <markj@FreeBSD.org>
Date: Thu, 9 Jan 2025 14:53:37 +0000
Subject: [PATCH 087/143] vm_pageout: Make vmd_oom a bool

No functional change intended.

Reviewed by:	dougm, kib
MFC after:	1 week
Sponsored by:	Klara, Inc.
Sponsored by:	Modirum MDPay
Differential Revision:	https://reviews.freebsd.org/D48376
---
 sys/vm/vm_page.c      | 2 +-
 sys/vm/vm_pageout.c   | 6 +++---
 sys/vm/vm_pagequeue.h | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index f351295c1af56a..f042d4767b36b9 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -460,7 +460,7 @@ vm_page_domain_init(int domain)
 	vmd->vmd_page_count = 0;
 	vmd->vmd_free_count = 0;
 	vmd->vmd_segs = 0;
-	vmd->vmd_oom = FALSE;
+	vmd->vmd_oom = false;
 	for (i = 0; i < PQ_COUNT; i++) {
 		pq = &vmd->vmd_pagequeues[i];
 		TAILQ_INIT(&pq->pq_pl);
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 28a54a83fd4971..d26e04f60c0090 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -1773,7 +1773,7 @@ vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage,
 		vmd->vmd_oom_seq++;
 	if (vmd->vmd_oom_seq < vm_pageout_oom_seq) {
 		if (vmd->vmd_oom) {
-			vmd->vmd_oom = FALSE;
+			vmd->vmd_oom = false;
 			atomic_subtract_int(&vm_pageout_oom_vote, 1);
 		}
 		return;
@@ -1788,7 +1788,7 @@ vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage,
 	if (vmd->vmd_oom)
 		return;
 
-	vmd->vmd_oom = TRUE;
+	vmd->vmd_oom = true;
 	old_vote = atomic_fetchadd_int(&vm_pageout_oom_vote, 1);
 	if (old_vote != vm_ndomains - 1)
 		return;
@@ -1806,7 +1806,7 @@ vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage,
 	 * memory condition is still there, due to vmd_oom being
 	 * false.
 	 */
-	vmd->vmd_oom = FALSE;
+	vmd->vmd_oom = false;
 	atomic_subtract_int(&vm_pageout_oom_vote, 1);
 }
 
diff --git a/sys/vm/vm_pagequeue.h b/sys/vm/vm_pagequeue.h
index af1183e63e53c8..23a3ea96d80c70 100644
--- a/sys/vm/vm_pagequeue.h
+++ b/sys/vm/vm_pagequeue.h
@@ -257,7 +257,7 @@ struct vm_domain {
 
 	/* Paging control variables, used within single threaded page daemon. */
 	struct pidctrl vmd_pid;		/* Pageout controller. */
-	boolean_t vmd_oom;
+	bool vmd_oom;
 	u_int vmd_inactive_threads;
 	u_int vmd_inactive_shortage;		/* Per-thread shortage. */
 	blockcount_t vmd_inactive_running;	/* Number of inactive threads. */

From 55b343f4f9bc586eba5e26a2524a35f04dd60c65 Mon Sep 17 00:00:00 2001
From: Mark Johnston <markj@FreeBSD.org>
Date: Thu, 9 Jan 2025 14:54:10 +0000
Subject: [PATCH 088/143] vm_pageout: Add a chicken switch for multithreaded
 PQ_INACTIVE scanning

Right now we have the vm.pageout_cpus_per_thread tunable which controls
the number of threads to start up per CPU per NUMA domain, but after
booting, it's not possible to disable multi-threaded scanning.

There is at least one workload where this mechanism doesn't work well;
let's make it possible to disable it without a reboot, to simplify
troubleshooting.

Reviewed by:	dougm, kib
MFC after:	2 weeks
Sponsored by:	Klara, Inc.
Sponsored by:	Modirum MDPay
Differential Revision:	https://reviews.freebsd.org/D48377
---
 sys/vm/vm_page.c      | 1 +
 sys/vm/vm_pageout.c   | 9 +++++++--
 sys/vm/vm_pagequeue.h | 5 +++--
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index f042d4767b36b9..ba22c7f97f2f92 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -461,6 +461,7 @@ vm_page_domain_init(int domain)
 	vmd->vmd_free_count = 0;
 	vmd->vmd_segs = 0;
 	vmd->vmd_oom = false;
+	vmd->vmd_helper_threads_enabled = true;
 	for (i = 0; i < PQ_COUNT; i++) {
 		pq = &vmd->vmd_pagequeues[i];
 		TAILQ_INIT(&pq->pq_pl);
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index d26e04f60c0090..e2efa11842b5a8 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -1644,8 +1644,9 @@ vm_pageout_inactive_dispatch(struct vm_domain *vmd, int shortage)
 	 * If we have more work than we can do in a quarter of our interval, we
 	 * fire off multiple threads to process it.
 	 */
-	threads = vmd->vmd_inactive_threads;
-	if (threads > 1 && vmd->vmd_inactive_pps != 0 &&
+	if ((threads = vmd->vmd_inactive_threads) > 1 &&
+	    vmd->vmd_helper_threads_enabled &&
+	    vmd->vmd_inactive_pps != 0 &&
 	    shortage > vmd->vmd_inactive_pps / VM_INACT_SCAN_RATE / 4) {
 		vmd->vmd_inactive_shortage /= threads;
 		slop = shortage % threads;
@@ -2269,6 +2270,10 @@ vm_pageout_init_domain(int domain)
 	pidctrl_init_sysctl(&vmd->vmd_pid, SYSCTL_CHILDREN(oid));
 
 	vmd->vmd_inactive_threads = get_pageout_threads_per_domain(vmd);
+	SYSCTL_ADD_BOOL(NULL, SYSCTL_CHILDREN(vmd->vmd_oid), OID_AUTO,
+	    "pageout_helper_threads_enabled", CTLFLAG_RWTUN,
+	    &vmd->vmd_helper_threads_enabled, 0,
+	    "Enable multi-threaded inactive queue scanning");
 }
 
 static void
diff --git a/sys/vm/vm_pagequeue.h b/sys/vm/vm_pagequeue.h
index 23a3ea96d80c70..72fd1bb473185e 100644
--- a/sys/vm/vm_pagequeue.h
+++ b/sys/vm/vm_pagequeue.h
@@ -257,8 +257,9 @@ struct vm_domain {
 
 	/* Paging control variables, used within single threaded page daemon. */
 	struct pidctrl vmd_pid;		/* Pageout controller. */
-	bool vmd_oom;
-	u_int vmd_inactive_threads;
+	bool vmd_oom;			/* An OOM kill was requested. */
+	bool vmd_helper_threads_enabled;/* Use multiple threads to scan. */
+	u_int vmd_inactive_threads;	/* Number of extra helper threads. */
 	u_int vmd_inactive_shortage;		/* Per-thread shortage. */
 	blockcount_t vmd_inactive_running;	/* Number of inactive threads. */
 	blockcount_t vmd_inactive_starting;	/* Number of threads started. */

From fb98fc4755def2cb8ca145751b0e54485d5e2f4a Mon Sep 17 00:00:00 2001
From: Renato Botelho <garga@FreeBSD.org>
Date: Thu, 9 Jan 2025 11:24:18 -0300
Subject: [PATCH 089/143] fwget: Simplify logic

Summary:
Use log() to print messages that should be suppressed when -q is in use.

No functional changes intended.

Differential Revision:	https://reviews.freebsd.org/D48393
Reviewed By:	manu
Sponsored by:	Rubicon Communications, LLC ("Netgate")
---
 usr.sbin/fwget/fwget.sh | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/usr.sbin/fwget/fwget.sh b/usr.sbin/fwget/fwget.sh
index d87cd03aa139a4..138a2a26bfb127 100755
--- a/usr.sbin/fwget/fwget.sh
+++ b/usr.sbin/fwget/fwget.sh
@@ -120,16 +120,12 @@ done
 
 case "${packages}" in
 ""|^[[:space:]]*$)
-	if [ "${QUIET}" != "y" ]; then
-		echo "No firmware packages to install."
-	fi
+	log "No firmware packages to install."
 	exit 0
 	;;
 esac
 
-if [ "${QUIET}" != "y" ]; then
-	echo "Needed firmware packages: '${packages}'"
-fi
+log "Needed firmware packages: '${packages}'"
 if [ "${DRY_RUN}" = "y" ]; then
 	if [ "${QUIET}" = "y" ]; then
 		for pkg in ${packages}; do

From 0acab8b3d1336d4db73a9946ef76b4bcd0b0aabe Mon Sep 17 00:00:00 2001
From: Doug Ambrisko <ambrisko@FreeBSD.org>
Date: Thu, 9 Jan 2025 08:28:37 -0800
Subject: [PATCH 090/143] enic(4): fix down/up, MTU changes and more

ifconfig down/up cycles were not working.  Fix that, which is required
to support MTU changes.  Now doing ifconfig enic0 mtu 3000, for example,
works.  If the MTU is changed in the VIC HW configuration, that is not
reflected and the OS reports the default 1500.  I need to look at
that, but changing it via ifconfig works.  So this is different than
what Linux does.

Change TX interrupt allocation to be in this driver.  Change the admin
interrupt count to 2.  This makes multiple queues work, but they need to
be set up as pairs, so if the VIC has more TX or RX queues set up in the
VIC configuration, it will use the lesser value.

While updating the TX interrupt also add support for devcmd2.

Enable checksum offloading.

PR:	282095
---
 sys/dev/enic/cq_desc.h       |  15 ---
 sys/dev/enic/enic.h          |  76 +++++------
 sys/dev/enic/enic_res.c      |   4 +-
 sys/dev/enic/enic_res.h      |   2 -
 sys/dev/enic/enic_txrx.c     |  39 ++++--
 sys/dev/enic/if_enic.c       | 173 +++++++++++++++++++++++---
 sys/dev/enic/vnic_cq.h       |   5 +-
 sys/dev/enic/vnic_dev.c      | 235 ++++++++++++++++++++++++++++++-----
 sys/dev/enic/vnic_dev.h      |   8 +-
 sys/dev/enic/vnic_intr.c     |   2 +-
 sys/dev/enic/vnic_intr.h     |   2 +-
 sys/dev/enic/vnic_resource.h |   1 +
 sys/dev/enic/vnic_rq.c       |   5 +-
 sys/dev/enic/vnic_rq.h       |   1 -
 sys/dev/enic/vnic_rss.h      |   5 -
 sys/dev/enic/vnic_wq.c       | 104 +++++++++++++++-
 sys/dev/enic/vnic_wq.h       |  18 ++-
 17 files changed, 559 insertions(+), 136 deletions(-)

diff --git a/sys/dev/enic/cq_desc.h b/sys/dev/enic/cq_desc.h
index ae8847c6d9a1b2..4fb8cce7212ee4 100644
--- a/sys/dev/enic/cq_desc.h
+++ b/sys/dev/enic/cq_desc.h
@@ -44,14 +44,6 @@ struct cq_desc {
 #define CQ_DESC_COMP_NDX_BITS    12
 #define CQ_DESC_COMP_NDX_MASK    ((1 << CQ_DESC_COMP_NDX_BITS) - 1)
 
-static inline void cq_color_enc(struct cq_desc *desc, const u8 color)
-{
-	if (color)
-		desc->type_color |=  (1 << CQ_DESC_COLOR_SHIFT);
-	else
-		desc->type_color &= ~(1 << CQ_DESC_COLOR_SHIFT);
-}
-
 static inline void cq_desc_enc(struct cq_desc *desc,
 	const u8 type, const u8 color, const u16 q_number,
 	const u16 completed_index)
@@ -87,11 +79,4 @@ static inline void cq_desc_dec(const struct cq_desc *desc_arg,
 		CQ_DESC_COMP_NDX_MASK;
 }
 
-static inline void cq_color_dec(const struct cq_desc *desc_arg, u8 *color)
-{
-	volatile const struct cq_desc *desc = desc_arg;
-
-	*color = (desc->type_color >> CQ_DESC_COLOR_SHIFT) & CQ_DESC_COLOR_MASK;
-}
-
 #endif /* _CQ_DESC_H_ */
diff --git a/sys/dev/enic/enic.h b/sys/dev/enic/enic.h
index 8c221272654839..eec6de823c9de5 100644
--- a/sys/dev/enic/enic.h
+++ b/sys/dev/enic/enic.h
@@ -108,13 +108,13 @@ struct vnic_res {
 #define ENIC_DEFAULT_VXLAN_PORT		4789
 
 /*
- * Interrupt 0: LSC and errors
  * Interrupt 1: rx queue 0
  * Interrupt 2: rx queue 1
  * ...
+ * Interrupt x: LSC and errors
  */
 #define ENICPMD_LSC_INTR_OFFSET 0
-#define ENICPMD_RXQ_INTR_OFFSET 1
+#define ENICPMD_RXQ_INTR_OFFSET 0
 
 #include "vnic_devcmd.h"
 
@@ -152,6 +152,9 @@ struct vnic_dev {
 	u64 args[VNIC_DEVCMD_NARGS];
 	int in_reset;
 	struct vnic_intr_coal_timer_info intr_coal_timer_info;
+	struct devcmd2_controller *devcmd2;
+	int (*devcmd_rtn)(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
+	    int wait);
 	void *(*alloc_consistent)(void *priv, size_t size,
 	    bus_addr_t *dma_handle, struct iflib_dma_info *res, u8 *name);
 	void (*free_consistent)(void *priv, size_t size, void *vaddr,
@@ -175,6 +178,28 @@ struct intr_queue {
 	struct enic_softc *softc;
 };
 
+#define ENIC_MAX_LINK_SPEEDS		3
+#define ENIC_LINK_SPEED_10G		10000
+#define ENIC_LINK_SPEED_4G		4000
+#define ENIC_LINK_40G_INDEX		2
+#define ENIC_LINK_10G_INDEX		1
+#define ENIC_LINK_4G_INDEX		0
+#define ENIC_RX_COALESCE_RANGE_END	125
+#define ENIC_AIC_TS_BREAK		100
+
+struct enic_rx_coal {
+	u32 small_pkt_range_start;
+	u32 large_pkt_range_start;
+	u32 range_end;
+	u32 use_adaptive_rx_coalesce;
+};
+
+/* Store only the lower range.  Higher range is given by fw. */
+struct enic_intr_mod_range {
+	u32 small_pkt_range_start;
+	u32 large_pkt_range_start;
+};
+
 struct enic {
 	struct enic *next;
 	struct rte_pci_device *pdev;
@@ -267,6 +292,9 @@ struct enic {
 	uint64_t tx_offload_mask; /* PKT_TX flags accepted */
 	struct enic_softc *softc;
 	int port_mtu;
+	struct enic_rx_coal rx_coalesce_setting;
+	u32 rx_coalesce_usecs;
+	u32 tx_coalesce_usecs;
 };
 
 struct enic_softc {
@@ -307,11 +335,6 @@ struct enic_softc {
 
 /* Per-instance private data structure */
 
-static inline unsigned int enic_vnic_rq_count(struct enic *enic)
-{
-	return enic->rq_count;
-}
-
 static inline unsigned int enic_cq_rq(struct enic *enic, unsigned int rq)
 {
 	return rq;
@@ -322,21 +345,6 @@ static inline unsigned int enic_cq_wq(struct enic *enic, unsigned int wq)
 	return enic->rq_count + wq;
 }
 
-static inline uint32_t
-enic_ring_add(uint32_t n_descriptors, uint32_t i0, uint32_t i1)
-{
-	uint32_t d = i0 + i1;
-	d -= (d >= n_descriptors) ? n_descriptors : 0;
-	return d;
-}
-
-static inline uint32_t
-enic_ring_sub(uint32_t n_descriptors, uint32_t i0, uint32_t i1)
-{
-	int32_t d = i1 - i0;
-	return (uint32_t)((d < 0) ? ((int32_t)n_descriptors + d) : d);
-}
-
 static inline uint32_t
 enic_ring_incr(uint32_t n_descriptors, uint32_t idx)
 {
@@ -346,34 +354,14 @@ enic_ring_incr(uint32_t n_descriptors, uint32_t idx)
 	return idx;
 }
 
-void enic_free_wq(void *txq);
-int enic_alloc_intr_resources(struct enic *enic);
 int enic_setup_finish(struct enic *enic);
-int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
-		  unsigned int socket_id, uint16_t nb_desc);
 void enic_start_wq(struct enic *enic, uint16_t queue_idx);
 int enic_stop_wq(struct enic *enic, uint16_t queue_idx);
 void enic_start_rq(struct enic *enic, uint16_t queue_idx);
-void enic_free_rq(void *rxq);
-int enic_set_vnic_res(struct enic *enic);
-int enic_init_rss_nic_cfg(struct enic *enic);
-int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu);
-int enic_set_vlan_strip(struct enic *enic);
+int enic_stop_rq(struct enic *enic, uint16_t queue_idx);
+void enic_dev_disable(struct enic *enic);
 int enic_enable(struct enic *enic);
 int enic_disable(struct enic *enic);
-void enic_remove(struct enic *enic);
-int enic_get_link_status(struct enic *enic);
-void enic_dev_stats_clear(struct enic *enic);
-void enic_add_packet_filter(struct enic *enic);
-int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr);
-int enic_del_mac_address(struct enic *enic, int mac_index);
-unsigned int enic_cleanup_wq(struct enic *enic, struct vnic_wq *wq);
-
-void enic_post_wq_index(struct vnic_wq *wq);
-int enic_probe(struct enic *enic);
-int enic_clsf_init(struct enic *enic);
-void enic_clsf_destroy(struct enic *enic);
-int enic_set_mtu(struct enic *enic, uint16_t new_mtu);
 int enic_link_update(struct enic *enic);
 bool enic_use_vector_rx_handler(struct enic *enic);
 void enic_fdir_info(struct enic *enic);
diff --git a/sys/dev/enic/enic_res.c b/sys/dev/enic/enic_res.c
index d264874557a023..413873ad0fb446 100644
--- a/sys/dev/enic/enic_res.c
+++ b/sys/dev/enic/enic_res.c
@@ -95,11 +95,11 @@ int enic_get_vnic_config(struct enic *enic)
 
 	dev_info(enic_get_dev(enic),
 		"vNIC MAC addr %02x:%02x:%02x:%02x:%02x:%02x "
-		"wq/rq %d/%d mtu d, max mtu:%d\n",
+		"wq/rq %d/%d mtu %d, max mtu:%d\n",
 		enic->mac_addr[0], enic->mac_addr[1], enic->mac_addr[2],
 		enic->mac_addr[3], enic->mac_addr[4], enic->mac_addr[5],
 		c->wq_desc_count, c->rq_desc_count,
-		 /* enic->rte_dev->data->mtu, */ enic->max_mtu);
+		c->mtu, enic->max_mtu);
 	dev_info(enic_get_dev(enic), "vNIC csum tx/rx %s/%s "
 		"rss %s intr mode %s type %s timer %d usec "
 		"loopback tag 0x%04x\n",
diff --git a/sys/dev/enic/enic_res.h b/sys/dev/enic/enic_res.h
index 1a6f3a3ca98f57..82963e61a44f6c 100644
--- a/sys/dev/enic/enic_res.h
+++ b/sys/dev/enic/enic_res.h
@@ -67,7 +67,5 @@ int enic_set_nic_cfg(struct enic *enic, u8 rss_default_cpu, u8 rss_hash_type,
 	u8 ig_vlan_strip_en);
 void enic_get_res_counts(struct enic *enic);
 void enic_init_vnic_resources(struct enic *enic);
-int enic_alloc_vnic_resources(struct enic *);
-void enic_free_vnic_resources(struct enic *);
 
 #endif /* _ENIC_RES_H_ */
diff --git a/sys/dev/enic/enic_txrx.c b/sys/dev/enic/enic_txrx.c
index 5a557fc7f94aa9..169041587d0625 100644
--- a/sys/dev/enic/enic_txrx.c
+++ b/sys/dev/enic/enic_txrx.c
@@ -103,6 +103,7 @@ enic_isc_txd_encap(void *vsc, if_pkt_info_t pi)
 
 	softc = vsc;
 	enic = &softc->enic;
+	if_softc_ctx_t scctx = softc->scctx;
 
 	wq = &enic->wq[pi->ipi_qsidx];
 	nsegs = pi->ipi_nsegs;
@@ -112,6 +113,9 @@ enic_isc_txd_encap(void *vsc, if_pkt_info_t pi)
 	head_idx = wq->head_idx;
 	desc_count = wq->ring.desc_count;
 
+	if ((scctx->isc_capenable & IFCAP_RXCSUM) != 0)
+		offload_mode |= WQ_ENET_OFFLOAD_MODE_CSUM;
+
 	for (i = 0; i < nsegs; i++) {
 		eop = 0;
 		cq = 0;
@@ -320,7 +324,7 @@ enic_isc_rxd_flush(void *vsc, uint16_t rxqid, uint8_t flid, qidx_t pidx)
 static int
 enic_legacy_intr(void *xsc)
 {
-	return -1;
+	return (1);
 }
 
 static inline void
@@ -375,7 +379,7 @@ enic_wq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc, u8 type,
 
 	vnic_wq_service(&enic->wq[q_number], cq_desc,
 			completed_index, NULL, opaque);
-	return 0;
+	return (0);
 }
 
 static void
@@ -384,7 +388,7 @@ vnic_rq_service(struct vnic_rq *rq, struct cq_desc *cq_desc,
     void(*buf_service)(struct vnic_rq *rq, struct cq_desc *cq_desc,
     /* struct vnic_rq_buf * *buf, */ int skipped, void *opaque), void *opaque)
 {
-
+	if_softc_ctx_t scctx;
 	if_rxd_info_t ri = (if_rxd_info_t) opaque;
 	u8 type, color, eop, sop, ingress_port, vlan_stripped;
 	u8 fcoe, fcoe_sof, fcoe_fc_crc_ok, fcoe_enc_error, fcoe_eof;
@@ -396,6 +400,8 @@ vnic_rq_service(struct vnic_rq *rq, struct cq_desc *cq_desc,
 	int cqidx;
 	if_rxd_frag_t frag;
 
+	scctx = rq->vdev->softc->scctx;
+
 	cq_enet_rq_desc_dec((struct cq_enet_rq_desc *)cq_desc,
 	    &type, &color, &q_number, &completed_index,
 	    &ingress_port, &fcoe, &eop, &sop, &rss_type,
@@ -419,6 +425,11 @@ vnic_rq_service(struct vnic_rq *rq, struct cq_desc *cq_desc,
 	ri->iri_cidx = cqidx;
 	ri->iri_nfrags = 1;
 	ri->iri_len = bytes_written;
+
+	if ((scctx->isc_capenable & IFCAP_RXCSUM) != 0)
+		if (!csum_not_calc && (tcp_udp_csum_ok || ipv4_csum_ok)) {
+			ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
+		}
 }
 
 static int
@@ -431,7 +442,7 @@ enic_rq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc,
 	vnic_rq_service(&enic->rq[ri->iri_qsidx], cq_desc, completed_index,
 	    VNIC_RQ_RETURN_DESC, NULL, /* enic_rq_indicate_buf, */ opaque);
 
-	return 0;
+	return (0);
 }
 
 void
@@ -468,10 +479,8 @@ enic_stop_wq(struct enic *enic, uint16_t queue_idx)
 	int ret;
 
 	ret = vnic_wq_disable(&enic->wq[queue_idx]);
-	if (ret)
-		return ret;
 
-	return 0;
+	return (ret);
 }
 
 void
@@ -483,3 +492,19 @@ enic_start_rq(struct enic *enic, uint16_t queue_idx)
 	vnic_rq_enable(rq);
 	enic_initial_post_rx(enic, rq);
 }
+
+int
+enic_stop_rq(struct enic *enic, uint16_t queue_idx)
+{
+	int ret;
+
+	ret = vnic_rq_disable(&enic->rq[queue_idx]);
+
+	return (ret);
+}
+
+
+void
+enic_dev_disable(struct enic *enic) {
+	vnic_dev_disable(enic->vdev);
+}
diff --git a/sys/dev/enic/if_enic.c b/sys/dev/enic/if_enic.c
index dc0c0d028e2043..26776244778e2f 100644
--- a/sys/dev/enic/if_enic.c
+++ b/sys/dev/enic/if_enic.c
@@ -201,11 +201,11 @@ static struct if_shared_ctx enic_sctx_init = {
 							 * descriptor */
 	.isc_rx_nsegments = 1,	/* One mapping per descriptor */
 	.isc_rx_maxsegsize = ENIC_DEFAULT_RX_MAX_PKT_SIZE,
-	.isc_admin_intrcnt = 3,
+	.isc_admin_intrcnt = 2,
 	.isc_vendor_info = enic_vendor_info_array,
 	.isc_driver_version = "1",
 	.isc_driver = &enic_iflib_driver,
-	.isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ,
+	.isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_SKIP_MSIX,
 
 	/*
 	 * Number of receive queues per receive queue set, with associated
@@ -235,6 +235,99 @@ enic_register(device_t dev)
 	return (&enic_sctx_init);
 }
 
+/*
+ * Allocate MSI-X vectors for the device: two per RX/TX queue pair plus the
+ * admin vectors.  The RX and TX queue set counts are first clamped to a
+ * common value no larger than the number of interrupt CPUs.
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+static int
+enic_allocate_msix(struct enic_softc *softc)
+{
+	if_softc_ctx_t scctx = softc->scctx;
+	if_shared_ctx_t sctx = iflib_get_sctx(softc->ctx);
+	device_t dev = softc->dev;
+	cpuset_t cpus;
+	int queues, vectors, requested;
+	int err;
+
+	if (bus_get_cpus(dev, INTR_CPUS, sizeof(cpus), &cpus) != 0) {
+		device_printf(dev, "Unable to fetch CPU list\n");
+		CPU_COPY(&all_cpus, &cpus);
+	}
+
+	/* RX and TX queues are used in pairs, so take the smaller count. */
+	queues = CPU_COUNT(&cpus);
+	queues = imin(queues, scctx->isc_nrxqsets);
+	queues = imin(queues, scctx->isc_ntxqsets);
+	requested = queues * 2 + sctx->isc_admin_intrcnt;
+	scctx->isc_nrxqsets = queues;
+	scctx->isc_ntxqsets = queues;
+
+	/* pci_alloc_msix() updates vectors to the count actually granted. */
+	vectors = requested;
+	err = pci_alloc_msix(dev, &vectors);
+	if (err != 0) {
+		device_printf(dev,
+		    "failed to allocate %d MSI-X vectors, err: %d\n",
+		    requested, err);
+		return (1);
+	}
+	if (vectors != requested) {
+		/* A partial grant is rejected; hand the vectors back. */
+		device_printf(dev,
+		    "Unable to allocate sufficient MSI-X vectors "
+		    "(got %d, need %d)\n", vectors, requested);
+		pci_release_msi(dev);
+		return (1);
+	}
+
+	device_printf(dev, "Using MSI-X interrupts with %d vectors\n",
+	    vectors);
+
+	scctx->isc_intr = IFLIB_INTR_MSIX;
+	scctx->isc_vectors = vectors;
+
+	return (0);
+}
+
+static struct enic_intr_mod_range mod_range[ENIC_MAX_LINK_SPEEDS] = {
+	{0,  0}, /* 0  - 4  Gbps */
+	{0,  3}, /* 4  - 10 Gbps */
+	{3,  6}, /* 10 - 40 Gbps */
+};
+
+static void enic_set_rx_coal_setting(struct enic *enic)
+{
+	unsigned int speed;
+	int index = -1;
+	struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
+
+	/* 1. Read the link speed from fw
+	 * 2. Pick the default range for the speed
+	 * 3. Update it in enic->rx_coalesce_setting
+	 */
+	speed = vnic_dev_port_speed(enic->vdev);
+	if (ENIC_LINK_SPEED_10G < speed)
+		index = ENIC_LINK_40G_INDEX;
+	else if (ENIC_LINK_SPEED_4G < speed)
+		index = ENIC_LINK_10G_INDEX;
+	else
+		index = ENIC_LINK_4G_INDEX;
+
+	rx_coal->small_pkt_range_start = mod_range[index].small_pkt_range_start;
+	rx_coal->large_pkt_range_start = mod_range[index].large_pkt_range_start;
+	rx_coal->range_end = ENIC_RX_COALESCE_RANGE_END;
+
+	/* Start with the value provided by UCSM */
+	for (index = 0; index < enic->rq_count; index++)
+		enic->cq[index].cur_rx_coal_timeval =
+		enic->config.intr_timer_usec;
+
+	rx_coal->use_adaptive_rx_coalesce = 1;
+}
+
 static int
 enic_attach_pre(if_ctx_t ctx)
 {
@@ -283,6 +376,8 @@ enic_attach_pre(if_ctx_t ctx)
 	ENIC_LOCK(softc);
 	vnic_dev_register(vdev, &softc->mem, 1);
 	enic->vdev = vdev;
+	vnic_dev_cmd_init(enic->vdev);
+
 	vdev->devcmd = vnic_dev_get_res(vdev, RES_TYPE_DEVCMD, 0);
 
 	vnic_dev_cmd(vdev, CMD_INIT_v1, &a0, &a1, wait);
@@ -326,6 +421,7 @@ enic_attach_pre(if_ctx_t ctx)
 
 	/* Set ingress vlan rewrite mode before vnic initialization */
 	enic->ig_vlan_rewrite_mode = IG_VLAN_REWRITE_MODE_UNTAG_DEFAULT_VLAN;
+	enic->ig_vlan_rewrite_mode = IG_VLAN_REWRITE_MODE_PRIORITY_TAG_DEFAULT_VLAN;
 	err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
 						enic->ig_vlan_rewrite_mode);
 	if (err) {
@@ -360,8 +456,10 @@ enic_attach_pre(if_ctx_t ctx)
 	softc->scctx = iflib_get_softc_ctx(ctx);
 	scctx = softc->scctx;
 	scctx->isc_txrx = &enic_txrx;
-	scctx->isc_capabilities = scctx->isc_capenable = 0;
+	scctx->isc_capabilities = scctx->isc_capenable = \
+		IFCAP_HWCSUM;
 	scctx->isc_tx_csum_flags = 0;
+	if_setmtu(softc->ifp, enic->config.mtu);
 	scctx->isc_max_frame_size = enic->config.mtu + ETHER_HDR_LEN + \
 		ETHER_CRC_LEN;
 	scctx->isc_nrxqsets_max = enic->conf_rq_count;
@@ -389,7 +487,6 @@ enic_attach_pre(if_ctx_t ctx)
 	}
 	scctx->isc_tx_nsegments = 31;
 
-	scctx->isc_vectors = enic->conf_cq_count;
 	scctx->isc_msix_bar = -1;
 
 	ifmedia_add(softc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
@@ -416,12 +513,20 @@ enic_attach_pre(if_ctx_t ctx)
 	err = vnic_dev_alloc_stats_mem(enic->vdev);
 	if (err) {
 		dev_err(enic, "Failed to allocate cmd memory, aborting\n");
+		goto err_out_dev_close;
+	}
+
+        err = enic_allocate_msix(softc);
+        if (err) {
+		dev_err(enic, "Failed to allocate MSIX, aborting\n");
+		goto err_out_dev_close;
 	}
 
 	return (rc);
 
 err_out_dev_close:
 	vnic_dev_close(enic->vdev);
+	vnic_dev_deinit_devcmd2(enic->vdev);
 err_out_unregister:
 	free(softc->vdev.devcmd, M_DEVBUF);
 	free(softc->enic.intr_queues, M_DEVBUF);
@@ -482,9 +587,10 @@ enic_msix_intr_assign(if_ctx_t ctx, int msix)
 		snprintf(irq_name, sizeof(irq_name), "etxq%d:%d", i -
 		    scctx->isc_nrxqsets, device_get_unit(softc->dev));
 
-
-		iflib_softirq_alloc_generic(ctx, &enic->intr_queues[i].intr_irq, IFLIB_INTR_TX, &enic->wq[i - scctx->isc_nrxqsets], i - scctx->isc_nrxqsets, irq_name);
-
+		iflib_softirq_alloc_generic(ctx,
+		    &enic->intr_queues[i].intr_irq, IFLIB_INTR_TX,
+		    &enic->wq[i - scctx->isc_nrxqsets], i - scctx->isc_nrxqsets,
+		    irq_name);
 
 		enic->intr[i].index = i;
 		enic->intr[i].vdev = enic->vdev;
@@ -567,6 +673,7 @@ enic_attach_post(if_ctx_t ctx)
 	enic_setup_sysctl(softc);
 
 	enic_init_vnic_resources(enic);
+	enic_set_rx_coal_setting(enic);
 	enic_setup_finish(enic);
 
 	ifmedia_add(softc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
@@ -589,7 +696,9 @@ enic_detach(if_ctx_t ctx)
 	enic_free_irqs(softc);
 
 	ENIC_LOCK(softc);
+	vnic_dev_deinit(enic->vdev);
 	vnic_dev_close(enic->vdev);
+	vnic_dev_deinit_devcmd2(enic->vdev);
 	free(softc->vdev.devcmd, M_DEVBUF);
 	pci_disable_busmaster(softc->dev);
 	enic_pci_mapping_free(softc);
@@ -807,6 +916,11 @@ enic_stop(if_ctx_t ctx)
 	struct enic    *enic;
 	if_softc_ctx_t	scctx;
 	unsigned int	index;
+	struct vnic_wq *wq;
+	struct vnic_rq *rq;
+	struct vnic_cq *cq;
+	unsigned int	cq_wq, cq_rq;
+
 
 	softc = iflib_get_softc(ctx);
 	scctx = softc->scctx;
@@ -817,15 +931,36 @@ enic_stop(if_ctx_t ctx)
 	softc->link_active = 0;
 	softc->stopped = 1;
 
+	enic_dev_disable(enic);
+
 	for (index = 0; index < scctx->isc_ntxqsets; index++) {
 		enic_stop_wq(enic, index);
 		vnic_wq_clean(&enic->wq[index]);
 		vnic_cq_clean(&enic->cq[enic_cq_rq(enic, index)]);
+
+		wq = &softc->enic.wq[index];
+		wq->ring.desc_avail = wq->ring.desc_count - 1;
+		wq->ring.last_count = wq->ring.desc_count;
+		wq->head_idx = 0;
+		wq->tail_idx = 0;
+
+		cq_wq = enic_cq_wq(&softc->enic, index);
+		cq = &softc->enic.cq[cq_wq];
+		cq->ring.desc_avail = cq->ring.desc_count - 1;
 	}
 
 	for (index = 0; index < scctx->isc_nrxqsets; index++) {
+		enic_stop_rq(enic, index);
 		vnic_rq_clean(&enic->rq[index]);
 		vnic_cq_clean(&enic->cq[enic_cq_wq(enic, index)]);
+
+		rq = &softc->enic.rq[index];
+		cq_rq = enic_cq_rq(&softc->enic, index);
+		cq = &softc->enic.cq[cq_rq];
+
+		cq->ring.desc_avail = cq->ring.desc_count - 1;
+		rq->ring.desc_avail = rq->ring.desc_count - 1;
+		rq->need_initial_post = true;
 	}
 
 	for (index = 0; index < scctx->isc_vectors; index++) {
@@ -845,6 +980,9 @@ enic_init(if_ctx_t ctx)
 	scctx = softc->scctx;
 	enic = &softc->enic;
 
+
+	enic_init_vnic_resources(enic);
+
 	for (index = 0; index < scctx->isc_ntxqsets; index++)
 		enic_prep_wq_for_simple_tx(&softc->enic, index);
 
@@ -862,6 +1000,8 @@ enic_init(if_ctx_t ctx)
 	vnic_dev_enable_wait(enic->vdev);
 	ENIC_UNLOCK(softc);
 
+	softc->stopped = 0;
+
 	enic_link_status(softc);
 }
 
@@ -942,12 +1082,14 @@ enic_mtu_set(if_ctx_t ctx, uint32_t mtu)
 	softc = iflib_get_softc(ctx);
 	enic = &softc->enic;
 
+	enic_stop(softc->ctx);
 	if (mtu > enic->port_mtu){
 		return (EINVAL);
 	}
 
 	enic->config.mtu = mtu;
 	scctx->isc_max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
+	enic_init(softc->ctx);
 
 	return (0);
 }
@@ -1026,7 +1168,6 @@ static void
 enic_update_admin_status(if_ctx_t ctx)
 {
 	struct enic_softc *softc;
-
 	softc = iflib_get_softc(ctx);
 
 	enic_link_status(softc);
@@ -1357,7 +1498,7 @@ enic_dev_init(struct enic *enic)
 		if (vnic_dev_overlay_offload_cfg(enic->vdev,
 		   OVERLAY_CFG_VXLAN_PORT_UPDATE, ENIC_DEFAULT_VXLAN_PORT)) {
 			dev_err(enic, "failed to update vxlan port\n");
-			return -EINVAL;
+			return (EINVAL);
 		}
 	}
 	return 0;
@@ -1441,7 +1582,7 @@ enic_dev_wait(struct vnic_dev *vdev, int (*start) (struct vnic_dev *, int),
 			return 0;
 		usleep(1000);
 	}
-	return -ETIMEDOUT;
+	return (ETIMEDOUT);
 }
 
 static int
@@ -1452,7 +1593,7 @@ enic_map_bar(struct enic_softc *softc, struct enic_bar_info *bar, int bar_num,
 
 	if (bar->res != NULL) {
 		device_printf(softc->dev, "Bar %d already mapped\n", bar_num);
-		return EDOOFUS;
+		return (EDOOFUS);
 	}
 
 	bar->rid = PCIR_BAR(bar_num);
@@ -1481,20 +1622,18 @@ enic_init_vnic_resources(struct enic *enic)
 	unsigned int rxq_interrupt_enable = 0;
 	unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
 	unsigned int txq_interrupt_enable = 0;
-	unsigned int txq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
+	unsigned int txq_interrupt_offset;
 	unsigned int index = 0;
 	unsigned int cq_idx;
 	if_softc_ctx_t scctx;
 
 	scctx = enic->softc->scctx;
 
-
 	rxq_interrupt_enable = 1;
-	txq_interrupt_enable = 1;
+	txq_interrupt_enable = 0;
 
 	rxq_interrupt_offset = 0;
-	txq_interrupt_offset = enic->intr_count - 2;
-	txq_interrupt_offset = 1;
+	txq_interrupt_offset = scctx->isc_nrxqsets;
 
 	for (index = 0; index < enic->intr_count; index++) {
 		vnic_intr_alloc(enic->vdev, &enic->intr[index], index);
@@ -1568,7 +1707,7 @@ enic_update_packet_filter(struct enic *enic)
 }
 
 static bool
-enic_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event)
+enic_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event)
 {
 	switch (event) {
 	case IFLIB_RESTART_VLAN_CONFIG:
diff --git a/sys/dev/enic/vnic_cq.h b/sys/dev/enic/vnic_cq.h
index 26f9009612c5dd..b4549ee58c64b2 100644
--- a/sys/dev/enic/vnic_cq.h
+++ b/sys/dev/enic/vnic_cq.h
@@ -63,6 +63,8 @@ struct vnic_cq {
 	unsigned int to_clean;
 	unsigned int last_color;
 	unsigned int interrupt_offset;
+	unsigned int cur_rx_coal_timeval;
+	unsigned int tobe_rx_coal_timeval;
 #ifdef ENIC_AIC
 	struct vnic_rx_bytes_counter pkt_size_counter;
 	unsigned int cur_rx_coal_timeval;
@@ -75,15 +77,12 @@ struct vnic_cq {
 	int nrxqsets_start;
 };
 
-void vnic_cq_free(struct vnic_cq *cq);
 void vnic_cq_init(struct vnic_cq *cq, unsigned int flow_control_enable,
     unsigned int color_enable, unsigned int cq_head, unsigned int cq_tail,
     unsigned int cq_tail_color, unsigned int interrupt_enable,
     unsigned int cq_entry_enable, unsigned int message_enable,
     unsigned int interrupt_offset, u64 message_addr);
 void vnic_cq_clean(struct vnic_cq *cq);
-int vnic_cq_mem_size(struct vnic_cq *cq, unsigned int desc_count,
-    unsigned int desc_size);
 
 static inline unsigned int vnic_cq_service(struct vnic_cq *cq,
     unsigned int work_to_do,
diff --git a/sys/dev/enic/vnic_dev.c b/sys/dev/enic/vnic_dev.c
index 3425d7372e5683..2d555cb2b34dc3 100644
--- a/sys/dev/enic/vnic_dev.c
+++ b/sys/dev/enic/vnic_dev.c
@@ -44,7 +44,7 @@ static int vnic_dev_discover_res(struct vnic_dev *vdev,
 	u8 type;
 
 	if (num_bars == 0)
-		return -EINVAL;
+		return (EINVAL);
 
 	rh = malloc(sizeof(*rh), M_DEVBUF, M_NOWAIT | M_ZERO);
 	mrh = malloc(sizeof(*mrh), M_DEVBUF, M_NOWAIT | M_ZERO);
@@ -52,7 +52,7 @@ static int vnic_dev_discover_res(struct vnic_dev *vdev,
 		pr_err("vNIC BAR0 res hdr not mem-mapped\n");
 		free(rh, M_DEVBUF);
 		free(mrh, M_DEVBUF);
-		return -EINVAL;
+		return (EINVAL);
 	}
 
 	/* Check for mgmt vnic in addition to normal vnic */
@@ -69,7 +69,7 @@ static int vnic_dev_discover_res(struct vnic_dev *vdev,
 				rh->magic, rh->version);
 			free(rh, M_DEVBUF);
 			free(mrh, M_DEVBUF);
-			return -EINVAL;
+			return (EINVAL);
 		}
 	}
 
@@ -97,6 +97,7 @@ static int vnic_dev_discover_res(struct vnic_dev *vdev,
 		case RES_TYPE_INTR_CTRL:
 		case RES_TYPE_INTR_PBA_LEGACY:
 		case RES_TYPE_DEVCMD:
+		case RES_TYPE_DEVCMD2:
 			break;
 		default:
 			ENIC_BUS_READ_REGION_4(softc, mem, r_offset, (void *)r, sizeof(*r) / 4);
@@ -189,12 +190,12 @@ static int _vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
 	status = ENIC_BUS_READ_4(devcmd, DEVCMD_STATUS);
 	if (status == 0xFFFFFFFF) {
 		/* PCI-e target device is gone */
-		return -ENODEV;
+		return (ENODEV);
 	}
 	if (status & STAT_BUSY) {
 
 		pr_err("Busy devcmd %d\n",  _CMD_N(cmd));
-		return -EBUSY;
+		return (EBUSY);
 	}
 
 	if (_CMD_DIR(cmd) & _CMD_DIR_WRITE) {
@@ -214,7 +215,7 @@ static int _vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
 		status = ENIC_BUS_READ_4(devcmd, DEVCMD_STATUS);
 		if (status == 0xFFFFFFFF) {
 			/* PCI-e target device is gone */
-			return -ENODEV;
+			return (ENODEV);
 		}
 
 		if (!(status & STAT_BUSY)) {
@@ -225,7 +226,7 @@ static int _vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
 					pr_err("Devcmd %d failed " \
 						"with error code %d\n",
 						_CMD_N(cmd), err);
-				return err;
+				return (err);
 			}
 
 			if (_CMD_DIR(cmd) & _CMD_DIR_READ) {
@@ -237,7 +238,82 @@ static int _vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
 	}
 
 	pr_err("Timedout devcmd %d\n", _CMD_N(cmd));
-	return -ETIMEDOUT;
+	return (ETIMEDOUT);
+}
+
+static int _vnic_dev_cmd2(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
+	int wait)
+{
+	struct devcmd2_controller *dc2c = vdev->devcmd2;
+	struct devcmd2_result *result;
+	u8 color;
+	unsigned int i;
+	u32 fetch_index, new_posted;
+	int delay, err;
+	u32 posted = dc2c->posted;
+
+	fetch_index = ENIC_BUS_READ_4(dc2c->wq_ctrl, TX_FETCH_INDEX);
+	if (fetch_index == 0xFFFFFFFF)
+		return (ENODEV);
+
+	new_posted = (posted + 1) % DEVCMD2_RING_SIZE;
+
+	if (new_posted == fetch_index) {
+		device_printf(dev_from_vnic_dev(vdev),
+		    "devcmd2 %d: wq is full. fetch index: %u, posted index: %u\n",
+		    _CMD_N(cmd), fetch_index, posted);
+		return (EBUSY);
+	}
+
+	dc2c->cmd_ring[posted].cmd = cmd;
+	dc2c->cmd_ring[posted].flags = 0;
+
+	if ((_CMD_FLAGS(cmd) & _CMD_FLAGS_NOWAIT))
+		dc2c->cmd_ring[posted].flags |= DEVCMD2_FNORESULT;
+	if (_CMD_DIR(cmd) & _CMD_DIR_WRITE)
+		for (i = 0; i < VNIC_DEVCMD_NARGS; i++)
+			dc2c->cmd_ring[posted].args[i] = vdev->args[i];
+
+	ENIC_BUS_WRITE_4(dc2c->wq_ctrl, TX_POSTED_INDEX, new_posted);
+	dc2c->posted = new_posted;
+
+	if (dc2c->cmd_ring[posted].flags & DEVCMD2_FNORESULT)
+		return (0);
+
+	result = dc2c->result + dc2c->next_result;
+	color = dc2c->color;
+
+	dc2c->next_result++;
+	if (dc2c->next_result == dc2c->result_size) {
+		dc2c->next_result = 0;
+		dc2c->color = dc2c->color ? 0 : 1;
+	}
+
+	for (delay = 0; delay < wait; delay++) {
+		if (result->color == color) {
+			if (result->error) {
+				err = result->error;
+				if (err != ERR_ECMDUNKNOWN ||
+				     cmd != CMD_CAPABILITY)
+					device_printf(dev_from_vnic_dev(vdev),
+					     "Error %d devcmd %d\n", err,
+					     _CMD_N(cmd));
+				return (err);
+			}
+			if (_CMD_DIR(cmd) & _CMD_DIR_READ)
+				for (i = 0; i < VNIC_DEVCMD2_NARGS; i++)
+					vdev->args[i] = result->results[i];
+
+			return 0;
+		}
+		udelay(100);
+	}
+
+	device_printf(dev_from_vnic_dev(vdev),
+	    "devcmd %d timed out\n", _CMD_N(cmd));
+
+
+	return (ETIMEDOUT);
 }
 
 static int vnic_dev_cmd_proxy(struct vnic_dev *vdev,
@@ -253,7 +329,7 @@ static int vnic_dev_cmd_proxy(struct vnic_dev *vdev,
 	 */
 	if (nargs > VNIC_DEVCMD_NARGS - 2) {
 		pr_err("number of args %d exceeds the maximum\n", nargs);
-		return -EINVAL;
+		return (EINVAL);
 	}
 	memset(vdev->args, 0, sizeof(vdev->args));
 
@@ -261,9 +337,9 @@ static int vnic_dev_cmd_proxy(struct vnic_dev *vdev,
 	vdev->args[1] = cmd;
 	memcpy(&vdev->args[2], args, nargs * sizeof(args[0]));
 
-	err = _vnic_dev_cmd(vdev, proxy_cmd, wait);
+	err = vdev->devcmd_rtn(vdev, proxy_cmd, wait);
 	if (err)
-		return err;
+		return (err);
 
 	status = (u32)vdev->args[0];
 	if (status & STAT_ERROR) {
@@ -271,7 +347,7 @@ static int vnic_dev_cmd_proxy(struct vnic_dev *vdev,
 		if (err != ERR_ECMDUNKNOWN ||
 		    cmd != CMD_CAPABILITY)
 			pr_err("Error %d proxy devcmd %d\n", err, _CMD_N(cmd));
-		return err;
+		return (err);
 	}
 
 	memcpy(args, &vdev->args[1], nargs * sizeof(args[0]));
@@ -286,16 +362,16 @@ static int vnic_dev_cmd_no_proxy(struct vnic_dev *vdev,
 
 	if (nargs > VNIC_DEVCMD_NARGS) {
 		pr_err("number of args %d exceeds the maximum\n", nargs);
-		return -EINVAL;
+		return (EINVAL);
 	}
 	memset(vdev->args, 0, sizeof(vdev->args));
 	memcpy(vdev->args, args, nargs * sizeof(args[0]));
 
-	err = _vnic_dev_cmd(vdev, cmd, wait);
+	err = vdev->devcmd_rtn(vdev, cmd, wait);
 
 	memcpy(args, vdev->args, nargs * sizeof(args[0]));
 
-	return err;
+	return (err);
 }
 
 int vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
@@ -328,7 +404,7 @@ int vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
 		*a1 = args[1];
 	}
 
-	return err;
+	return (err);
 }
 
 int vnic_dev_cmd_args(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
@@ -400,7 +476,7 @@ int vnic_dev_capable_filter_mode(struct vnic_dev *vdev, u32 *mode,
 		args[1] = 0;
 		err = vnic_dev_cmd_args(vdev, CMD_CAPABILITY, args, 2, 1000);
 		if (err)
-			return err;
+			return (err);
 		max_level = args[1];
 		goto parse_max_level;
 	} else if (args[2] == FILTER_CAP_MODE_V1) {
@@ -479,7 +555,7 @@ int vnic_dev_spec(struct vnic_dev *vdev, unsigned int offset, size_t size,
 		break;
 	}
 
-	return err;
+	return (err);
 }
 
 int vnic_dev_stats_clear(struct vnic_dev *vdev)
@@ -497,7 +573,7 @@ int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats)
 	int rc;
 
 	if (!vdev->stats)
-		return -ENOMEM;
+		return (ENOMEM);
 
 	*stats = vdev->stats;
 	a0 = vdev->stats_res.idi_paddr;
@@ -524,10 +600,10 @@ int vnic_dev_counter_dma_cfg(struct vnic_dev *vdev, u32 period,
 	int err;
 
 	if (num_counters > VNIC_MAX_FLOW_COUNTERS)
-		return -ENOMEM;
+		return (ENOMEM);
 	if (period > 0 && (period < VNIC_COUNTER_DMA_MIN_PERIOD ||
 	    num_counters == 0))
-		return -EINVAL;
+		return (EINVAL);
 
 	args[0] = num_counters;
 	args[1] = vdev->flow_counters_res.idi_paddr;
@@ -545,7 +621,7 @@ int vnic_dev_counter_dma_cfg(struct vnic_dev *vdev, u32 period,
 		vdev->flow_counters_dma_active = (num_counters != 0 &&
 						  period != 0);
 
-	return err;
+	return (err);
 }
 
 int vnic_dev_close(struct vnic_dev *vdev)
@@ -593,7 +669,7 @@ int vnic_dev_open_done(struct vnic_dev *vdev, int *done)
 
 	err = vnic_dev_cmd(vdev, CMD_OPEN_STATUS, &a0, &a1, wait);
 	if (err)
-		return err;
+		return (err);
 
 	*done = (a0 == 0);
 
@@ -611,7 +687,7 @@ int vnic_dev_get_mac_addr(struct vnic_dev *vdev, u8 *mac_addr)
 
 	err = vnic_dev_cmd(vdev, CMD_GET_MAC_ADDR, &a0, &a1, wait);
 	if (err)
-		return err;
+		return (err);
 
 	for (i = 0; i < ETH_ALEN; i++)
 		mac_addr[i] = ((u8 *)&a0)[i];
@@ -636,7 +712,7 @@ int vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast,
 	if (err)
 		pr_err("Can't set packet filter\n");
 
-	return err;
+	return (err);
 }
 
 int vnic_dev_add_addr(struct vnic_dev *vdev, u8 *addr)
@@ -655,7 +731,7 @@ int vnic_dev_add_addr(struct vnic_dev *vdev, u8 *addr)
 			addr[0], addr[1], addr[2], addr[3], addr[4], addr[5],
 			err);
 
-	return err;
+	return (err);
 }
 
 int vnic_dev_del_addr(struct vnic_dev *vdev, u8 *addr)
@@ -674,7 +750,7 @@ int vnic_dev_del_addr(struct vnic_dev *vdev, u8 *addr)
 			addr[0], addr[1], addr[2], addr[3], addr[4], addr[5],
 			err);
 
-	return err;
+	return (err);
 }
 
 int vnic_dev_set_ig_vlan_rewrite_mode(struct vnic_dev *vdev,
@@ -771,7 +847,7 @@ int vnic_dev_notify_unsetcmd(struct vnic_dev *vdev)
 		vdev->notify_sz = 0;
 	}
 
-	return err;
+	return (err);
 }
 
 int vnic_dev_notify_unset(struct vnic_dev *vdev)
@@ -807,7 +883,8 @@ static int vnic_dev_notify_ready(struct vnic_dev *vdev)
 			csum += words[i];
 	} while (csum != words[0]);
 
-	return 1;
+
+	return (1);
 }
 
 int vnic_dev_init(struct vnic_dev *vdev, int arg)
@@ -923,7 +1000,7 @@ int vnic_dev_alloc_counter_mem(struct vnic_dev *vdev)
 	iflib_dma_alloc(softc->ctx, sizeof(struct vnic_counter_counts) * VNIC_MAX_FLOW_COUNTERS, &vdev->flow_counters_res, 0);
 	vdev->flow_counters = (struct vnic_counter_counts *)vdev->flow_counters_res.idi_vaddr;
 	vdev->flow_counters_dma_active = 0;
-	return vdev->flow_counters == NULL ? -ENOMEM : 0;
+	return (vdev->flow_counters == NULL ? ENOMEM : 0);
 }
 
 struct vnic_dev *vnic_dev_register(struct vnic_dev *vdev,
@@ -942,6 +1019,85 @@ struct vnic_dev *vnic_dev_register(struct vnic_dev *vdev,
 	return NULL;
 }
 
+static int vnic_dev_init_devcmd1(struct vnic_dev *vdev)
+{
+	/* Look up the DEVCMD resource; without it devcmd1 cannot be used. */
+	vdev->devcmd = vnic_dev_get_res(vdev, RES_TYPE_DEVCMD, 0);
+	if (vdev->devcmd == NULL)
+		return (ENODEV);
+	vdev->devcmd_rtn = _vnic_dev_cmd;	/* route commands through devcmd1 */
+	return (0);
+}
+
+static int vnic_dev_init_devcmd2(struct vnic_dev *vdev)
+{
+	int err;
+	unsigned int fetch_index;
+
+	/* Set up the devcmd2 controller: command WQ plus DMA result ring. */
+	if (vdev->devcmd2)	/* already initialized */
+		return (0);
+
+	vdev->devcmd2 = malloc(sizeof(*vdev->devcmd2), M_DEVBUF,
+	    M_NOWAIT | M_ZERO);
+
+	if (!vdev->devcmd2)
+		return (ENOMEM);
+
+	vdev->devcmd2->color = 1;
+	vdev->devcmd2->result_size = DEVCMD2_RING_SIZE;
+
+	err = enic_wq_devcmd2_alloc(vdev, &vdev->devcmd2->wq, DEVCMD2_RING_SIZE,
+	    DEVCMD2_DESC_SIZE);
+	if (err)
+		goto err_free_devcmd2;
+	/* wq_ctrl stays NULL until here, so cleanup knows the WQ ring exists. */
+	vdev->devcmd2->wq_ctrl = vdev->devcmd2->wq.ctrl;
+	vdev->devcmd2->cmd_ring = vdev->devcmd2->wq.ring.descs;
+
+	fetch_index = ENIC_BUS_READ_4(vdev->devcmd2->wq.ctrl, TX_FETCH_INDEX);
+	if (fetch_index == 0xFFFFFFFF) {
+		/* Release the WQ ring and controller instead of leaking them. */
+		err = ENODEV;
+		goto err_free_devcmd2;
+	}
+
+	enic_wq_init_start(&vdev->devcmd2->wq, 0, fetch_index, fetch_index, 0,
+	    0);
+	vdev->devcmd2->posted = fetch_index;
+	vnic_wq_enable(&vdev->devcmd2->wq);
+
+	err = vnic_dev_alloc_desc_ring(vdev, &vdev->devcmd2->results_ring,
+	    DEVCMD2_RING_SIZE, DEVCMD2_DESC_SIZE);
+	if (err)
+		goto err_free_devcmd2;
+
+	vdev->devcmd2->result = vdev->devcmd2->results_ring.descs;
+	/* Hand the result ring's bus address and size to the device. */
+	vdev->args[0] = (u64)vdev->devcmd2->results_ring.base_addr |
+	    VNIC_PADDR_TARGET;
+	vdev->args[1] = DEVCMD2_RING_SIZE;
+
+	err = _vnic_dev_cmd2(vdev, CMD_INITIALIZE_DEVCMD2, 1000);
+	if (err)
+		goto err_free_devcmd2;
+
+	vdev->devcmd_rtn = _vnic_dev_cmd2;
+
+	return (0);
+
+err_free_devcmd2:
+	/* err holds the failure cause; undo only what was actually set up. */
+	if (vdev->devcmd2->wq_ctrl)
+		vnic_wq_free(&vdev->devcmd2->wq);
+	if (vdev->devcmd2->result)
+		vnic_dev_free_desc_ring(vdev, &vdev->devcmd2->results_ring);
+	free(vdev->devcmd2, M_DEVBUF);
+	vdev->devcmd2 = NULL;
+
+	return (err);
+}
+
 /*
  *  vnic_dev_classifier: Add/Delete classifier entries
  *  @vdev: vdev of the device
@@ -1037,3 +1193,22 @@ bool vnic_dev_counter_query(struct vnic_dev *vdev, uint32_t idx,
 device_t dev_from_vnic_dev(struct vnic_dev *vdev) {
 	return (vdev->softc->dev);
 }
+
+int vnic_dev_cmd_init(struct vnic_dev *vdev) {
+	int rc;
+
+	/*
+	 * Pick the command interface: use devcmd2 when the device exposes the
+	 * DEVCMD2 resource and it initializes cleanly, otherwise use devcmd1.
+	 */
+	if (vnic_dev_get_res(vdev, RES_TYPE_DEVCMD2, 0) != NULL) {
+		rc = vnic_dev_init_devcmd2(vdev);
+		if (rc == 0)
+			return (0);
+		device_printf(dev_from_vnic_dev(vdev),
+		    "DEVCMD2 init failed, Using DEVCMD1\n");
+	}
+
+	rc = vnic_dev_init_devcmd1(vdev);
+	return (rc);
+}
diff --git a/sys/dev/enic/vnic_dev.h b/sys/dev/enic/vnic_dev.h
index f8ca29f4e175b2..5e2d01d985f3f5 100644
--- a/sys/dev/enic/vnic_dev.h
+++ b/sys/dev/enic/vnic_dev.h
@@ -38,6 +38,7 @@ struct vnic_dev_ring {
 	unsigned int desc_count;
 	unsigned int desc_avail;
 	unsigned int last_count;
+	iflib_dma_info_t ifdip;
 };
 
 struct vnic_dev_iomap_info {
@@ -69,6 +70,10 @@ unsigned long vnic_dev_get_res_type_len(struct vnic_dev *vdev,
 unsigned int vnic_dev_desc_ring_size(struct vnic_dev_ring *ring,
     unsigned int desc_count, unsigned int desc_size);
 void vnic_dev_clear_desc_ring(struct vnic_dev_ring *ring);
+int vnic_dev_alloc_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring,
+    unsigned int desc_count, unsigned int desc_size);
+void vnic_dev_free_desc_ring(struct vnic_dev *vdev,
+    struct vnic_dev_ring *ring);
 int vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
     u64 *a0, u64 *a1, int wait);
 int vnic_dev_cmd_args(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
@@ -143,7 +148,7 @@ struct vnic_dev *vnic_dev_register(struct vnic_dev *vdev,
 struct rte_pci_device *vnic_dev_get_pdev(struct vnic_dev *vdev);
 int vnic_dev_alloc_stats_mem(struct vnic_dev *vdev);
 int vnic_dev_alloc_counter_mem(struct vnic_dev *vdev);
-int vnic_dev_cmd_init(struct vnic_dev *vdev, int fallback);
+int vnic_dev_cmd_init(struct vnic_dev *vdev);
 int vnic_dev_get_size(void);
 int vnic_dev_int13(struct vnic_dev *vdev, u64 arg, u32 op);
 int vnic_dev_perbi(struct vnic_dev *vdev, u64 arg, u32 op);
@@ -164,6 +169,7 @@ bool vnic_dev_counter_alloc(struct vnic_dev *vdev, uint32_t *idx);
 bool vnic_dev_counter_free(struct vnic_dev *vdev, uint32_t idx);
 bool vnic_dev_counter_query(struct vnic_dev *vdev, uint32_t idx,
     bool reset, uint64_t *packets, uint64_t *bytes);
+void vnic_dev_deinit_devcmd2(struct vnic_dev *vdev);
 
 device_t dev_from_vnic_dev(struct vnic_dev *vdev);
 
diff --git a/sys/dev/enic/vnic_intr.c b/sys/dev/enic/vnic_intr.c
index 38e2ea6e066bbd..8a6494efd5f311 100644
--- a/sys/dev/enic/vnic_intr.c
+++ b/sys/dev/enic/vnic_intr.c
@@ -21,7 +21,7 @@ int vnic_intr_alloc(struct vnic_dev *vdev, struct vnic_intr *intr,
 	intr->ctrl = vnic_dev_get_res(vdev, RES_TYPE_INTR_CTRL, index);
 	if (!intr->ctrl) {
 		pr_err("Failed to hook INTR[%d].ctrl resource\n", index);
-		return -EINVAL;
+		return (EINVAL);
 	}
 
 	return 0;
diff --git a/sys/dev/enic/vnic_intr.h b/sys/dev/enic/vnic_intr.h
index 22db66096aaefb..6d1e8e1cf050c5 100644
--- a/sys/dev/enic/vnic_intr.h
+++ b/sys/dev/enic/vnic_intr.h
@@ -76,7 +76,7 @@ static inline void vnic_intr_return_credits(struct vnic_intr *intr,
 
 static inline unsigned int vnic_intr_credits(struct vnic_intr *intr)
 {
-	return ENIC_BUS_READ_4(intr->ctrl, INTR_CREDITS);
+	return (ENIC_BUS_READ_4(intr->ctrl, INTR_CREDITS));
 }
 
 static inline void vnic_intr_return_all_credits(struct vnic_intr *intr)
diff --git a/sys/dev/enic/vnic_resource.h b/sys/dev/enic/vnic_resource.h
index 184bfa7401df87..d365b8d914bac8 100644
--- a/sys/dev/enic/vnic_resource.h
+++ b/sys/dev/enic/vnic_resource.h
@@ -39,6 +39,7 @@ enum vnic_res_type {
 	RES_TYPE_MQ_RQ,                 /* MQ Receive queues */
 	RES_TYPE_MQ_CQ,                 /* MQ Completion queues */
 	RES_TYPE_DEPRECATED1,           /* Old version of devcmd 2 */
+	RES_TYPE_DEPRECATED2,           /* Old version of devcmd 2 */
 	RES_TYPE_DEVCMD2,               /* Device control region */
 	RES_TYPE_MAX,			/* Count of resource types */
 };
diff --git a/sys/dev/enic/vnic_rq.c b/sys/dev/enic/vnic_rq.c
index 3720da5f9aa69b..ef30563fa2f347 100644
--- a/sys/dev/enic/vnic_rq.c
+++ b/sys/dev/enic/vnic_rq.c
@@ -40,6 +40,7 @@ void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index,
 		fetch_index = 0;
 	}
 
+	fetch_index = 0;
 	vnic_rq_init_start(rq, cq_index,
 		fetch_index, fetch_index,
 		error_interrupt_enable,
@@ -50,7 +51,7 @@ void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index,
 
 unsigned int vnic_rq_error_status(struct vnic_rq *rq)
 {
-	return ENIC_BUS_READ_4(rq->ctrl, RX_ERROR_STATUS);
+	return (ENIC_BUS_READ_4(rq->ctrl, RX_ERROR_STATUS));
 }
 
 void vnic_rq_enable(struct vnic_rq *rq)
@@ -73,7 +74,7 @@ int vnic_rq_disable(struct vnic_rq *rq)
 
 	pr_err("Failed to disable RQ[%d]\n", rq->index);
 
-	return -ETIMEDOUT;
+	return (ETIMEDOUT);
 }
 
 void vnic_rq_clean(struct vnic_rq *rq)
diff --git a/sys/dev/enic/vnic_rq.h b/sys/dev/enic/vnic_rq.h
index ae8c1fdc39bdaf..9e3d239809c424 100644
--- a/sys/dev/enic/vnic_rq.h
+++ b/sys/dev/enic/vnic_rq.h
@@ -133,7 +133,6 @@ void vnic_rq_init_start(struct vnic_rq *rq, unsigned int cq_index,
 void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index,
     unsigned int error_interrupt_enable,
     unsigned int error_interrupt_offset);
-void vnic_rq_error_out(struct vnic_rq *rq, unsigned int error);
 unsigned int vnic_rq_error_status(struct vnic_rq *rq);
 void vnic_rq_enable(struct vnic_rq *rq);
 int vnic_rq_disable(struct vnic_rq *rq);
diff --git a/sys/dev/enic/vnic_rss.h b/sys/dev/enic/vnic_rss.h
index abd7b9f131aa9b..039041ece5b2c4 100644
--- a/sys/dev/enic/vnic_rss.h
+++ b/sys/dev/enic/vnic_rss.h
@@ -24,9 +24,4 @@ union vnic_rss_cpu {
 	u64 raw[32];
 };
 
-void vnic_set_rss_key(union vnic_rss_key *rss_key, u8 *key);
-void vnic_set_rss_cpu(union vnic_rss_cpu *rss_cpu, u8 *cpu);
-void vnic_get_rss_key(union vnic_rss_key *rss_key, u8 *key);
-void vnic_get_rss_cpu(union vnic_rss_cpu *rss_cpu, u8 *cpu);
-
 #endif /* _VNIC_RSS_H_ */
diff --git a/sys/dev/enic/vnic_wq.c b/sys/dev/enic/vnic_wq.c
index b032df3392b2ed..995af3270a21df 100644
--- a/sys/dev/enic/vnic_wq.c
+++ b/sys/dev/enic/vnic_wq.c
@@ -7,7 +7,103 @@
 #include "vnic_dev.h"
 #include "vnic_wq.h"
 
-void vnic_wq_init_start(struct vnic_wq *wq, unsigned int cq_index,
+/*
+ * Allocate a DMA descriptor ring and describe it in *ring; returns 0 or ENOMEM.
+ */
+int vnic_dev_alloc_desc_ring(struct vnic_dev *vdev,
+    struct vnic_dev_ring *ring, unsigned int desc_count, unsigned int desc_size)
+{
+	iflib_dma_info_t ifdip;
+	int err;
+
+	/* Bookkeeping for the DMA area; kept in ring->ifdip for the free path. */
+	ifdip = malloc(sizeof(struct iflib_dma_info), M_DEVBUF,
+	    M_NOWAIT | M_ZERO);
+	if (ifdip == NULL) {
+		device_printf(dev_from_vnic_dev(vdev),
+		    "Unable to allocate DMA info memory\n");
+		return (ENOMEM);
+	}
+
+	/* DMA-able backing store for desc_count descriptors of desc_size. */
+	err = iflib_dma_alloc(vdev->softc->ctx, desc_count * desc_size,
+	    ifdip, 0);
+	if (err) {
+		device_printf(dev_from_vnic_dev(vdev),
+		    "Unable to allocate DEVCMD2 descriptors\n");
+		free(ifdip, M_DEVBUF);
+		return (ENOMEM);
+	}
+
+	ring->base_addr = ifdip->idi_paddr;
+	ring->descs = ifdip->idi_vaddr;
+	ring->ifdip = ifdip;
+	ring->desc_size = desc_size;
+	ring->desc_count = desc_count;
+	ring->last_count = 0;
+	ring->desc_avail = ring->desc_count - 1;
+
+	ring->size = ring->desc_count * ring->desc_size;
+	ring->base_align = 512;
+	ring->size_unaligned = ring->size + ring->base_align;
+
+	return (0);
+}
+
+void vnic_dev_free_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring)
+{
+	if (ring == NULL || ring->descs == NULL)
+		return;		/* missing or already-freed ring: nothing to do */
+	iflib_dma_free(ring->ifdip);	/* release the DMA area ... */
+	free(ring->ifdip, M_DEVBUF);	/* ... then its bookkeeping struct */
+	ring->descs = NULL;		/* makes a repeated call a no-op */
+}
+
+void vnic_wq_free(struct vnic_wq *wq) {
+	vnic_dev_free_desc_ring(wq->vdev, &wq->ring);	/* drop the descriptor ring */
+	wq->ctrl = NULL;	/* forget the control-resource pointer */
+}
+
+int enic_wq_devcmd2_alloc(struct vnic_dev *vdev, struct vnic_wq *wq,
+                          unsigned int desc_count, unsigned int desc_size)
+{
+	/* Bind the work queue to its device as queue index 0. */
+	wq->vdev = vdev;
+	wq->index = 0;
+
+	/* The queue's control resource is the device's DEVCMD2 resource. */
+	wq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_DEVCMD2, 0);
+	if (wq->ctrl == NULL)
+		return (EINVAL);
+
+	/* Make sure the queue is disabled before its ring is allocated. */
+	vnic_wq_disable(wq);
+	return (vnic_dev_alloc_desc_ring(vdev, &wq->ring, desc_count,
+	    desc_size));
+}
+
+void vnic_dev_deinit_devcmd2(struct vnic_dev *vdev)
+{
+	if (vdev->devcmd2 == NULL)	/* never set up, or already torn down */
+		return;
+	vnic_wq_disable(&vdev->devcmd2->wq);	/* stop the WQ before freeing */
+	if (vdev->devcmd2->wq_ctrl)
+		vnic_wq_free(&vdev->devcmd2->wq);
+	if (vdev->devcmd2->result)
+		vnic_dev_free_desc_ring(vdev, &vdev->devcmd2->results_ring);
+	free(vdev->devcmd2, M_DEVBUF);
+	vdev->devcmd2 = NULL;
+}
+
+int vnic_dev_deinit(struct vnic_dev *vdev) {
+	u64 a0 = 0, a1 = 0;
+	int wait = 1000;
+
+	/* Issue CMD_DEINIT to the device and hand back its status. */
+	return (vnic_dev_cmd(vdev, CMD_DEINIT, &a0, &a1, wait));
+}
+
+void enic_wq_init_start(struct vnic_wq *wq, unsigned int cq_index,
     unsigned int fetch_index, unsigned int posted_index,
     unsigned int error_interrupt_enable,
     unsigned int error_interrupt_offset)
@@ -33,7 +129,7 @@ void vnic_wq_init(struct vnic_wq *wq, unsigned int cq_index,
     unsigned int error_interrupt_enable,
     unsigned int error_interrupt_offset)
 {
-	vnic_wq_init_start(wq, cq_index, 0, 0,
+	enic_wq_init_start(wq, cq_index, 0, 0,
 		error_interrupt_enable,
 		error_interrupt_offset);
 	wq->cq_pend = 0;
@@ -42,7 +138,7 @@ void vnic_wq_init(struct vnic_wq *wq, unsigned int cq_index,
 
 unsigned int vnic_wq_error_status(struct vnic_wq *wq)
 {
-	return ENIC_BUS_READ_4(wq->ctrl, TX_ERROR_STATUS);
+	return (ENIC_BUS_READ_4(wq->ctrl, TX_ERROR_STATUS));
 }
 
 void vnic_wq_enable(struct vnic_wq *wq)
@@ -65,7 +161,7 @@ int vnic_wq_disable(struct vnic_wq *wq)
 
 	pr_err("Failed to disable WQ[%d]\n", wq->index);
 
-	return -ETIMEDOUT;
+	return (ETIMEDOUT);
 }
 
 void vnic_wq_clean(struct vnic_wq *wq)
diff --git a/sys/dev/enic/vnic_wq.h b/sys/dev/enic/vnic_wq.h
index c4f551de844117..9ef492adba24e8 100644
--- a/sys/dev/enic/vnic_wq.h
+++ b/sys/dev/enic/vnic_wq.h
@@ -61,6 +61,20 @@ struct vnic_wq {
 	uint64_t offloads;
 };
 
+struct devcmd2_controller {
+	struct vnic_res *wq_ctrl;	/* control resource of wq; NULL until wq is set up */
+	struct vnic_devcmd2 *cmd_ring;	/* command descriptors (wq.ring.descs) */
+	struct devcmd2_result *result;	/* result entries (results_ring.descs) */
+	u16 next_result;		/* index of the next result entry to poll */
+	u16 result_size;		/* number of result entries; next_result wraps here */
+	int color;			/* color expected in the next result; flips on wrap */
+	struct vnic_dev_ring results_ring;	/* DMA ring backing 'result' */
+	struct vnic_res *results_ctrl;	/* NOTE(review): no user visible in this change */
+	struct vnic_wq wq;		/* work queue used to post commands */
+	u32 posted;			/* last value written to TX_POSTED_INDEX */
+};
+
+
 static inline unsigned int vnic_wq_desc_avail(struct vnic_wq *wq)
 {
 	/* how many does SW own? */
@@ -92,7 +106,7 @@ buf_idx_incr(uint32_t n_descriptors, uint32_t idx)
 }
 
 void vnic_wq_free(struct vnic_wq *wq);
-void vnic_wq_init_start(struct vnic_wq *wq, unsigned int cq_index,
+void enic_wq_init_start(struct vnic_wq *wq, unsigned int cq_index,
     unsigned int fetch_index, unsigned int posted_index,
     unsigned int error_interrupt_enable,
     unsigned int error_interrupt_offset);
@@ -104,5 +118,7 @@ unsigned int vnic_wq_error_status(struct vnic_wq *wq);
 void vnic_wq_enable(struct vnic_wq *wq);
 int vnic_wq_disable(struct vnic_wq *wq);
 void vnic_wq_clean(struct vnic_wq *wq);
+int enic_wq_devcmd2_alloc(struct vnic_dev *vdev, struct vnic_wq *wq,
+    unsigned int desc_count, unsigned int desc_size);
 
 #endif /* _VNIC_WQ_H_ */

From 2cadbe468a8e8aef193429565b729d34ec48b266 Mon Sep 17 00:00:00 2001
From: Minsoo Choo <minsoochoo0122@proton.me>
Date: Fri, 5 Apr 2024 20:30:50 -0400
Subject: [PATCH 091/143] tcp_wrappers: Use default C standard version

Reviewed by:	emaste, arichardson, jhb
Differential Revision: https://reviews.freebsd.org/D43236
---
 contrib/tcp_wrappers/tcpd.c | 1 +
 libexec/tcpd/Makefile       | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/tcp_wrappers/tcpd.c b/contrib/tcp_wrappers/tcpd.c
index a0ba42f71f8650..164132570a3668 100644
--- a/contrib/tcp_wrappers/tcpd.c
+++ b/contrib/tcp_wrappers/tcpd.c
@@ -44,6 +44,7 @@ static char sccsid[] = "@(#) tcpd.c 1.10 96/02/11 17:01:32";
 int     allow_severity = SEVERITY;	/* run-time adjustable */
 int     deny_severity = LOG_WARNING;	/* ditto */
 
+int
 main(int argc, char **argv)
 {
     struct request_info request;
diff --git a/libexec/tcpd/Makefile b/libexec/tcpd/Makefile
index bb8f09ca13acce..4845013f748918 100644
--- a/libexec/tcpd/Makefile
+++ b/libexec/tcpd/Makefile
@@ -6,7 +6,6 @@ PACKAGE=	tcpd
 
 PROG=	tcpd
 MAN=	tcpd.8
-CSTD?=	c89
 CFLAGS+=-DREAL_DAEMON_DIR=\"${LIBEXECDIR}\" \
 	-DSEVERITY=LOG_INFO -DRFC931_TIMEOUT=10 \
 	-DHOSTS_DENY=\"/etc/hosts.deny\" -DHOSTS_ALLOW=\"/etc/hosts.allow\" \

From d0d7fcbae4207402b35f37cabe2bb5f30bec7c5d Mon Sep 17 00:00:00 2001
From: John Baldwin <jhb@FreeBSD.org>
Date: Thu, 9 Jan 2025 12:52:50 -0500
Subject: [PATCH 092/143] dumpon: Move the _Noreturn keyword before the return
 type

This fixes a warning from GCC 14 when compiling with the native C11
_Noreturn rather than the older GNU C function attribute:

sbin/dumpon/dumpon.c:73:1: error: '_Noreturn' is not at beginning of declaration [-Werror=old-style-declaration]
   73 | static void _Noreturn
      | ^~~~~~
---
 sbin/dumpon/dumpon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sbin/dumpon/dumpon.c b/sbin/dumpon/dumpon.c
index 46652d8471ebde..e6c1634ff6fe38 100644
--- a/sbin/dumpon/dumpon.c
+++ b/sbin/dumpon/dumpon.c
@@ -70,7 +70,7 @@
 
 static int	verbose;
 
-static void _Noreturn
+static _Noreturn void
 usage(void)
 {
 	fprintf(stderr,

From c6eb7f3fbffd9065ab75a2ed266f1b069fd97e6e Mon Sep 17 00:00:00 2001
From: Minsoo Choo <minsoochoo0122@proton.me>
Date: Thu, 9 Jan 2025 13:28:12 -0500
Subject: [PATCH 093/143] zstd: Add a stub <assert.h> for the kernel

The stub header includes <sys/kassert.h>.  zstd's xx_hash.h #includes
<assert.h> for the definition of static_assert() when building with
C11 or newer.

Reviewed by:	jhb
Differential Revision:	https://reviews.freebsd.org/D43239
---
 sys/contrib/zstd/lib/freebsd/assert.h | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 sys/contrib/zstd/lib/freebsd/assert.h

diff --git a/sys/contrib/zstd/lib/freebsd/assert.h b/sys/contrib/zstd/lib/freebsd/assert.h
new file mode 100644
index 00000000000000..eb2efe9be6c0b4
--- /dev/null
+++ b/sys/contrib/zstd/lib/freebsd/assert.h
@@ -0,0 +1,2 @@
+/* This file is in the public domain */
+#include <sys/kassert.h>

From 6af088c736c2fd9e64f2ad9449b7df1a109e6241 Mon Sep 17 00:00:00 2001
From: John Baldwin <jhb@FreeBSD.org>
Date: Thu, 9 Jan 2025 15:09:21 -0500
Subject: [PATCH 094/143] BUS_CHILD_DETACHED.9: This is also called if
 DEVICE_ATTACH fails

Reviewed by:	imp
Differential Revision:	https://reviews.freebsd.org/D48363
---
 share/man/man9/BUS_CHILD_DETACHED.9 | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/share/man/man9/BUS_CHILD_DETACHED.9 b/share/man/man9/BUS_CHILD_DETACHED.9
index 4cc00a49465b7a..8b59d1362d3d14 100644
--- a/share/man/man9/BUS_CHILD_DETACHED.9
+++ b/share/man/man9/BUS_CHILD_DETACHED.9
@@ -25,7 +25,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd August 21, 2012
+.Dd January 9, 2025
 .Dt BUS_CHILD_DETACHED 9
 .Os
 .Sh NAME
@@ -39,7 +39,10 @@
 .Sh DESCRIPTION
 The
 .Fn BUS_CHILD_DETACHED
-method is invoked by the new-bus framework after a device is detached.
+method is invoked by the new-bus framework after a device is detached
+or if a driver's attach routine
+.Pq see Xr DEVICE_ATTACH 9
+fails.
 A bus driver can provide an implementation of this method to
 reclaim any resources allocated on behalf of the child or
 to cleanup state not properly released by a

From ccabc7c2e556ac0b14da9b682b706ccaf251c0fe Mon Sep 17 00:00:00 2001
From: John Baldwin <jhb@FreeBSD.org>
Date: Thu, 9 Jan 2025 15:20:16 -0500
Subject: [PATCH 095/143] DEVICE_IDENTIFY.9: Modernize description and use
 cases

Mention adding devices based on firmware tables and software-only
pseudo-devices as use cases for identify methods as those are more
common than reading random I/O ports to identify a legacy ISA device.

Describe how device_find_child can be used to avoid duplicates.  While
here, explicitly note that devices added in identify methods typically
use a fixed device name.

Trim the cross-references a bit.

Reviewed by:	ziaee, imp
Differential Revision:	https://reviews.freebsd.org/D48367
---
 share/man/man9/DEVICE_IDENTIFY.9 | 52 +++++++++++++++-----------------
 1 file changed, 25 insertions(+), 27 deletions(-)

diff --git a/share/man/man9/DEVICE_IDENTIFY.9 b/share/man/man9/DEVICE_IDENTIFY.9
index d75c1a91ce4aed..b10d9414305034 100644
--- a/share/man/man9/DEVICE_IDENTIFY.9
+++ b/share/man/man9/DEVICE_IDENTIFY.9
@@ -26,44 +26,46 @@
 .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd January 15, 2017
+.Dd January 9, 2025
 .Dt DEVICE_IDENTIFY 9
 .Os
 .Sh NAME
 .Nm DEVICE_IDENTIFY
-.Nd identify a device, register it
+.Nd identify new child devices and register them
 .Sh SYNOPSIS
 .In sys/param.h
 .In sys/bus.h
 .Ft void
 .Fn DEVICE_IDENTIFY "driver_t *driver" "device_t parent"
 .Sh DESCRIPTION
-The identify function for a device is only needed for devices on buses
-that cannot identify their children independently, e.g.\& the ISA bus.
-It is used to recognize the device (usually done by accessing non-ambiguous
-registers in the hardware) and to tell the kernel about it and thus
-creating a new device instance.
+The identify method of a device driver is used to add devices that cannot be
+enumerated by the standard method on a bus device.
+Devices can be enumerated in various ways including accessing non-ambiguous
+device registers and parsing firmware tables.
+Software-only pseudo devices are also often enumerated via identify methods.
 .Pp
+For each newly identified device,
+a new device instance should be created by invoking the
 .Xr BUS_ADD_CHILD 9
-is used to register the device as a child of the bus.
-The device's resources (such as IRQ and I/O ports) are registered
-with the kernel by calling
-.Fn bus_set_resource
-for each resource (refer to
+method.
+If the identify method is able to discover other properties about the new
+device, those should also be set.
+For example, device resources should be added to the device by calling
 .Xr bus_set_resource 9
-for more information).
+for each resource.
 .Pp
-Since the device tree and the device driver tree are disjoint, the
-.Fn DEVICE_IDENTIFY
-routine needs to take this into account.
-If you load and unload your device driver that has the identify
-routine, the child node has the potential for adding the same node
-multiple times unless specific measure are taken to preclude that
-possibility.
+An identify method might be invoked multiple times.
+If a device driver is unloaded and loaded,
+the identify method will be called a second time after being reloaded.
+As a result, the identify method should avoid duplicate devices.
+Devices added by identify methods typically use a fixed device name
+in which case
+.Xr device_find_child 9
+can be used to detect existing devices.
 .Sh EXAMPLES
 The following pseudo-code shows an example of a function that
 probes for a piece of hardware and registers it and its resource
-(an I/O port) with the kernel.
+(an I/O port) with the parent bus device.
 .Bd -literal
 void
 foo_identify(driver_t *driver, device_t parent)
@@ -72,7 +74,7 @@ foo_identify(driver_t *driver, device_t parent)
 
 	retrieve_device_information;
 	if (devices matches one of your supported devices &&
-	    not already in device tree) {
+	    device_find_child(parent, "foo", DEVICE_UNIT_ANY) == NULL) {
 		child = BUS_ADD_CHILD(parent, 0, "foo", DEVICE_UNIT_ANY);
 		bus_set_resource(child, SYS_RES_IOPORT, 0, FOO_IOADDR, 1);
 	}
@@ -82,11 +84,7 @@ foo_identify(driver_t *driver, device_t parent)
 .Xr BUS_ADD_CHILD 9 ,
 .Xr bus_set_resource 9 ,
 .Xr device 9 ,
-.Xr device_add_child 9 ,
-.Xr DEVICE_ATTACH 9 ,
-.Xr DEVICE_DETACH 9 ,
-.Xr DEVICE_PROBE 9 ,
-.Xr DEVICE_SHUTDOWN 9
+.Xr device_find_child 9
 .Sh AUTHORS
 This manual page was written by
 .An Alexander Langer Aq Mt alex@FreeBSD.org .

From ed49d3b31d425a0add04aff6eb721a474937b7da Mon Sep 17 00:00:00 2001
From: John Baldwin <jhb@FreeBSD.org>
Date: Thu, 9 Jan 2025 21:09:52 -0500
Subject: [PATCH 096/143] twe.4: Remove manpage for previously-removed driver

Reviewed by:	ziaee, imp
Fixes:		062a7b918fac twe: Remove driver
Differential Revision:	https://reviews.freebsd.org/D48403
---
 ObsoleteFiles.inc       |   3 +
 share/man/man4/Makefile |   1 -
 share/man/man4/twe.4    | 278 ----------------------------------------
 3 files changed, 3 insertions(+), 279 deletions(-)
 delete mode 100644 share/man/man4/twe.4

diff --git a/ObsoleteFiles.inc b/ObsoleteFiles.inc
index 473daec4c737c2..c05a0d37c72796 100644
--- a/ObsoleteFiles.inc
+++ b/ObsoleteFiles.inc
@@ -2236,6 +2236,9 @@ OLD_FILES+=usr/share/certs/trusted/TrustCor_ECA-1.pem
 OLD_FILES+=usr/share/certs/trusted/TrustCor_RootCert_CA-1.pem
 OLD_FILES+=usr/share/certs/trusted/TrustCor_RootCert_CA-2.pem
 
+# 20230510: twe(4) driver removed
+OLD_FILES+=usr/share/man/man4/twe.4.gz
+
 # 20230505: md5 tests are now self-contained
 OLD_FILES+=usr/tests/sbin/md5/1.inp
 OLD_FILES+=usr/tests/sbin/md5/1.sha512-p.chk
diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile
index a7dbf6c615d62c..c03ba63c349ff9 100644
--- a/share/man/man4/Makefile
+++ b/share/man/man4/Makefile
@@ -584,7 +584,6 @@ MAN=	aac.4 \
 	tslog.4 \
 	tty.4 \
 	tun.4 \
-	twe.4 \
 	tws.4 \
 	udp.4 \
 	udplite.4 \
diff --git a/share/man/man4/twe.4 b/share/man/man4/twe.4
deleted file mode 100644
index 03a51b7b6a5096..00000000000000
--- a/share/man/man4/twe.4
+++ /dev/null
@@ -1,278 +0,0 @@
-.\"
-.\" Copyright (c) 2000 Michael Smith
-.\" Copyright (c) 2000 BSDi
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\"    notice, this list of conditions and the following disclaimer.
-.\" 2. The name of the author may not be used to endorse or promote products
-.\"    derived from this software without specific prior written permission
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
-.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
-.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
-.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
-.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
-.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-.\"
-.Dd May 7, 2023
-.Dt TWE 4
-.Os
-.Sh NAME
-.Nm twe
-.Nd 3ware 5000/6000/7000/8000 series PATA/SATA RAID adapter driver
-.Sh SYNOPSIS
-To compile this driver into the kernel,
-place the following lines in your
-kernel configuration file:
-.Bd -ragged -offset indent
-.Cd "device pci"
-.Cd "device twe"
-.Ed
-.Pp
-Alternatively, to load the driver as a
-module at boot time, place the following line in
-.Xr loader.conf 5 :
-.Bd -literal -offset indent
-twe_load="YES"
-.Ed
-.Sh DEPRECATION NOTICE
-The
-.Nm
-driver is not present in
-.Fx 14.0 .
-.Sh DESCRIPTION
-The
-.Nm
-driver provides support for AMCC's 3ware 5000/6000/7000/8000 series
-PATA/SATA RAID adapters.
-These adapters were formerly known as
-.Dq 3ware Escalade .
-.Pp
-These devices support 2, 4, 8, or 12 ATA disk drives
-and provide RAID0 (striping) and RAID1 (mirroring) functionality.
-.Sh HARDWARE
-The
-.Nm
-driver supports the following PATA/SATA RAID
-controllers:
-.Pp
-.Bl -bullet -compact
-.It
-AMCC's 3ware 5000 series
-.It
-AMCC's 3ware 6000 series
-.It
-AMCC's 3ware 7000-2
-.It
-AMCC's 3ware 7006-2
-.It
-AMCC's 3ware 7500-4LP
-.It
-AMCC's 3ware 7500-8
-.It
-AMCC's 3ware 7500-12
-.It
-AMCC's 3ware 7506-4LP
-.It
-AMCC's 3ware 7506-8
-.It
-AMCC's 3ware 7506-12
-.It
-AMCC's 3ware 8006-2LP
-.It
-AMCC's 3ware 8500-4LP
-.It
-AMCC's 3ware 8500-8
-.It
-AMCC's 3ware 8500-12
-.It
-AMCC's 3ware 8506-4LP
-.It
-AMCC's 3ware 8506-8
-.It
-AMCC's 3ware 8506-8MI
-.It
-AMCC's 3ware 8506-12
-.It
-AMCC's 3ware 8506-12MI
-.El
-.Sh DIAGNOSTICS
-.Ss Controller initialisation phase
-.Bl -diag
-.It twe%d: microcontroller not ready
-.Pp
-The controller's onboard CPU is not reporting that it is ready;
-this may be due to either a board or system failure.
-Initialisation has failed.
-.It twe%d: no attention interrupt
-.It twe%d: can't drain AEN queue
-.It twe%d: reset not reported
-.It twe%d: controller errors detected
-.It twe%d: can't drain response queue
-.It twe%d: reset %d failed, trying again
-.Pp
-The controller is not responding correctly to
-the driver's attempts to reset and initialise it.
-This process is retried several times.
-.It twe%d: can't initialise controller, giving up
-.Pp
-Several attempts to reset and initialise the controller have failed;
-initialisation has failed
-and the driver will not attach to this controller.
-.El
-.Ss Driver initialisation/shutdown phase
-.Bl -diag
-.It twe%d: register window not available
-.It twe%d: can't allocate register window
-.It twe%d: can't allocate parent DMA tag
-.It twe%d: can't allocate interrupt
-.It twe%d: can't set up interrupt
-.It twe%d: can't establish configuration hook
-.Pp
-A resource allocation error occurred while initialising the driver;
-initialisation has failed
-and the driver will not attach to this controller.
-.It twe%d: can't detect attached units
-.Pp
-Fetching the list of attached units failed; initialisation has failed.
-.It twe%d: error fetching capacity for unit %d
-.It twe%d: error fetching state for unit %d
-.It twe%d: error fetching descriptor size for unit %d
-.It twe%d: error fetching descriptor for unit %d
-.It twe%d: device_add_child failed
-.It twe%d: bus_generic_attach returned %d
-.Pp
-Creation of the disk devices failed, either due to communication
-problems with the adapter or due to resource shortage;
-attachment of one or more units may have been aborted.
-.El
-.Ss Operational phase
-.Bl -diag
-.It twe%d: command completed - %s
-.El
-.Pp
-A command was reported completed with a warning by the controller.
-The warning may be one of:
-.Bl -diag
-.It redundant/inconsequential request ignored
-.It failed to write zeroes to LBA 0
-.It failed to profile TwinStor zones
-.El
-.Bl -diag
-.It twe%d: command failed - %s
-.El
-.Pp
-A command was reported as failed by the controller.
-The failure message may be one of:
-.Bl -diag
-.It aborted due to system command or reconfiguration
-.It aborted
-.It access error
-.It access violation
-.It device failure
-.It controller error
-.It timed out
-.It invalid unit number
-.It unit not available
-.It undefined opcode
-.It request incompatible with unit
-.It invalid request
-.It firmware error, reset requested
-.Pp
-The command will be returned to the operating system after a
-fatal error.
-.El
-.Bl -diag
-.It twe%d: command failed submission - controller wedged
-.Pp
-A command could not be delivered to the controller because
-the controller is unresponsive.
-.It twe%d: AEN: <%s>
-.El
-.Pp
-The controller has reported a change in status using an AEN
-(Asynchronous Event Notification).
-The following AENs may be reported:
-.Bl -diag
-.It queue empty
-.It soft reset
-.It degraded mirror
-.It controller error
-.It rebuild fail
-.It rebuild done
-.It incomplete unit
-.It initialisation done
-.It unclean shutdown detected
-.It drive timeout
-.It drive error
-.It rebuild started
-.It aen queue full
-.Pp
-AENs are also queued internally for use by management tools.
-.El
-.Bl -diag
-.It twe%d: error polling for signalled AENs
-.Pp
-The controller has reported
-that one or more status messages are ready for the driver,
-but attempting to fetch one of these has returned an error.
-.It twe%d: AEN queue overflow, lost AEN <%s>
-.Pp
-A status message was retrieved from the controller,
-but there is no more room to queue it in the driver.
-The message is lost (but will be printed to the console).
-.It twe%d: missing expected status bits %s
-.It twe%d: unexpected status bits %s
-.Pp
-A check of the controller's status bits
-indicates an unexpected condition.
-.It twe%d: host interrupt
-.Pp
-The controller has signalled a host interrupt.
-This serves an unknown purpose and is ignored.
-.It twe%d: command interrupt
-.Pp
-The controller has signalled a command interrupt.
-This is not used, and will be disabled.
-.It twe%d: controller reset in progress...
-.Pp
-The controller is being reset by the driver.
-Typically this is done when the driver has determined that the
-controller is in an unrecoverable state.
-.It twe%d: can't reset controller, giving up
-.Pp
-The driver has given up on resetting the controller.
-No further I/O will be handled.
-.It controller reset done, %d commands restarted
-.Pp
-The controller was successfully reset,
-and outstanding commands were restarted.
-.El
-.Sh AUTHORS
-.An -nosplit
-The
-.Nm
-driver and manual page were written by
-.An Michael Smith Aq Mt msmith@FreeBSD.org .
-.Pp
-Extensive work done on the driver by
-.An Vinod Kashyap Aq Mt vkashyap@FreeBSD.org
-and
-.An Paul Saab Aq Mt ps@FreeBSD.org .
-.Sh BUGS
-The controller cannot handle I/O transfers
-that are not aligned to a 512-byte boundary.
-In order to support raw device access from user-space,
-the driver will perform alignment fixup on non-aligned data.
-This process is inefficient,
-and thus in order to obtain best performance
-user-space applications accessing the device
-should do so with aligned buffers.

From f9f0a1d61c7b97c705246c747baec385e0592966 Mon Sep 17 00:00:00 2001
From: Rick Macklem <rmacklem@FreeBSD.org>
Date: Thu, 9 Jan 2025 19:54:41 -0800
Subject: [PATCH 097/143] nfscl: Fix a crash when a readdir entry has nul in it

Commit 026cdaa3b3a9 added a check for a nul or "/" in a file
name in a readdir reply.  Unfortunately, the minimal testing
done on it did not detect a bug that can cause the client
to crash.

This patch fixes the code so that it does not crash.

Note that an NFS server will not normally return a file
name in a readdir reply that has a nul or "/" in it,
so the crash is unlikely.

PR:	283965
Reported by:	asomers
Tested by:	asomers
MFC after:	2 weeks
---
 sys/fs/nfsclient/nfs_clrpcops.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c
index e1c02a71939b3a..c35d0c6295b934 100644
--- a/sys/fs/nfsclient/nfs_clrpcops.c
+++ b/sys/fs/nfsclient/nfs_clrpcops.c
@@ -3397,6 +3397,7 @@ nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 	nfsattrbit_t attrbits, dattrbits;
 	u_int32_t rderr, *tl2 = NULL;
 	size_t tresid;
+	bool validentry;
 
 	KASSERT(uiop->uio_iovcnt == 1 &&
 	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
@@ -3622,6 +3623,7 @@ nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 
 		/* loop through the dir entries, doctoring them to 4bsd form */
 		while (more_dirs && bigenough) {
+			validentry = true;
 			if (nd->nd_flag & ND_NFSV4) {
 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
 				ncookie.lval[0] = *tl++;
@@ -3701,6 +3703,7 @@ nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 					uiop->uio_offset = savoff;
 					uiop->uio_resid = savresid;
 					blksiz = savblksiz;
+					validentry = false;
 				} else {
 					cp = uiop->uio_iov->iov_base;
 					tlen -= len;
@@ -3738,7 +3741,7 @@ nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 				ncookie.lval[0] = 0;
 				ncookie.lval[1] = *tl++;
 			}
-			if (bigenough) {
+			if (bigenough && validentry) {
 			    if (nd->nd_flag & ND_NFSV4) {
 				if (rderr) {
 				    dp->d_fileno = 0;
@@ -3875,7 +3878,7 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 	size_t tresid;
 	u_int32_t *tl2 = NULL, rderr;
 	struct timespec dctime, ts;
-	bool attr_ok;
+	bool attr_ok, validentry;
 
 	KASSERT(uiop->uio_iovcnt == 1 &&
 	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
@@ -4086,6 +4089,7 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 
 		/* loop through the dir entries, doctoring them to 4bsd form */
 		while (more_dirs && bigenough) {
+			validentry = true;
 			NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 			if (nd->nd_flag & ND_NFSV4) {
 				ncookie.lval[0] = *tl++;
@@ -4161,6 +4165,7 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 					uiop->uio_offset = savoff;
 					uiop->uio_resid = savresid;
 					blksiz = savblksiz;
+					validentry = false;
 				} else {
 					cp = uiop->uio_iov->iov_base;
 					tlen -= len;
@@ -4217,7 +4222,7 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 					goto nfsmout;
 			}
 
-			if (bigenough) {
+			if (bigenough && validentry) {
 			    if (nd->nd_flag & ND_NFSV4) {
 				if (rderr) {
 				    dp->d_fileno = 0;

From 16f0d01f9ca1e28bede9a493329c5d66e317d88f Mon Sep 17 00:00:00 2001
From: Kyle Evans <kevans@FreeBSD.org>
Date: Thu, 9 Jan 2025 22:27:50 -0600
Subject: [PATCH 098/143] arm64: apple: fix aic for !SMP configurations

Allocate sc_cpuids anyways, even if it's just a single entry, to
minimize functional diff between SMP and !SMP.

Reviewed by:	jhb
Differential Revision:	https://reviews.freebsd.org/D48289
---
 sys/arm64/apple/apple_aic.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/sys/arm64/apple/apple_aic.c b/sys/arm64/apple/apple_aic.c
index b500099a5430fd..c9ce3b4d21659a 100644
--- a/sys/arm64/apple/apple_aic.c
+++ b/sys/arm64/apple/apple_aic.c
@@ -137,9 +137,9 @@ struct apple_aic_softc {
 	u_int			sc_ndie;
 #ifdef SMP
 	struct apple_aic_irqsrc	sc_ipi_srcs[AIC_NIPIS];
-	u_int			*sc_cpuids;	/* cpu index to AIC CPU ID */
 	uint32_t		*sc_ipimasks;
 #endif
+	u_int			*sc_cpuids;	/* cpu index to AIC CPU ID */
 };
 
 static u_int aic_next_cpu;
@@ -215,6 +215,7 @@ apple_aic_attach(device_t dev)
 #ifdef SMP
 	sc->sc_ipimasks = malloc(sizeof(*sc->sc_ipimasks) * mp_maxid + 1,
 	    M_DEVBUF, M_WAITOK | M_ZERO);
+#endif
 	sc->sc_cpuids = malloc(sizeof(*sc->sc_cpuids) * mp_maxid + 1,
 	    M_DEVBUF, M_WAITOK | M_ZERO);
 
@@ -223,8 +224,6 @@ apple_aic_attach(device_t dev)
 	if (bootverbose)
 		device_printf(dev, "BSP CPU %d: whoami %x\n", cpu,
 		    sc->sc_cpuids[cpu]);
-#endif
-
 
 	name = device_get_nameunit(dev);
 	for (i = 0; i < sc->sc_ndie; i++) {

From 9d1de25930735261c16ed874a933b4c1f1d9041e Mon Sep 17 00:00:00 2001
From: Andrew Turner <andrew@FreeBSD.org>
Date: Fri, 10 Jan 2025 10:34:52 +0000
Subject: [PATCH 099/143] Update the Arm Optimized Routines

Import the v25.01 release of the Arm Optimized Routines [1].

[1] https://github.com/ARM-software/optimized-routines/tree/v25.01

Sponsored by:	Arm Ltd
---
 MAINTAINERS                                   |    9 +-
 Makefile                                      |   16 +-
 README                                        |   33 +-
 config.mk.dist                                |   99 +-
 math/Dir.mk                                   |  253 ++-
 math/README.contributors                      |    5 +-
 .../aarch64/advsimd/acos.c                    |   30 +-
 .../aarch64/advsimd/acosf.c                   |   32 +-
 .../aarch64/advsimd/acosh.c                   |   27 +-
 .../aarch64/advsimd/acoshf.c                  |   62 +-
 .../aarch64/advsimd/asin.c                    |   75 +-
 .../aarch64/advsimd/asinf.c                   |   30 +-
 math/aarch64/advsimd/asinh.c                  |  242 +++
 math/aarch64/advsimd/asinhf.c                 |   89 +
 .../aarch64/advsimd/atan.c                    |   85 +-
 math/aarch64/advsimd/atan2.c                  |  171 ++
 .../aarch64/advsimd/atan2f.c                  |   84 +-
 .../aarch64/advsimd/atanf.c                   |   26 +-
 .../aarch64/advsimd/atanh.c                   |   45 +-
 .../aarch64/advsimd/atanhf.c                  |   49 +-
 .../aarch64/advsimd/cbrt.c                    |   43 +-
 .../aarch64/advsimd/cbrtf.c                   |   19 +-
 .../aarch64/advsimd/cexpi.c                   |   14 +-
 .../aarch64/advsimd/cexpif.c                  |   14 +-
 math/aarch64/{v_cos.c => advsimd/cos.c}       |   21 +-
 math/aarch64/{v_cosf.c => advsimd/cosf.c}     |   23 +-
 .../aarch64/advsimd/cosh.c                    |   25 +-
 .../aarch64/advsimd/coshf.c                   |   52 +-
 .../aarch64/advsimd/cospi.c                   |   25 +-
 .../aarch64/advsimd/cospif.c                  |   29 +-
 .../v_erf_2u5.c => math/aarch64/advsimd/erf.c |   48 +-
 .../aarch64/advsimd/erfc.c                    |   65 +-
 .../aarch64/advsimd/erfcf.c                   |   54 +-
 .../aarch64/advsimd/erff.c                    |   34 +-
 math/aarch64/{v_exp.c => advsimd/exp.c}       |   11 +-
 .../aarch64/advsimd/exp10.c                   |   23 +-
 .../aarch64/advsimd/exp10f.c                  |   87 +-
 .../aarch64/advsimd/exp2.c                    |   28 +-
 math/aarch64/{v_exp2f.c => advsimd/exp2f.c}   |   69 +-
 math/aarch64/advsimd/exp2f_1u.c               |   73 +
 math/aarch64/{v_expf.c => advsimd/expf.c}     |   72 +-
 math/aarch64/advsimd/expf_1u.c                |   79 +
 math/aarch64/advsimd/expm1.c                  |   77 +
 math/aarch64/advsimd/expm1f.c                 |   82 +
 .../aarch64/advsimd}/finite_pow.h             |   22 +-
 .../aarch64/advsimd/hypot.c                   |   30 +-
 .../aarch64/advsimd/hypotf.c                  |   36 +-
 math/aarch64/advsimd/log.c                    |  118 ++
 math/aarch64/advsimd/log10.c                  |  132 ++
 math/aarch64/advsimd/log10f.c                 |  106 ++
 math/aarch64/advsimd/log1p.c                  |   61 +
 math/aarch64/advsimd/log1pf.c                 |   92 +
 math/aarch64/advsimd/log2.c                   |  123 ++
 math/aarch64/advsimd/log2f.c                  |  102 ++
 math/aarch64/advsimd/logf.c                   |   88 +
 math/aarch64/advsimd/modf.c                   |   33 +
 math/aarch64/advsimd/modff.c                  |   34 +
 .../v_pow_1u5.c => math/aarch64/advsimd/pow.c |  195 ++-
 math/aarch64/advsimd/powf.c                   |  209 +++
 math/aarch64/{v_sin.c => advsimd/sin.c}       |   26 +-
 .../aarch64/advsimd/sincos.c                  |   30 +-
 .../aarch64/advsimd/sincosf.c                 |   30 +-
 math/aarch64/advsimd/sincospi.c               |   44 +
 math/aarch64/advsimd/sincospif.c              |   43 +
 math/aarch64/{v_sinf.c => advsimd/sinf.c}     |   36 +-
 math/aarch64/advsimd/sinh.c                   |   80 +
 .../aarch64/advsimd/sinhf.c                   |   46 +-
 .../aarch64/advsimd/sinpi.c                   |   25 +-
 .../aarch64/advsimd/sinpif.c                  |   29 +-
 .../v_tan_3u5.c => math/aarch64/advsimd/tan.c |   28 +-
 .../aarch64/advsimd/tanf.c                    |   35 +-
 math/aarch64/advsimd/tanh.c                   |   67 +
 .../aarch64/advsimd/tanhf.c                   |   44 +-
 math/aarch64/advsimd/tanpi.c                  |   88 +
 math/aarch64/advsimd/tanpif.c                 |   70 +
 math/aarch64/advsimd/v_expf_inline.h          |   58 +
 math/aarch64/advsimd/v_expm1_inline.h         |   86 +
 math/aarch64/advsimd/v_expm1f_inline.h        |   62 +
 math/aarch64/advsimd/v_log1p_inline.h         |  119 ++
 math/aarch64/advsimd/v_log1pf_inline.h        |   94 +
 .../aarch64/advsimd}/v_log_inline.h           |    6 +-
 {pl/math => math/aarch64/advsimd}/v_math.h    |   91 +-
 .../aarch64/advsimd/v_poly_f32.h              |    6 +-
 .../aarch64/advsimd/v_poly_f64.h              |    6 +-
 .../aarch64/advsimd}/v_sincos_common.h        |    4 +-
 .../aarch64/advsimd}/v_sincosf_common.h       |    2 +-
 math/aarch64/advsimd/v_sincospi_common.h      |   64 +
 math/aarch64/advsimd/v_sincospif_common.h     |   57 +
 .../cospi_3u1.c => math/aarch64/cospi_3u5.c   |   31 +-
 {pl/math => math/aarch64}/cospif_2u6.c        |   31 +-
 .../aarch64/experimental}/README.contributors |    7 -
 .../aarch64/experimental}/acos_2u.c           |   44 +-
 .../aarch64/experimental}/acosf_1u4.c         |   40 +-
 .../aarch64/experimental}/acosh_3u.c          |   35 +-
 .../aarch64/experimental}/acoshf_2u8.c        |   32 +-
 .../aarch64/experimental/advsimd/erfinv_25u.c |   35 +-
 .../aarch64/experimental/advsimd/erfinvf_5u.c |   49 +-
 .../experimental/advsimd}/v_logf_inline.h     |    2 +-
 .../aarch64/experimental}/asin_3u.c           |   40 +-
 .../aarch64/experimental}/asin_data.c         |    2 +-
 .../aarch64/experimental}/asinf_2u5.c         |   36 +-
 .../aarch64/experimental}/asinf_data.c        |    2 +-
 .../aarch64/experimental}/asinh_2u5.c         |   33 +-
 .../aarch64/experimental}/asinh_data.c        |   17 +-
 .../aarch64/experimental}/asinhf_3u5.c        |   25 +-
 math/aarch64/experimental/asinhf_data.c       |   15 +
 .../aarch64/experimental}/atan2_2u5.c         |   24 +-
 .../aarch64/experimental}/atan2f_3u.c         |   24 +-
 .../aarch64/experimental}/atan_2u5.c          |   22 +-
 .../aarch64/experimental}/atan_common.h       |    2 +-
 math/aarch64/experimental/atan_data.c         |   23 +
 .../aarch64/experimental}/atanf_2u9.c         |   18 +-
 .../aarch64/experimental}/atanf_common.h      |    2 +-
 math/aarch64/experimental/atanf_data.c        |   17 +
 .../aarch64/experimental}/atanh_3u.c          |   18 +-
 .../aarch64/experimental}/atanhf_3u1.c        |   16 +-
 .../aarch64/experimental}/cbrt_2u.c           |   16 +-
 .../aarch64/experimental}/cbrt_data.c         |    2 +-
 .../aarch64/experimental}/cbrtf_1u5.c         |   16 +-
 .../aarch64/experimental}/cbrtf_data.c        |    2 +-
 .../aarch64/experimental}/cosh_2u.c           |   34 +-
 .../aarch64/experimental}/coshf_1u9.c         |   29 +-
 .../aarch64/experimental}/erf_2u5.c           |   21 +-
 .../aarch64/experimental}/erfc_1u8.c          |   26 +-
 .../aarch64/experimental}/erfcf_1u7.c         |   24 +-
 .../aarch64/experimental}/erff_2u.c           |   21 +-
 .../aarch64/experimental}/erfinv_24u5.c       |   20 +-
 .../aarch64/experimental}/erfinvf_4u7.c       |   16 +-
 .../aarch64/experimental}/erfinvl.c           |    2 +-
 .../aarch64/experimental/exp_inline.h         |   22 +-
 .../aarch64/experimental}/expf_data.c         |    4 +-
 .../aarch64/experimental}/expm1_2u5.c         |   20 +-
 math/aarch64/experimental/expm1_data.c        |   21 +
 .../aarch64/experimental}/expm1f_1u6.c        |   24 +-
 .../aarch64/experimental}/expm1f_data.c       |    6 +-
 .../aarch64/experimental}/log10_2u.c          |   33 +-
 .../aarch64/experimental}/log10_data.c        |    4 +-
 .../aarch64/experimental}/log1p_2u.c          |   20 +-
 math/aarch64/experimental/log1p_data.c        |   20 +
 .../aarch64/experimental}/log1pf_2u1.c        |   18 +-
 .../aarch64/experimental}/log1pf_data.c       |    8 +-
 .../aarch64/experimental}/sinh_3u.c           |   27 +-
 .../aarch64/experimental}/sinhf_2u3.c         |   32 +-
 math/aarch64/experimental/sve/erfinv_25u.c    |  156 ++
 math/aarch64/experimental/sve/erfinvf_5u.c    |  156 ++
 .../aarch64/experimental/sve/powi.c           |    3 +-
 .../aarch64/experimental/sve/powif.c          |    3 +-
 .../aarch64/experimental/sve/sv_logf_inline.h |   51 +
 .../aarch64/experimental}/tanf_3u3.c          |   42 +-
 .../aarch64/experimental}/tanf_data.c         |    2 +-
 .../aarch64/experimental}/tanh_3u.c           |   22 +-
 .../aarch64/experimental}/tanhf_2u6.c         |   25 +-
 math/aarch64/sincospi_4u.c                    |  158 ++
 math/aarch64/sincospif_3u2.c                  |  145 ++
 .../sinpi_3u.c => math/aarch64/sinpi_3u5.c    |   39 +-
 {pl/math => math/aarch64}/sinpif_2u5.c        |   35 +-
 .../sv_acos_2u.c => math/aarch64/sve/acos.c   |   24 +-
 .../aarch64/sve/acosf.c                       |   24 +-
 math/aarch64/sve/acosh.c                      |   51 +
 math/aarch64/sve/acoshf.c                     |   51 +
 .../sv_asin_3u.c => math/aarch64/sve/asin.c   |   28 +-
 .../aarch64/sve/asinf.c                       |   24 +-
 math/aarch64/sve/asinh.c                      |  197 +++
 .../aarch64/sve/asinhf.c                      |   38 +-
 .../sv_atan_2u5.c => math/aarch64/sve/atan.c  |   22 +-
 .../aarch64/sve/atan2.c                       |   54 +-
 .../aarch64/sve/atan2f.c                      |   55 +-
 .../aarch64/sve/atanf.c                       |   22 +-
 .../aarch64/sve/atanh.c                       |   24 +-
 .../aarch64/sve/atanhf.c                      |   33 +-
 .../sv_cbrt_2u.c => math/aarch64/sve/cbrt.c   |   35 +-
 .../aarch64/sve/cbrtf.c                       |   16 +-
 .../aarch64/sve/cexpi.c                       |   17 +-
 .../aarch64/sve/cexpif.c                      |   17 +-
 .../sv_cos_2u5.c => math/aarch64/sve/cos.c    |   16 +-
 .../sv_cosf_2u1.c => math/aarch64/sve/cosf.c  |   16 +-
 .../sv_cosh_2u.c => math/aarch64/sve/cosh.c   |   34 +-
 math/aarch64/sve/coshf.c                      |   62 +
 .../aarch64/sve/cospi.c                       |   25 +-
 .../aarch64/sve/cospif.c                      |   25 +-
 .../sv_erf_2u5.c => math/aarch64/sve/erf.c    |   28 +-
 .../sv_erfc_1u8.c => math/aarch64/sve/erfc.c  |   24 +-
 .../aarch64/sve/erfcf.c                       |   36 +-
 .../sv_erff_2u.c => math/aarch64/sve/erff.c   |   33 +-
 .../sv_exp_1u5.c => math/aarch64/sve/exp.c    |   56 +-
 .../aarch64/sve/exp10.c                       |   43 +-
 math/aarch64/sve/exp10f.c                     |  101 ++
 .../sv_exp2_2u.c => math/aarch64/sve/exp2.c   |   44 +-
 math/aarch64/sve/exp2f.c                      |   83 +
 math/aarch64/sve/expf.c                       |   50 +
 .../aarch64/sve/expm1.c                       |   20 +-
 .../aarch64/sve/expm1f.c                      |   46 +-
 .../aarch64/sve/hypot.c                       |   20 +-
 .../aarch64/sve/hypotf.c                      |   20 +-
 math/aarch64/sve/log.c                        |   97 +
 math/aarch64/sve/log10.c                      |  101 ++
 .../aarch64/sve/log10f.c                      |   65 +-
 .../aarch64/sve/log1p.c                       |   24 +-
 math/aarch64/sve/log1pf.c                     |   43 +
 math/aarch64/sve/log2.c                       |   96 +
 .../aarch64/sve/log2f.c                       |   62 +-
 .../sv_logf_3u4.c => math/aarch64/sve/logf.c  |   64 +-
 math/aarch64/sve/modf.c                       |   36 +
 math/aarch64/sve/modff.c                      |   36 +
 .../sv_pow_1u5.c => math/aarch64/sve/pow.c    |  295 ++--
 .../sv_powf_2u6.c => math/aarch64/sve/powf.c  |  157 +-
 .../sv_sin_3u5.c => math/aarch64/sve/sin.c    |   16 +-
 .../aarch64/sve/sincos.c                      |   36 +-
 .../aarch64/sve/sincosf.c                     |   36 +-
 math/aarch64/sve/sincospi.c                   |   47 +
 math/aarch64/sve/sincospif.c                  |   46 +
 .../sv_sinf_1u9.c => math/aarch64/sve/sinf.c  |   16 +-
 .../sv_sinh_3u.c => math/aarch64/sve/sinh.c   |   20 +-
 .../aarch64/sve/sinhf.c                       |   21 +-
 .../aarch64/sve/sinpi.c                       |   33 +-
 .../aarch64/sve/sinpif.c                      |   33 +-
 math/aarch64/sve/sv_expf_inline.h             |   66 +
 .../aarch64/sve}/sv_expm1f_inline.h           |   36 +-
 .../aarch64/sve}/sv_log1p_inline.h            |   14 +-
 math/aarch64/sve/sv_log1pf_inline.h           |   83 +
 math/aarch64/sve/sv_log_inline.h              |   83 +
 {pl/math => math/aarch64/sve}/sv_math.h       |   32 +-
 .../aarch64/sve/sv_poly_f32.h                 |    8 +-
 .../aarch64/sve/sv_poly_f64.h                 |    8 +-
 .../aarch64/sve/sv_poly_generic.h             |   32 +-
 .../aarch64/sve}/sv_sincos_common.h           |    4 +-
 .../aarch64/sve}/sv_sincosf_common.h          |    2 +-
 math/aarch64/sve/sv_sincospi_common.h         |   76 +
 math/aarch64/sve/sv_sincospif_common.h        |   82 +
 math/aarch64/sve/tan.c                        |  131 ++
 .../sv_tanf_3u5.c => math/aarch64/sve/tanf.c  |   46 +-
 .../sv_tanh_3u.c => math/aarch64/sve/tanh.c   |   20 +-
 math/aarch64/sve/tanhf.c                      |   68 +
 math/aarch64/sve/tanpi.c                      |   89 +
 math/aarch64/sve/tanpif.c                     |   68 +
 math/aarch64/tanpi_2u5.c                      |  158 ++
 math/aarch64/tanpif_3u1.c                     |  145 ++
 .../erf_data.c => math/aarch64/v_erf_data.c   |   10 +-
 .../erfc_data.c => math/aarch64/v_erfc_data.c |   10 +-
 .../aarch64/v_erfcf_data.c                    |   10 +-
 .../erff_data.c => math/aarch64/v_erff_data.c |   10 +-
 math/aarch64/v_exp2f_1u.c                     |   72 -
 math/aarch64/v_exp_data.c                     |   99 +-
 {pl/math => math/aarch64}/v_exp_tail_data.c   |    4 +-
 math/aarch64/v_expf_1u.c                      |   77 -
 math/aarch64/v_log.c                          |  100 --
 {pl/math => math/aarch64}/v_log10_data.c      |    2 +-
 {pl/math => math/aarch64}/v_log2_data.c       |    2 +-
 math/aarch64/v_log_data.c                     |   25 +-
 math/aarch64/v_logf.c                         |   74 -
 math/aarch64/v_math.h                         |  135 --
 math/aarch64/v_pow.c                          |   22 -
 {pl/math => math/aarch64}/v_pow_exp_data.c    |    2 +-
 {pl/math => math/aarch64}/v_pow_log_data.c    |    2 +-
 math/aarch64/v_powf.c                         |  148 --
 {pl/math => math/aarch64}/v_powf_data.c       |    2 +-
 math/cosf.c                                   |   10 +-
 math/erf.c                                    |   12 +-
 math/erff.c                                   |   12 +-
 math/exp.c                                    |   25 +-
 math/exp10.c                                  |   22 +-
 math/exp2.c                                   |   11 +-
 math/exp2f.c                                  |   10 +-
 math/expf.c                                   |   10 +-
 math/include/mathlib.h                        |  294 +++-
 math/include/test_defs.h                      |   21 +
 math/include/test_sig.h                       |   47 +
 math/log.c                                    |   11 +-
 {pl/math => math}/log10f.c                    |   24 +-
 math/log2.c                                   |   11 +-
 math/log2f.c                                  |   11 +-
 math/logf.c                                   |   11 +-
 math/logf_data.c                              |    3 +-
 math/math_config.h                            |  261 ++-
 {pl/math => math}/poly_generic.h              |    2 +-
 {pl/math => math}/poly_scalar_f32.h           |    6 +-
 {pl/math => math}/poly_scalar_f64.h           |    6 +-
 math/pow.c                                    |   22 +-
 math/powf.c                                   |   12 +-
 math/sincosf.c                                |   12 +-
 math/sincosf.h                                |    5 +-
 math/sinf.c                                   |   10 +-
 math/test/mathbench.c                         |  229 +--
 math/test/mathbench_funcs.h                   |  141 +-
 math/test/mathbench_wrappers.h                |  302 +++-
 math/test/mathtest.c                          |   12 +-
 math/test/rtest/dotest.c                      |   45 +-
 math/test/runulp.sh                           |  311 +---
 math/test/test_defs.h                         |   31 +
 .../test/testcases/directed/acos.tst          |    2 +-
 .../test/testcases/directed/acosf.tst         |    2 +-
 .../test/testcases/directed/acosh.tst         |    2 +-
 .../test/testcases/directed/acoshf.tst        |    2 +-
 .../test/testcases/directed/asin.tst          |    2 +-
 .../test/testcases/directed/asinf.tst         |    2 +-
 .../test/testcases/directed/asinh.tst         |    2 +-
 .../test/testcases/directed/asinhf.tst        |    2 +-
 .../test/testcases/directed/atan.tst          |    2 +-
 .../test/testcases/directed/atan2.tst         |    2 +-
 .../test/testcases/directed/atan2f.tst        |    2 +-
 .../test/testcases/directed/atanf.tst         |    2 +-
 .../test/testcases/directed/atanh.tst         |    2 +-
 .../test/testcases/directed/atanhf.tst        |    2 +-
 .../test/testcases/directed/cbrtf.tst         |    2 +-
 .../test/testcases/directed/cosh.tst          |    2 +-
 .../test/testcases/directed/coshf.tst         |    2 +-
 .../test/testcases/directed/erfc.tst          |    2 +-
 .../test/testcases/directed/erfcf.tst         |    2 +-
 .../test/testcases/directed/expm1.tst         |    2 +-
 .../test/testcases/directed/expm1f.tst        |    2 +-
 .../test/testcases/directed/log10.tst         |    2 +-
 .../test/testcases/directed/log10f.tst        |    2 +-
 .../test/testcases/directed/log1p.tst         |    2 +-
 .../test/testcases/directed/log1pf.tst        |    2 +-
 .../test/testcases/directed/sinh.tst          |    2 +-
 .../test/testcases/directed/sinhf.tst         |    2 +-
 .../test/testcases/directed/tanf.tst          |    2 +-
 .../test/testcases/directed/tanh.tst          |    2 +-
 .../test/testcases/directed/tanhf.tst         |    2 +-
 math/test/trigpi_references.h                 |  106 ++
 math/test/ulp.c                               |  328 ++--
 math/test/ulp.h                               |   41 +-
 math/test/ulp_funcs.h                         |  119 +-
 math/test/ulp_wrappers.h                      |  418 ++++-
 math/tgamma128.c                              |    2 +
 {pl/math => math}/tools/asin.sollya           |    2 +-
 {pl/math => math}/tools/asinf.sollya          |    2 +-
 {pl/math => math}/tools/asinh.sollya          |    2 +-
 {pl/math => math}/tools/asinhf.sollya         |    2 +-
 {pl/math => math}/tools/atan.sollya           |    2 +-
 {pl/math => math}/tools/atanf.sollya          |    2 +-
 {pl/math => math}/tools/cbrt.sollya           |    2 +-
 {pl/math => math}/tools/cbrtf.sollya          |    2 +-
 {pl/math => math}/tools/erf.sollya            |    2 +-
 {pl/math => math}/tools/erfc.sollya           |    2 +-
 {pl/math => math}/tools/erfcf.sollya          |    2 +-
 {pl/math => math}/tools/erff.sollya           |    2 +-
 {pl/math => math}/tools/exp10.sollya          |    2 +-
 {pl/math => math}/tools/expm1.sollya          |    2 +-
 {pl/math => math}/tools/expm1f.sollya         |    2 +-
 {pl/math => math}/tools/log10.sollya          |    2 +-
 {pl/math => math}/tools/log10f.sollya         |    2 +-
 {pl/math => math}/tools/log1p.sollya          |    2 +-
 {pl/math => math}/tools/log1pf.sollya         |    2 +-
 {pl/math => math}/tools/sincos.sollya         |    4 +-
 {pl/math => math}/tools/sincosf.sollya        |    2 +-
 {pl/math => math}/tools/sinpi.sollya          |    2 +-
 {pl/math => math}/tools/tan.sollya            |    2 +-
 {pl/math => math}/tools/tanf.sollya           |    2 +-
 math/tools/tanpi.sollya                       |   48 +
 {pl/math => math}/tools/v_erf.sollya          |    2 +-
 {pl/math => math}/tools/v_erfc.sollya         |    2 +-
 {pl/math => math}/tools/v_log10.sollya        |    2 +-
 {pl/math => math}/tools/v_log10f.sollya       |    2 +-
 {pl/math => math}/tools/v_log2f.sollya        |    2 +-
 networking/Dir.mk                             |    6 +-
 pl/Dir.mk                                     |   21 -
 pl/math/Dir.mk                                |  216 ---
 pl/math/asinhf_data.c                         |   15 -
 pl/math/atan_data.c                           |   20 -
 pl/math/atanf_data.c                          |   15 -
 pl/math/exp_data.c                            | 1120 ------------
 pl/math/expf.c                                |   76 -
 pl/math/expm1_data.c                          |   21 -
 pl/math/include/mathlib.h                     |  206 ---
 pl/math/include/pl_test.h                     |   24 -
 pl/math/log.c                                 |  161 --
 pl/math/log1p_data.c                          |   19 -
 pl/math/log_data.c                            |  511 ------
 pl/math/logf.c                                |   75 -
 pl/math/logf_data.c                           |   36 -
 pl/math/math_config.h                         |  624 -------
 pl/math/math_err.c                            |   78 -
 pl/math/math_errf.c                           |   78 -
 pl/math/pl_sig.h                              |   59 -
 pl/math/sv_acosh_3u5.c                        |   50 -
 pl/math/sv_acoshf_2u8.c                       |   47 -
 pl/math/sv_asinh_3u0.c                        |  129 --
 pl/math/sv_coshf_2u.c                         |   56 -
 pl/math/sv_erf_data.c                         | 1558 -----------------
 pl/math/sv_erff_data.c                        | 1046 -----------
 pl/math/sv_exp10f_1u5.c                       |   87 -
 pl/math/sv_exp2f_1u6.c                        |   80 -
 pl/math/sv_expf_2u.c                          |   86 -
 pl/math/sv_expf_inline.h                      |   66 -
 pl/math/sv_log10_2u5.c                        |   75 -
 pl/math/sv_log1pf_1u3.c                       |   97 -
 pl/math/sv_log1pf_inline.h                    |   65 -
 pl/math/sv_log2_3u.c                          |   73 -
 pl/math/sv_log_2u5.c                          |   76 -
 pl/math/sv_tan_3u5.c                          |   99 --
 pl/math/sv_tanhf_2u6.c                        |   59 -
 pl/math/test/mathbench_funcs.h                |   87 -
 pl/math/test/mathbench_wrappers.h             |  206 ---
 pl/math/test/pl_test.h                        |   39 -
 pl/math/test/runulp.sh                        |   78 -
 pl/math/test/testcases/directed/erff.tst      |   17 -
 pl/math/test/testcases/directed/log2.tst      |   21 -
 pl/math/test/testcases/directed/log2f.tst     |   27 -
 pl/math/test/testcases/random/double.tst      |    6 -
 pl/math/test/testcases/random/float.tst       |    8 -
 pl/math/test/ulp_funcs.h                      |   70 -
 pl/math/test/ulp_wrappers.h                   |  140 --
 pl/math/trigpi_references.c                   |   57 -
 pl/math/v_asinh_3u5.c                         |  175 --
 pl/math/v_asinhf_2u7.c                        |   80 -
 pl/math/v_atan2_3u.c                          |  121 --
 pl/math/v_exp_data.c                          |   55 -
 pl/math/v_exp_tail.h                          |   21 -
 pl/math/v_exp_tail_inline.h                   |  102 --
 pl/math/v_expf_inline.h                       |   60 -
 pl/math/v_expm1_2u5.c                         |  118 --
 pl/math/v_expm1f_1u6.c                        |  117 --
 pl/math/v_expm1f_inline.h                     |   63 -
 pl/math/v_log10_2u5.c                         |  120 --
 pl/math/v_log10f_3u5.c                        |   82 -
 pl/math/v_log1p_2u5.c                         |  128 --
 pl/math/v_log1p_inline.h                      |   91 -
 pl/math/v_log1pf_2u1.c                        |  126 --
 pl/math/v_log1pf_inline.h                     |   67 -
 pl/math/v_log2_3u.c                           |  109 --
 pl/math/v_log2f_2u5.c                         |   77 -
 pl/math/v_log_data.c                          |  161 --
 pl/math/v_sinh_3u.c                           |  118 --
 pl/math/v_tanh_3u.c                           |  106 --
 string/Dir.mk                                 |    9 +-
 string/aarch64/__mtag_tag_region.S            |    3 -
 string/aarch64/__mtag_tag_zero_region.S       |    3 -
 string/aarch64/asmdefs.h                      |   37 -
 .../aarch64/{ => experimental}/memchr-sve.S   |    8 +-
 .../aarch64/{ => experimental}/memcmp-sve.S   |    9 +-
 .../aarch64/{ => experimental}/stpcpy-sve.S   |    0
 .../aarch64/{ => experimental}/strchr-sve.S   |    7 +-
 .../{ => experimental}/strchrnul-sve.S        |    0
 .../aarch64/{ => experimental}/strcmp-sve.S   |    8 +-
 .../aarch64/{ => experimental}/strcpy-sve.S   |    8 +-
 .../aarch64/{ => experimental}/strlen-sve.S   |    7 +-
 .../aarch64/{ => experimental}/strncmp-sve.S  |    9 +-
 .../aarch64/{ => experimental}/strnlen-sve.S  |    8 +-
 .../aarch64/{ => experimental}/strrchr-sve.S  |    7 +-
 string/aarch64/memchr-mte.S                   |    2 -
 string/aarch64/memchr.S                       |    2 -
 string/aarch64/memcmp.S                       |    4 -
 string/aarch64/memcpy-advsimd.S               |    3 -
 string/aarch64/memcpy-mops.S                  |    4 -
 string/aarch64/memcpy-sve.S                   |    8 -
 string/aarch64/memcpy.S                       |    3 -
 string/aarch64/memmove-mops.S                 |    4 -
 string/aarch64/memrchr.S                      |    1 -
 string/aarch64/memset-mops.S                  |    3 -
 string/aarch64/memset-sve.S                   |  114 ++
 string/aarch64/memset.S                       |  104 +-
 string/aarch64/strchr-mte.S                   |    1 -
 string/aarch64/strchr.S                       |    1 -
 string/aarch64/strchrnul-mte.S                |    1 -
 string/aarch64/strchrnul.S                    |    1 -
 string/aarch64/strcmp.S                       |    2 -
 string/aarch64/strcpy.S                       |    2 -
 string/aarch64/strlen-mte.S                   |   38 +-
 string/aarch64/strlen.S                       |    1 -
 string/aarch64/strncmp.S                      |    3 -
 string/aarch64/strnlen.S                      |    2 -
 string/aarch64/strrchr-mte.S                  |    1 -
 string/aarch64/strrchr.S                      |    1 -
 string/bench/memcpy.c                         |  239 +--
 string/bench/memset.c                         |  141 +-
 string/bench/strlen.c                         |  206 +--
 string/include/benchlib.h                     |   31 +
 string/include/stringlib.h                    |    3 +-
 string/test/memcpy.c                          |    2 -
 string/test/memmove.c                         |    2 -
 string/test/memset.c                          |    3 +
 472 files changed, 11852 insertions(+), 14525 deletions(-)
 rename pl/math/v_acos_2u.c => math/aarch64/advsimd/acos.c (85%)
 rename pl/math/v_acosf_1u4.c => math/aarch64/advsimd/acosf.c (82%)
 rename pl/math/v_acosh_3u5.c => math/aarch64/advsimd/acosh.c (72%)
 rename pl/math/v_acoshf_3u1.c => math/aarch64/advsimd/acoshf.c (50%)
 rename pl/math/v_asin_3u.c => math/aarch64/advsimd/asin.c (56%)
 rename pl/math/v_asinf_2u5.c => math/aarch64/advsimd/asinf.c (82%)
 create mode 100644 math/aarch64/advsimd/asinh.c
 create mode 100644 math/aarch64/advsimd/asinhf.c
 rename pl/math/v_atan_2u5.c => math/aarch64/advsimd/atan.c (51%)
 create mode 100644 math/aarch64/advsimd/atan2.c
 rename pl/math/v_atan2f_3u.c => math/aarch64/advsimd/atan2f.c (54%)
 rename pl/math/v_atanf_3u.c => math/aarch64/advsimd/atanf.c (85%)
 rename pl/math/v_atanh_3u5.c => math/aarch64/advsimd/atanh.c (55%)
 rename pl/math/v_atanhf_3u1.c => math/aarch64/advsimd/atanhf.c (54%)
 rename pl/math/v_cbrt_2u.c => math/aarch64/advsimd/cbrt.c (76%)
 rename pl/math/v_cbrtf_1u7.c => math/aarch64/advsimd/cbrtf.c (91%)
 rename pl/math/v_cexpi_3u5.c => math/aarch64/advsimd/cexpi.c (79%)
 rename pl/math/v_cexpif_1u8.c => math/aarch64/advsimd/cexpif.c (80%)
 rename math/aarch64/{v_cos.c => advsimd/cos.c} (80%)
 rename math/aarch64/{v_cosf.c => advsimd/cosf.c} (76%)
 rename pl/math/v_cosh_2u.c => math/aarch64/advsimd/cosh.c (84%)
 rename pl/math/v_coshf_2u4.c => math/aarch64/advsimd/coshf.c (64%)
 rename pl/math/v_cospi_3u1.c => math/aarch64/advsimd/cospi.c (81%)
 rename pl/math/v_cospif_3u2.c => math/aarch64/advsimd/cospif.c (76%)
 rename pl/math/v_erf_2u5.c => math/aarch64/advsimd/erf.c (77%)
 rename pl/math/v_erfc_1u8.c => math/aarch64/advsimd/erfc.c (77%)
 rename pl/math/v_erfcf_1u7.c => math/aarch64/advsimd/erfcf.c (76%)
 rename pl/math/v_erff_2u.c => math/aarch64/advsimd/erff.c (76%)
 rename math/aarch64/{v_exp.c => advsimd/exp.c} (90%)
 rename pl/math/v_exp10_2u.c => math/aarch64/advsimd/exp10.c (89%)
 rename pl/math/v_exp10f_2u4.c => math/aarch64/advsimd/exp10f.c (58%)
 rename pl/math/v_exp2_2u.c => math/aarch64/advsimd/exp2.c (82%)
 rename math/aarch64/{v_exp2f.c => advsimd/exp2f.c} (58%)
 create mode 100644 math/aarch64/advsimd/exp2f_1u.c
 rename math/aarch64/{v_expf.c => advsimd/expf.c} (61%)
 create mode 100644 math/aarch64/advsimd/expf_1u.c
 create mode 100644 math/aarch64/advsimd/expm1.c
 create mode 100644 math/aarch64/advsimd/expm1f.c
 rename {pl/math => math/aarch64/advsimd}/finite_pow.h (94%)
 rename pl/math/v_hypot_1u5.c => math/aarch64/advsimd/hypot.c (74%)
 rename pl/math/v_hypotf_1u5.c => math/aarch64/advsimd/hypotf.c (68%)
 create mode 100644 math/aarch64/advsimd/log.c
 create mode 100644 math/aarch64/advsimd/log10.c
 create mode 100644 math/aarch64/advsimd/log10f.c
 create mode 100644 math/aarch64/advsimd/log1p.c
 create mode 100644 math/aarch64/advsimd/log1pf.c
 create mode 100644 math/aarch64/advsimd/log2.c
 create mode 100644 math/aarch64/advsimd/log2f.c
 create mode 100644 math/aarch64/advsimd/logf.c
 create mode 100644 math/aarch64/advsimd/modf.c
 create mode 100644 math/aarch64/advsimd/modff.c
 rename pl/math/v_pow_1u5.c => math/aarch64/advsimd/pow.c (60%)
 create mode 100644 math/aarch64/advsimd/powf.c
 rename math/aarch64/{v_sin.c => advsimd/sin.c} (77%)
 rename pl/math/v_sincos_3u5.c => math/aarch64/advsimd/sincos.c (70%)
 rename pl/math/v_sincosf_1u8.c => math/aarch64/advsimd/sincosf.c (70%)
 create mode 100644 math/aarch64/advsimd/sincospi.c
 create mode 100644 math/aarch64/advsimd/sincospif.c
 rename math/aarch64/{v_sinf.c => advsimd/sinf.c} (65%)
 create mode 100644 math/aarch64/advsimd/sinh.c
 rename pl/math/v_sinhf_2u3.c => math/aarch64/advsimd/sinhf.c (59%)
 rename pl/math/v_sinpi_3u1.c => math/aarch64/advsimd/sinpi.c (81%)
 rename pl/math/v_sinpif_3u.c => math/aarch64/advsimd/sinpif.c (76%)
 rename pl/math/v_tan_3u5.c => math/aarch64/advsimd/tan.c (86%)
 rename pl/math/v_tanf_3u5.c => math/aarch64/advsimd/tanf.c (83%)
 create mode 100644 math/aarch64/advsimd/tanh.c
 rename pl/math/v_tanhf_2u6.c => math/aarch64/advsimd/tanhf.c (62%)
 create mode 100644 math/aarch64/advsimd/tanpi.c
 create mode 100644 math/aarch64/advsimd/tanpif.c
 create mode 100644 math/aarch64/advsimd/v_expf_inline.h
 create mode 100644 math/aarch64/advsimd/v_expm1_inline.h
 create mode 100644 math/aarch64/advsimd/v_expm1f_inline.h
 create mode 100644 math/aarch64/advsimd/v_log1p_inline.h
 create mode 100644 math/aarch64/advsimd/v_log1pf_inline.h
 rename {pl/math => math/aarch64/advsimd}/v_log_inline.h (94%)
 rename {pl/math => math/aarch64/advsimd}/v_math.h (58%)
 rename pl/math/poly_advsimd_f32.h => math/aarch64/advsimd/v_poly_f32.h (81%)
 rename pl/math/poly_advsimd_f64.h => math/aarch64/advsimd/v_poly_f64.h (81%)
 rename {pl/math => math/aarch64/advsimd}/v_sincos_common.h (97%)
 rename {pl/math => math/aarch64/advsimd}/v_sincosf_common.h (98%)
 create mode 100644 math/aarch64/advsimd/v_sincospi_common.h
 create mode 100644 math/aarch64/advsimd/v_sincospif_common.h
 rename pl/math/cospi_3u1.c => math/aarch64/cospi_3u5.c (82%)
 rename {pl/math => math/aarch64}/cospif_2u6.c (79%)
 rename {pl => math/aarch64/experimental}/README.contributors (71%)
 rename {pl/math => math/aarch64/experimental}/acos_2u.c (76%)
 rename {pl/math => math/aarch64/experimental}/acosf_1u4.c (79%)
 rename {pl/math => math/aarch64/experimental}/acosh_3u.c (69%)
 rename {pl/math => math/aarch64/experimental}/acoshf_2u8.c (68%)
 rename pl/math/v_erfinv_25u.c => math/aarch64/experimental/advsimd/erfinv_25u.c (88%)
 rename pl/math/v_erfinvf_5u.c => math/aarch64/experimental/advsimd/erfinvf_5u.c (83%)
 rename {pl/math => math/aarch64/experimental/advsimd}/v_logf_inline.h (97%)
 rename {pl/math => math/aarch64/experimental}/asin_3u.c (78%)
 rename {pl/math => math/aarch64/experimental}/asin_data.c (94%)
 rename {pl/math => math/aarch64/experimental}/asinf_2u5.c (80%)
 rename {pl/math => math/aarch64/experimental}/asinf_data.c (92%)
 rename {pl/math => math/aarch64/experimental}/asinh_2u5.c (75%)
 rename {pl/math => math/aarch64/experimental}/asinh_data.c (51%)
 rename {pl/math => math/aarch64/experimental}/asinhf_3u5.c (77%)
 create mode 100644 math/aarch64/experimental/asinhf_data.c
 rename {pl/math => math/aarch64/experimental}/atan2_2u5.c (91%)
 rename {pl/math => math/aarch64/experimental}/atan2f_3u.c (90%)
 rename {pl/math => math/aarch64/experimental}/atan_2u5.c (79%)
 rename {pl/math => math/aarch64/experimental}/atan_common.h (95%)
 create mode 100644 math/aarch64/experimental/atan_data.c
 rename {pl/math => math/aarch64/experimental}/atanf_2u9.c (82%)
 rename {pl/math => math/aarch64/experimental}/atanf_common.h (96%)
 create mode 100644 math/aarch64/experimental/atanf_data.c
 rename {pl/math => math/aarch64/experimental}/atanh_3u.c (88%)
 rename {pl/math => math/aarch64/experimental}/atanhf_3u1.c (87%)
 rename {pl/math => math/aarch64/experimental}/cbrt_2u.c (89%)
 rename {pl/math => math/aarch64/experimental}/cbrt_data.c (93%)
 rename {pl/math => math/aarch64/experimental}/cbrtf_1u5.c (88%)
 rename {pl/math => math/aarch64/experimental}/cbrtf_data.c (93%)
 rename {pl/math => math/aarch64/experimental}/cosh_2u.c (70%)
 rename {pl/math => math/aarch64/experimental}/coshf_1u9.c (71%)
 rename {pl/math => math/aarch64/experimental}/erf_2u5.c (87%)
 rename {pl/math => math/aarch64/experimental}/erfc_1u8.c (90%)
 rename {pl/math => math/aarch64/experimental}/erfcf_1u7.c (86%)
 rename {pl/math => math/aarch64/experimental}/erff_2u.c (83%)
 rename {pl/math => math/aarch64/experimental}/erfinv_24u5.c (88%)
 rename {pl/math => math/aarch64/experimental}/erfinvf_4u7.c (88%)
 rename {pl/math => math/aarch64/experimental}/erfinvl.c (98%)
 rename pl/math/exp.c => math/aarch64/experimental/exp_inline.h (93%)
 rename {pl/math => math/aarch64/experimental}/expf_data.c (93%)
 rename {pl/math => math/aarch64/experimental}/expm1_2u5.c (83%)
 create mode 100644 math/aarch64/experimental/expm1_data.c
 rename {pl/math => math/aarch64/experimental}/expm1f_1u6.c (82%)
 rename {pl/math => math/aarch64/experimental}/expm1f_data.c (59%)
 rename {pl/math => math/aarch64/experimental}/log10_2u.c (84%)
 rename {pl/math => math/aarch64/experimental}/log10_data.c (99%)
 rename {pl/math => math/aarch64/experimental}/log1p_2u.c (91%)
 create mode 100644 math/aarch64/experimental/log1p_data.c
 rename {pl/math => math/aarch64/experimental}/log1pf_2u1.c (93%)
 rename {pl/math => math/aarch64/experimental}/log1pf_data.c (59%)
 rename {pl/math => math/aarch64/experimental}/sinh_3u.c (72%)
 rename {pl/math => math/aarch64/experimental}/sinhf_2u3.c (69%)
 create mode 100644 math/aarch64/experimental/sve/erfinv_25u.c
 create mode 100644 math/aarch64/experimental/sve/erfinvf_5u.c
 rename pl/math/sv_powi.c => math/aarch64/experimental/sve/powi.c (96%)
 rename pl/math/sv_powif.c => math/aarch64/experimental/sve/powif.c (96%)
 create mode 100644 math/aarch64/experimental/sve/sv_logf_inline.h
 rename {pl/math => math/aarch64/experimental}/tanf_3u3.c (80%)
 rename {pl/math => math/aarch64/experimental}/tanf_data.c (96%)
 rename {pl/math => math/aarch64/experimental}/tanh_3u.c (80%)
 rename {pl/math => math/aarch64/experimental}/tanhf_2u6.c (79%)
 create mode 100644 math/aarch64/sincospi_4u.c
 create mode 100644 math/aarch64/sincospif_3u2.c
 rename pl/math/sinpi_3u.c => math/aarch64/sinpi_3u5.c (76%)
 rename {pl/math => math/aarch64}/sinpif_2u5.c (75%)
 rename pl/math/sv_acos_2u.c => math/aarch64/sve/acos.c (85%)
 rename pl/math/sv_acosf_1u4.c => math/aarch64/sve/acosf.c (83%)
 create mode 100644 math/aarch64/sve/acosh.c
 create mode 100644 math/aarch64/sve/acoshf.c
 rename pl/math/sv_asin_3u.c => math/aarch64/sve/asin.c (80%)
 rename pl/math/sv_asinf_2u5.c => math/aarch64/sve/asinf.c (81%)
 create mode 100644 math/aarch64/sve/asinh.c
 rename pl/math/sv_asinhf_2u5.c => math/aarch64/sve/asinhf.c (53%)
 rename pl/math/sv_atan_2u5.c => math/aarch64/sve/atan.c (86%)
 rename pl/math/sv_atan2_2u5.c => math/aarch64/sve/atan2.c (74%)
 rename pl/math/sv_atan2f_3u.c => math/aarch64/sve/atan2f.c (68%)
 rename pl/math/sv_atanf_2u9.c => math/aarch64/sve/atanf.c (83%)
 rename pl/math/sv_atanh_3u3.c => math/aarch64/sve/atanh.c (72%)
 rename pl/math/sv_atanhf_2u8.c => math/aarch64/sve/atanhf.c (61%)
 rename pl/math/sv_cbrt_2u.c => math/aarch64/sve/cbrt.c (77%)
 rename pl/math/sv_cbrtf_1u7.c => math/aarch64/sve/cbrtf.c (92%)
 rename pl/math/sv_cexpi_3u5.c => math/aarch64/sve/cexpi.c (79%)
 rename pl/math/sv_cexpif_1u8.c => math/aarch64/sve/cexpif.c (80%)
 rename pl/math/sv_cos_2u5.c => math/aarch64/sve/cos.c (88%)
 rename pl/math/sv_cosf_2u1.c => math/aarch64/sve/cosf.c (87%)
 rename pl/math/sv_cosh_2u.c => math/aarch64/sve/cosh.c (77%)
 create mode 100644 math/aarch64/sve/coshf.c
 rename pl/math/sv_cospi_3u2.c => math/aarch64/sve/cospi.c (78%)
 rename pl/math/sv_cospif_2u6.c => math/aarch64/sve/cospif.c (75%)
 rename pl/math/sv_erf_2u5.c => math/aarch64/sve/erf.c (83%)
 rename pl/math/sv_erfc_1u8.c => math/aarch64/sve/erfc.c (91%)
 rename pl/math/sv_erfcf_1u7.c => math/aarch64/sve/erfcf.c (77%)
 rename pl/math/sv_erff_2u.c => math/aarch64/sve/erff.c (77%)
 rename pl/math/sv_exp_1u5.c => math/aarch64/sve/exp.c (79%)
 rename pl/math/sv_exp10_1u5.c => math/aarch64/sve/exp10.c (79%)
 create mode 100644 math/aarch64/sve/exp10f.c
 rename pl/math/sv_exp2_2u.c => math/aarch64/sve/exp2.c (72%)
 create mode 100644 math/aarch64/sve/exp2f.c
 create mode 100644 math/aarch64/sve/expf.c
 rename pl/math/sv_expm1_2u5.c => math/aarch64/sve/expm1.c (86%)
 rename pl/math/sv_expm1f_1u6.c => math/aarch64/sve/expm1f.c (67%)
 rename pl/math/sv_hypot_1u5.c => math/aarch64/sve/hypot.c (72%)
 rename pl/math/sv_hypotf_1u5.c => math/aarch64/sve/hypotf.c (69%)
 create mode 100644 math/aarch64/sve/log.c
 create mode 100644 math/aarch64/sve/log10.c
 rename pl/math/sv_log10f_3u5.c => math/aarch64/sve/log10f.c (56%)
 rename pl/math/sv_log1p_2u5.c => math/aarch64/sve/log1p.c (88%)
 create mode 100644 math/aarch64/sve/log1pf.c
 create mode 100644 math/aarch64/sve/log2.c
 rename pl/math/sv_log2f_2u5.c => math/aarch64/sve/log2f.c (53%)
 rename pl/math/sv_logf_3u4.c => math/aarch64/sve/logf.c (52%)
 create mode 100644 math/aarch64/sve/modf.c
 create mode 100644 math/aarch64/sve/modff.c
 rename pl/math/sv_pow_1u5.c => math/aarch64/sve/pow.c (64%)
 rename pl/math/sv_powf_2u6.c => math/aarch64/sve/powf.c (69%)
 rename pl/math/sv_sin_3u5.c => math/aarch64/sve/sin.c (89%)
 rename pl/math/sv_sincos_3u5.c => math/aarch64/sve/sincos.c (72%)
 rename pl/math/sv_sincosf_1u8.c => math/aarch64/sve/sincosf.c (72%)
 create mode 100644 math/aarch64/sve/sincospi.c
 create mode 100644 math/aarch64/sve/sincospif.c
 rename pl/math/sv_sinf_1u9.c => math/aarch64/sve/sinf.c (89%)
 rename pl/math/sv_sinh_3u.c => math/aarch64/sve/sinh.c (88%)
 rename pl/math/sv_sinhf_2u3.c => math/aarch64/sve/sinhf.c (78%)
 rename pl/math/sv_sinpi_3u1.c => math/aarch64/sve/sinpi.c (66%)
 rename pl/math/sv_sinpif_2u5.c => math/aarch64/sve/sinpif.c (61%)
 create mode 100644 math/aarch64/sve/sv_expf_inline.h
 rename {pl/math => math/aarch64/sve}/sv_expm1f_inline.h (65%)
 rename {pl/math => math/aarch64/sve}/sv_log1p_inline.h (90%)
 create mode 100644 math/aarch64/sve/sv_log1pf_inline.h
 create mode 100644 math/aarch64/sve/sv_log_inline.h
 rename {pl/math => math/aarch64/sve}/sv_math.h (72%)
 rename pl/math/poly_sve_f32.h => math/aarch64/sve/sv_poly_f32.h (78%)
 rename pl/math/poly_sve_f64.h => math/aarch64/sve/sv_poly_f64.h (78%)
 rename pl/math/poly_sve_generic.h => math/aarch64/sve/sv_poly_generic.h (91%)
 rename {pl/math => math/aarch64/sve}/sv_sincos_common.h (97%)
 rename {pl/math => math/aarch64/sve}/sv_sincosf_common.h (98%)
 create mode 100644 math/aarch64/sve/sv_sincospi_common.h
 create mode 100644 math/aarch64/sve/sv_sincospif_common.h
 create mode 100644 math/aarch64/sve/tan.c
 rename pl/math/sv_tanf_3u5.c => math/aarch64/sve/tanf.c (79%)
 rename pl/math/sv_tanh_3u.c => math/aarch64/sve/tanh.c (86%)
 create mode 100644 math/aarch64/sve/tanhf.c
 create mode 100644 math/aarch64/sve/tanpi.c
 create mode 100644 math/aarch64/sve/tanpif.c
 create mode 100644 math/aarch64/tanpi_2u5.c
 create mode 100644 math/aarch64/tanpif_3u1.c
 rename pl/math/erf_data.c => math/aarch64/v_erf_data.c (99%)
 rename pl/math/erfc_data.c => math/aarch64/v_erfc_data.c (99%)
 rename pl/math/erfcf_data.c => math/aarch64/v_erfcf_data.c (98%)
 rename pl/math/erff_data.c => math/aarch64/v_erff_data.c (98%)
 delete mode 100644 math/aarch64/v_exp2f_1u.c
 rename {pl/math => math/aarch64}/v_exp_tail_data.c (98%)
 delete mode 100644 math/aarch64/v_expf_1u.c
 delete mode 100644 math/aarch64/v_log.c
 rename {pl/math => math/aarch64}/v_log10_data.c (99%)
 rename {pl/math => math/aarch64}/v_log2_data.c (99%)
 delete mode 100644 math/aarch64/v_logf.c
 delete mode 100644 math/aarch64/v_math.h
 delete mode 100644 math/aarch64/v_pow.c
 rename {pl/math => math/aarch64}/v_pow_exp_data.c (99%)
 rename {pl/math => math/aarch64}/v_pow_log_data.c (99%)
 delete mode 100644 math/aarch64/v_powf.c
 rename {pl/math => math/aarch64}/v_powf_data.c (98%)
 create mode 100644 math/include/test_defs.h
 create mode 100644 math/include/test_sig.h
 rename {pl/math => math}/log10f.c (84%)
 rename {pl/math => math}/poly_generic.h (99%)
 rename {pl/math => math}/poly_scalar_f32.h (80%)
 rename {pl/math => math}/poly_scalar_f64.h (80%)
 create mode 100644 math/test/test_defs.h
 rename {pl/math => math}/test/testcases/directed/acos.tst (95%)
 rename {pl/math => math}/test/testcases/directed/acosf.tst (95%)
 rename {pl/math => math}/test/testcases/directed/acosh.tst (96%)
 rename {pl/math => math}/test/testcases/directed/acoshf.tst (95%)
 rename {pl/math => math}/test/testcases/directed/asin.tst (97%)
 rename {pl/math => math}/test/testcases/directed/asinf.tst (96%)
 rename {pl/math => math}/test/testcases/directed/asinh.tst (95%)
 rename {pl/math => math}/test/testcases/directed/asinhf.tst (95%)
 rename {pl/math => math}/test/testcases/directed/atan.tst (96%)
 rename {pl/math => math}/test/testcases/directed/atan2.tst (99%)
 rename {pl/math => math}/test/testcases/directed/atan2f.tst (99%)
 rename {pl/math => math}/test/testcases/directed/atanf.tst (95%)
 rename {pl/math => math}/test/testcases/directed/atanh.tst (97%)
 rename {pl/math => math}/test/testcases/directed/atanhf.tst (96%)
 rename {pl/math => math}/test/testcases/directed/cbrtf.tst (97%)
 rename {pl/math => math}/test/testcases/directed/cosh.tst (95%)
 rename {pl/math => math}/test/testcases/directed/coshf.tst (93%)
 rename {pl/math => math}/test/testcases/directed/erfc.tst (96%)
 rename {pl/math => math}/test/testcases/directed/erfcf.tst (93%)
 rename {pl/math => math}/test/testcases/directed/expm1.tst (96%)
 rename {pl/math => math}/test/testcases/directed/expm1f.tst (98%)
 rename {pl/math => math}/test/testcases/directed/log10.tst (95%)
 rename {pl/math => math}/test/testcases/directed/log10f.tst (98%)
 rename {pl/math => math}/test/testcases/directed/log1p.tst (96%)
 rename {pl/math => math}/test/testcases/directed/log1pf.tst (99%)
 rename {pl/math => math}/test/testcases/directed/sinh.tst (96%)
 rename {pl/math => math}/test/testcases/directed/sinhf.tst (95%)
 rename {pl/math => math}/test/testcases/directed/tanf.tst (96%)
 rename {pl/math => math}/test/testcases/directed/tanh.tst (95%)
 rename {pl/math => math}/test/testcases/directed/tanhf.tst (95%)
 create mode 100644 math/test/trigpi_references.h
 rename {pl/math => math}/tools/asin.sollya (93%)
 rename {pl/math => math}/tools/asinf.sollya (94%)
 rename {pl/math => math}/tools/asinh.sollya (94%)
 rename {pl/math => math}/tools/asinhf.sollya (93%)
 rename {pl/math => math}/tools/atan.sollya (93%)
 rename {pl/math => math}/tools/atanf.sollya (92%)
 rename {pl/math => math}/tools/cbrt.sollya (90%)
 rename {pl/math => math}/tools/cbrtf.sollya (90%)
 rename {pl/math => math}/tools/erf.sollya (92%)
 rename {pl/math => math}/tools/erfc.sollya (95%)
 rename {pl/math => math}/tools/erfcf.sollya (91%)
 rename {pl/math => math}/tools/erff.sollya (91%)
 rename {pl/math => math}/tools/exp10.sollya (97%)
 rename {pl/math => math}/tools/expm1.sollya (91%)
 rename {pl/math => math}/tools/expm1f.sollya (91%)
 rename {pl/math => math}/tools/log10.sollya (96%)
 rename {pl/math => math}/tools/log10f.sollya (96%)
 rename {pl/math => math}/tools/log1p.sollya (93%)
 rename {pl/math => math}/tools/log1pf.sollya (91%)
 rename {pl/math => math}/tools/sincos.sollya (92%)
 rename {pl/math => math}/tools/sincosf.sollya (95%)
 rename {pl/math => math}/tools/sinpi.sollya (95%)
 rename {pl/math => math}/tools/tan.sollya (91%)
 rename {pl/math => math}/tools/tanf.sollya (98%)
 create mode 100644 math/tools/tanpi.sollya
 rename {pl/math => math}/tools/v_erf.sollya (91%)
 rename {pl/math => math}/tools/v_erfc.sollya (96%)
 rename {pl/math => math}/tools/v_log10.sollya (96%)
 rename {pl/math => math}/tools/v_log10f.sollya (96%)
 rename {pl/math => math}/tools/v_log2f.sollya (96%)
 delete mode 100644 pl/Dir.mk
 delete mode 100644 pl/math/Dir.mk
 delete mode 100644 pl/math/asinhf_data.c
 delete mode 100644 pl/math/atan_data.c
 delete mode 100644 pl/math/atanf_data.c
 delete mode 100644 pl/math/exp_data.c
 delete mode 100644 pl/math/expf.c
 delete mode 100644 pl/math/expm1_data.c
 delete mode 100644 pl/math/include/mathlib.h
 delete mode 100644 pl/math/include/pl_test.h
 delete mode 100644 pl/math/log.c
 delete mode 100644 pl/math/log1p_data.c
 delete mode 100644 pl/math/log_data.c
 delete mode 100644 pl/math/logf.c
 delete mode 100644 pl/math/logf_data.c
 delete mode 100644 pl/math/math_config.h
 delete mode 100644 pl/math/math_err.c
 delete mode 100644 pl/math/math_errf.c
 delete mode 100644 pl/math/pl_sig.h
 delete mode 100644 pl/math/sv_acosh_3u5.c
 delete mode 100644 pl/math/sv_acoshf_2u8.c
 delete mode 100644 pl/math/sv_asinh_3u0.c
 delete mode 100644 pl/math/sv_coshf_2u.c
 delete mode 100644 pl/math/sv_erf_data.c
 delete mode 100644 pl/math/sv_erff_data.c
 delete mode 100644 pl/math/sv_exp10f_1u5.c
 delete mode 100644 pl/math/sv_exp2f_1u6.c
 delete mode 100644 pl/math/sv_expf_2u.c
 delete mode 100644 pl/math/sv_expf_inline.h
 delete mode 100644 pl/math/sv_log10_2u5.c
 delete mode 100644 pl/math/sv_log1pf_1u3.c
 delete mode 100644 pl/math/sv_log1pf_inline.h
 delete mode 100644 pl/math/sv_log2_3u.c
 delete mode 100644 pl/math/sv_log_2u5.c
 delete mode 100644 pl/math/sv_tan_3u5.c
 delete mode 100644 pl/math/sv_tanhf_2u6.c
 delete mode 100644 pl/math/test/mathbench_funcs.h
 delete mode 100644 pl/math/test/mathbench_wrappers.h
 delete mode 100644 pl/math/test/pl_test.h
 delete mode 100755 pl/math/test/runulp.sh
 delete mode 100644 pl/math/test/testcases/directed/erff.tst
 delete mode 100644 pl/math/test/testcases/directed/log2.tst
 delete mode 100644 pl/math/test/testcases/directed/log2f.tst
 delete mode 100644 pl/math/test/testcases/random/double.tst
 delete mode 100644 pl/math/test/testcases/random/float.tst
 delete mode 100644 pl/math/test/ulp_funcs.h
 delete mode 100644 pl/math/test/ulp_wrappers.h
 delete mode 100644 pl/math/trigpi_references.c
 delete mode 100644 pl/math/v_asinh_3u5.c
 delete mode 100644 pl/math/v_asinhf_2u7.c
 delete mode 100644 pl/math/v_atan2_3u.c
 delete mode 100644 pl/math/v_exp_data.c
 delete mode 100644 pl/math/v_exp_tail.h
 delete mode 100644 pl/math/v_exp_tail_inline.h
 delete mode 100644 pl/math/v_expf_inline.h
 delete mode 100644 pl/math/v_expm1_2u5.c
 delete mode 100644 pl/math/v_expm1f_1u6.c
 delete mode 100644 pl/math/v_expm1f_inline.h
 delete mode 100644 pl/math/v_log10_2u5.c
 delete mode 100644 pl/math/v_log10f_3u5.c
 delete mode 100644 pl/math/v_log1p_2u5.c
 delete mode 100644 pl/math/v_log1p_inline.h
 delete mode 100644 pl/math/v_log1pf_2u1.c
 delete mode 100644 pl/math/v_log1pf_inline.h
 delete mode 100644 pl/math/v_log2_3u.c
 delete mode 100644 pl/math/v_log2f_2u5.c
 delete mode 100644 pl/math/v_log_data.c
 delete mode 100644 pl/math/v_sinh_3u.c
 delete mode 100644 pl/math/v_tanh_3u.c
 rename string/aarch64/{ => experimental}/memchr-sve.S (96%)
 rename string/aarch64/{ => experimental}/memcmp-sve.S (93%)
 rename string/aarch64/{ => experimental}/stpcpy-sve.S (100%)
 rename string/aarch64/{ => experimental}/strchr-sve.S (97%)
 rename string/aarch64/{ => experimental}/strchrnul-sve.S (100%)
 rename string/aarch64/{ => experimental}/strcmp-sve.S (96%)
 rename string/aarch64/{ => experimental}/strcpy-sve.S (96%)
 rename string/aarch64/{ => experimental}/strlen-sve.S (96%)
 rename string/aarch64/{ => experimental}/strncmp-sve.S (95%)
 rename string/aarch64/{ => experimental}/strnlen-sve.S (96%)
 rename string/aarch64/{ => experimental}/strrchr-sve.S (98%)
 create mode 100644 string/aarch64/memset-sve.S

diff --git a/MAINTAINERS b/MAINTAINERS
index 6c5823a8dbce5a..06cceb8f2501ad 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1,12 +1,9 @@
 /
-	Szabolcs Nagy <szabolcs.nagy@arm.com>
+	Tamar Christina <tamar.christina@arm.com>
 math/
-	Szabolcs Nagy <szabolcs.nagy@arm.com>
-networking/
-	Szabolcs Nagy <szabolcs.nagy@arm.com>
-pl/
 	Pierre Blanchard <pierre.blanchard@arm.com>
 	Joe Ramsay <joe.ramsay@arm.com>
+networking/
+	Ola Liljedahl <ola.liljedahl@arm.com>
 string/
-	Szabolcs Nagy <szabolcs.nagy@arm.com>
 	Wilco Dijkstra <wilco.dijkstra@arm.com>
diff --git a/Makefile b/Makefile
index c487896728c2cd..e7503dbd2f6075 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 # Makefile - requires GNU make
 #
-# Copyright (c) 2018-2022, Arm Limited.
+# Copyright (c) 2018-2024, Arm Limited.
 # SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 srcdir = .
@@ -11,7 +11,6 @@ includedir = $(prefix)/include
 
 # Configure these in config.mk, do not make changes in this file.
 SUBS = math string networking
-PLSUBS = math
 HOST_CC = cc
 HOST_CFLAGS = -std=c99 -O2
 HOST_LDFLAGS =
@@ -21,12 +20,22 @@ CPPFLAGS =
 CFLAGS = -std=c99 -O2
 CFLAGS_SHARED = -fPIC
 CFLAGS_ALL = -Ibuild/include $(CPPFLAGS) $(CFLAGS)
-CFLAGS_PL = -Ibuild/pl/include $(CPPFLAGS) $(CFLAGS) -DPL
 LDFLAGS =
 LDLIBS =
 AR = $(CROSS_COMPILE)ar
 RANLIB = $(CROSS_COMPILE)ranlib
 INSTALL = install
+# Detect OS.
+# Assume Unix environment: Linux, Darwin, or Msys.
+OS := $(shell uname -s)
+OS := $(patsubst MSYS%,Msys,$(OS))
+# The following math dependencies can be adjusted in the config
+# file if necessary, e.g. for Msys.
+libm-libs = -lm
+libc-libs = -lc
+mpfr-libs = -lmpfr
+gmp-libs = -lgmp
+mpc-libs = -lmpc
 
 all:
 
@@ -53,7 +62,6 @@ $(DIRS):
 	mkdir -p $@
 
 $(filter %.os,$(ALL_FILES)): CFLAGS_ALL += $(CFLAGS_SHARED)
-$(filter %.os,$(ALL_FILES)): CFLAGS_PL += $(CFLAGS_SHARED)
 
 build/%.o: $(srcdir)/%.S
 	$(CC) $(CFLAGS_ALL) -c -o $@ $<
diff --git a/README b/README
index 651ebdc84bc865..4bbed76d75c824 100644
--- a/README
+++ b/README
@@ -12,12 +12,25 @@ contribution requirements are documented in README.contributors of
 the appropriate subdirectory.
 
 Regular quarterly releases are tagged as vYY.MM, the latest
-release is v24.01.
+release is v25.01.
 
 Source code layout:
 
 build/          - build directory (created by make).
-math/           - math subproject sources.
+math/           - math subproject sources for generic scalar
+                  subroutines and sources shared with
+                  subdirectories of math/.
+                  All math routines should meet the quality
+                  requirements stated in math/README.contributors,
+                  routines that fail to do so are located in an
+                  experimental/ directory.
+math/aarch64/   - math subproject AArch64-specific sources
+                  and sources shared with subdirectories.
+math/aarch64/advsimd      - AdvSIMD-specific math sources.
+math/aarch64/experimental - Experimental math sources that do
+                            not meet the quality requirements
+                            stated in math/README.contributors.
+math/aarch64/sve          - SVE-specific math sources.
 math/include/   - math library public headers.
 math/test/      - math test and benchmark related sources.
 math/tools/     - tools used for designing the algorithms.
@@ -25,9 +38,16 @@ networking/     - networking subproject sources.
 networking/include/ - networking library public headers.
 networking/test/ - networking test and benchmark related sources.
 string/         - string routines subproject sources.
+                  All string routines should meet the quality
+                  requirements stated in string/README.contributors,
+                  routines that fail to do so are located in an
+                  experimental/ directory.
+string/<arch>   - <arch>-specific string routines sources for
+                  <arch>=aarch64, and arm.
+string/aarch64/experimental - Experimental string routines which
+                              may not be fully optimized yet.
 string/include/ - string library public headers.
 string/test/    - string test and benchmark related sources.
-pl/...          - separately maintained performance library code.
 
 The steps to build the target libraries and run the tests:
 
@@ -50,6 +70,13 @@ Or building and testing the math subproject only:
 make all-math
 make check-math
 
+Note on compiler compatibility/requirement:
+
+SVE routines are always built by default - this means that on AArch64
+GCC >= 10 or LLVM >= 5 is always required for SVE ACLE compatibility.
+There is no explicit check for a compatible compiler, therefore the SVE
+routines will fail to build if CC is too old.
+
 The test system requires libmpfr and libmpc.
 For example on debian linux they can be installed as:
 
diff --git a/config.mk.dist b/config.mk.dist
index 03fb54db52fabe..ae4574e7cdba80 100644
--- a/config.mk.dist
+++ b/config.mk.dist
@@ -1,14 +1,11 @@
 # Example config.mk
 #
-# Copyright (c) 2018-2023, Arm Limited.
+# Copyright (c) 2018-2024, Arm Limited.
 # SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 # Subprojects to build
 SUBS = math string networking
 
-# Subsubprojects to build if subproject pl is built
-PLSUBS = math
-
 # Target architecture: aarch64, arm or x86_64
 ARCH = aarch64
 
@@ -30,6 +27,27 @@ HOST_CFLAGS += -Wall -Wno-unused-function
 HOST_CFLAGS += -g
 CFLAGS += -g
 
+ifeq ($(OS),Msys)
+  # llvm is the only available/valid native compiler
+  CC = clang
+  AR = llvm-ar
+  RANLIB = llvm-ranlib
+  HOST_CC = clang
+  SYSROOT = /c/wenv/msys2/msys64/clangarm64
+  # Common windows flags
+  COMMON_WIN_CFLAGS = -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE
+  COMMON_WIN_CFLAGS += -Wno-deprecated-declarations -Wno-unused-variable
+  # For mathtest
+  HOST_CFLAGS += -I$(SYSROOT)/include
+  HOST_CFLAGS += $(COMMON_WIN_CFLAGS) -Wno-ignored-attributes
+  # Clear the default flag -fPIC, as not supported on Windows
+  CFLAGS_SHARED =
+  # For ulp.h with MPFR
+  CFLAGS += -I$(SYSROOT)/include
+  # For clang on Windows
+  CFLAGS += $(COMMON_WIN_CFLAGS)
+endif
+
 # Optimize the shared libraries on aarch64 assuming they fit in 1M.
 #CFLAGS_SHARED = -fPIC -mcmodel=tiny
 
@@ -45,12 +63,33 @@ math-cflags =
 math-ldlibs =
 math-ulpflags =
 math-testflags =
-string-cflags =
+string-cflags = -falign-functions=64
 networking-cflags =
 
-# Use if mpfr is available on the target for ulp error checking.
-#math-ldlibs += -lmpfr -lgmp
-#math-cflags += -DUSE_MPFR
+ifeq ($(OS),Msys)
+  # Libraries can be installed with pacman
+  libm-libs = -lmsvcrt -lvcruntime -lucrt
+  libc-libs =
+  # Linker will look for .lib but some systems only have .dll.a,
+  # therefore we have to give absolute path to libraries.
+  # This is system dependent and might need adjusting.
+  mpfr-libs = $(SYSROOT)/lib/libmpfr.dll.a
+  gmp-libs = $(SYSROOT)/lib/libgmp.dll.a
+  mpc-libs = $(SYSROOT)/lib/libmpc.dll.a
+endif
+
+# Use if mpfr is available on the target for ulp error checking. If
+# enabling this, it is advised to disable fenv checks by uncommenting
+# the two lines at the bottom of this block.
+USE_MPFR=0
+math-cflags += -DUSE_MPFR=$(USE_MPFR)
+ifeq ($(USE_MPFR), 1)
+  math-ldlibs += $(mpfr-libs) $(gmp-libs)
+  math-ulpflags += -m
+endif
+# Disable fenv checks
+#math-ulpflags = -q -f
+#math-testflags = -nostatus
 
 # Use with gcc.
 math-cflags += -frounding-math -fexcess-precision=standard -fno-stack-protector
@@ -59,30 +98,36 @@ math-cflags += -ffp-contract=fast -fno-math-errno
 # Use with clang.
 #math-cflags += -ffp-contract=fast
 
-# Disable/enable SVE vector math code and tests.
-# If WANT_SVE_MATH is enabled, math-sve-cflags is added for SVE
-# routines only so that SVE code does not leak into scalar
-# routines. It is also necessary to add it for tools (e.g. ulp,
-# mathbench)
-WANT_SVE_MATH = 0
-ifeq ($(WANT_SVE_MATH), 1)
-  math-sve-cflags = -march=armv8-a+sve
-endif
-math-cflags += -DWANT_SVE_MATH=$(WANT_SVE_MATH)
-
 # If defined to 1, set errno in math functions according to ISO C.  Many math
 # libraries do not set errno, so this is 0 by default.  It may need to be
 # set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0.
 WANT_ERRNO = 0
 math-cflags += -DWANT_ERRNO=$(WANT_ERRNO)
 
+# Disable/enable SVE vector math tests/tools.
+ifeq ($(ARCH),aarch64)
+  WANT_SVE_TESTS = 1
+else
+  WANT_SVE_TESTS = 0
+endif
+math-cflags += -DWANT_SVE_TESTS=$(WANT_SVE_TESTS)
+
 # If set to 1, set fenv in vector math routines.
 WANT_SIMD_EXCEPT = 0
 math-cflags += -DWANT_SIMD_EXCEPT=$(WANT_SIMD_EXCEPT)
 
-# Disable fenv checks
-#math-ulpflags = -q -f
-#math-testflags = -nostatus
+# If set to 1, enable tests for exp10.
+WANT_EXP10_TESTS = 1
+math-cflags += -DWANT_EXP10_TESTS=$(WANT_EXP10_TESTS)
+
+# If set to 1, enable tests for sinpi and cospi. These functions are
+# only supported on aarch64.
+ifeq ($(ARCH),aarch64)
+  WANT_TRIGPI_TESTS = 1
+else
+  WANT_TRIGPI_TESTS = 0
+endif
+math-cflags += -DWANT_TRIGPI_TESTS=$(WANT_TRIGPI_TESTS)
 
 # Remove GNU Property Notes from asm files.
 #string-cflags += -DWANT_GNU_PROPERTY=0
@@ -92,3 +137,13 @@ math-cflags += -DWANT_SIMD_EXCEPT=$(WANT_SIMD_EXCEPT)
 
 # Avoid auto-vectorization of scalar code and unroll loops
 networking-cflags += -O2 -fno-tree-vectorize -funroll-loops
+
+# Provide *_finite symbols and some of the glibc hidden symbols
+# so libmathlib can be used with binaries compiled against glibc
+# to interpose math functions with both static and dynamic linking
+USE_GLIBC_ABI = 1
+math-cflags += -DUSE_GLIBC_ABI=$(USE_GLIBC_ABI)
+
+# Enable experimental math routines - non-C23 vector math and low-accuracy scalar
+WANT_EXPERIMENTAL_MATH = 0
+math-cflags += -DWANT_EXPERIMENTAL_MATH=$(WANT_EXPERIMENTAL_MATH)
diff --git a/math/Dir.mk b/math/Dir.mk
index 5e9494a7bd3cbc..6277241ac4de9a 100644
--- a/math/Dir.mk
+++ b/math/Dir.mk
@@ -1,23 +1,61 @@
 # Makefile fragment - requires GNU make
 #
-# Copyright (c) 2019-2023, Arm Limited.
+# Copyright (c) 2019-2024, Arm Limited.
 # SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
-S := $(srcdir)/math
-B := build/math
-
-math-lib-srcs := $(wildcard $(S)/*.[cS])
-math-lib-srcs += $(wildcard $(S)/$(ARCH)/*.[cS])
+.SECONDEXPANSION:
+
+ifneq ($(OS),Linux)
+  ifeq ($(WANT_SIMD_EXCEPT),1)
+    $(error WANT_SIMD_EXCEPT is not supported outside Linux)
+  endif
+  ifneq ($(USE_MPFR),1)
+    $(warning WARNING: Double-precision ULP tests will not be usable without MPFR)
+  endif
+  ifeq ($(USE_GLIBC_ABI),1)
+    $(error Can only generate special GLIBC symbols on Linux - please disable USE_GLIBC_ABI)
+  endif
+endif
+
+ifneq ($(ARCH),aarch64)
+  ifeq ($(WANT_TRIGPI_TESTS),1)
+    $(error trigpi functions only supported on aarch64)
+  endif
+  ifeq ($(WANT_EXPERIMENTAL_MATH),1)
+    $(error Experimental math only supported on aarch64)
+  endif
+endif
+
+math-src-dir := $(srcdir)/math
+math-build-dir := build/math
+
+math-lib-srcs := $(wildcard $(math-src-dir)/*.[cS])
+math-lib-srcs += $(wildcard $(math-src-dir)/$(ARCH)/*.[cS])
+ifeq ($(OS),Linux)
+# Vector symbols only supported on Linux
+math-lib-srcs += $(wildcard $(math-src-dir)/$(ARCH)/*/*.[cS])
+endif
+
+ifeq ($(WANT_EXPERIMENTAL_MATH), 1)
+ifeq ($(OS),Linux)
+# Vector symbols only supported on Linux
+math-lib-srcs += $(wildcard $(math-src-dir)/$(ARCH)/experimental/*/*.[cS])
+else
+math-lib-srcs += $(wildcard $(math-src-dir)/$(ARCH)/experimental/*.[cS])
+endif
+else
+# Scalar experimental symbols will have been added by wildcard, so remove them
+math-lib-srcs := $(filter-out $(math-src-dir)/aarch64/experimental/%, $(math-lib-srcs))
+endif
 
 math-test-srcs := \
-	$(S)/test/mathtest.c \
-	$(S)/test/mathbench.c \
-	$(S)/test/ulp.c \
+	$(math-src-dir)/test/mathtest.c \
+	$(math-src-dir)/test/mathbench.c \
+	$(math-src-dir)/test/ulp.c \
 
-math-test-host-srcs := $(wildcard $(S)/test/rtest/*.[cS])
+math-test-host-srcs := $(wildcard $(math-src-dir)/test/rtest/*.[cS])
 
-math-includes := $(patsubst $(S)/%,build/%,$(wildcard $(S)/include/*.h))
-math-test-includes := $(patsubst $(S)/%,build/include/%,$(wildcard $(S)/test/*.h))
+math-includes := $(patsubst $(math-src-dir)/%,build/%,$(wildcard $(math-src-dir)/include/*.h))
 
 math-libs := \
 	build/lib/libmathlib.so \
@@ -33,9 +71,9 @@ math-tools := \
 math-host-tools := \
 	build/bin/rtest \
 
-math-lib-objs := $(patsubst $(S)/%,$(B)/%.o,$(basename $(math-lib-srcs)))
-math-test-objs := $(patsubst $(S)/%,$(B)/%.o,$(basename $(math-test-srcs)))
-math-host-objs := $(patsubst $(S)/%,$(B)/%.o,$(basename $(math-test-host-srcs)))
+math-lib-objs := $(patsubst $(math-src-dir)/%,$(math-build-dir)/%.o,$(basename $(math-lib-srcs)))
+math-test-objs := $(patsubst $(math-src-dir)/%,$(math-build-dir)/%.o,$(basename $(math-test-srcs)))
+math-host-objs := $(patsubst $(math-src-dir)/%,$(math-build-dir)/%.o,$(basename $(math-test-host-srcs)))
 math-target-objs := $(math-lib-objs) $(math-test-objs)
 math-objs := $(math-target-objs) $(math-target-objs:%.o=%.os) $(math-host-objs)
 
@@ -44,18 +82,69 @@ math-files := \
 	$(math-libs) \
 	$(math-tools) \
 	$(math-host-tools) \
-	$(math-includes) \
-	$(math-test-includes) \
+	$(math-includes)
 
-all-math: $(math-libs) $(math-tools) $(math-includes) $(math-test-includes)
+all-math: $(math-libs) $(math-tools) $(math-includes)
 
-$(math-objs): $(math-includes) $(math-test-includes)
+$(math-objs): $(math-includes)
 $(math-objs): CFLAGS_ALL += $(math-cflags)
-$(B)/test/mathtest.o: CFLAGS_ALL += -fmath-errno
+$(math-build-dir)/test/mathtest.o: CFLAGS_ALL += -fmath-errno
 $(math-host-objs): CC = $(HOST_CC)
 $(math-host-objs): CFLAGS_ALL = $(HOST_CFLAGS)
 
-$(B)/test/ulp.o: $(S)/test/ulp.h
+# Add include path for experimental routines so they can share helpers with non-experimental
+$(math-build-dir)/aarch64/experimental/advsimd/%: CFLAGS_ALL += -I$(math-src-dir)/aarch64/advsimd
+$(math-build-dir)/aarch64/experimental/sve/%: CFLAGS_ALL += -I$(math-src-dir)/aarch64/sve
+
+$(math-objs): CFLAGS_ALL += -I$(math-src-dir)
+
+ulp-funcs-dir = build/test/ulp-funcs/
+ulp-wrappers-dir = build/test/ulp-wrappers/
+mathbench-funcs-dir = build/test/mathbench-funcs/
+test-sig-dirs = $(ulp-funcs-dir) $(ulp-wrappers-dir) $(mathbench-funcs-dir)
+build/include/test $(test-sig-dirs) $(addsuffix /$(ARCH),$(test-sig-dirs)) $(addsuffix /aarch64/experimental,$(test-sig-dirs)) \
+$(addsuffix /aarch64/experimental/advsimd,$(test-sig-dirs)) $(addsuffix /aarch64/experimental/sve,$(test-sig-dirs)) \
+$(addsuffix /aarch64/advsimd,$(test-sig-dirs)) $(addsuffix /aarch64/sve,$(test-sig-dirs)):
+	mkdir -p $@
+
+ulp-funcs = $(patsubst $(math-src-dir)/%,$(ulp-funcs-dir)/%,$(basename $(math-lib-srcs)))
+ulp-wrappers = $(patsubst $(math-src-dir)/%,$(ulp-wrappers-dir)/%,$(basename $(math-lib-srcs)))
+mathbench-funcs = $(patsubst $(math-src-dir)/%,$(mathbench-funcs-dir)/%,$(basename $(math-lib-srcs)))
+
+ifeq ($(WANT_SVE_TESTS), 0)
+  # Filter out anything with sve in the path
+  ulp-funcs := $(foreach a,$(ulp-funcs),$(if $(findstring sve,$a),,$a))
+  ulp-wrappers := $(foreach a,$(ulp-wrappers),$(if $(findstring sve,$a),,$a))
+  mathbench-funcs := $(foreach a,$(mathbench-funcs),$(if $(findstring sve,$a),,$a))
+endif
+
+define emit_sig
+$1/aarch64/experimental/sve/%.i: EXTRA_INC = -I$(math-src-dir)/aarch64/sve
+$1/aarch64/experimental/advsimd/%.i: EXTRA_INC = -I$(math-src-dir)/aarch64/advsimd
+$1/%.i: $(math-src-dir)/%.c | $$$$(@D)
+	$(CC) $$< $(math-cflags) -I$(math-src-dir)/include -I$(math-src-dir) $$(EXTRA_INC) -D$2 -E -o $$@
+$1/%: $1/%.i
+	{ grep TEST_SIG $$< || true; } | cut -f 2- -d ' ' > $$@
+endef
+
+$(eval $(call emit_sig,$(ulp-funcs-dir),EMIT_ULP_FUNCS))
+$(eval $(call emit_sig,$(ulp-wrappers-dir),EMIT_ULP_WRAPPERS))
+$(eval $(call emit_sig,$(mathbench-funcs-dir),EMIT_MATHBENCH_FUNCS))
+
+ulp-funcs-gen = build/include/test/ulp_funcs_gen.h
+ulp-wrappers-gen = build/include/test/ulp_wrappers_gen.h
+mathbench-funcs-gen = build/include/test/mathbench_funcs_gen.h
+math-tools-autogen-headers = $(ulp-funcs-gen) $(ulp-wrappers-gen) $(mathbench-funcs-gen)
+
+$(ulp-funcs-gen): $(ulp-funcs) | $$(@D)
+$(ulp-wrappers-gen): $(ulp-wrappers) | $$(@D)
+$(mathbench-funcs-gen): $(mathbench-funcs) | $$(@D)
+
+$(math-tools-autogen-headers): | $$(@D)
+	cat $^ | sort -u > $@
+
+$(math-build-dir)/test/mathbench.o: $(mathbench-funcs-gen)
+$(math-build-dir)/test/ulp.o: $(math-src-dir)/test/ulp.h $(ulp-funcs-gen) $(ulp-wrappers-gen)
 
 build/lib/libmathlib.so: $(math-lib-objs:%.o=%.os)
 	$(CC) $(CFLAGS_ALL) $(LDFLAGS) -shared -o $@ $^
@@ -65,38 +154,40 @@ build/lib/libmathlib.a: $(math-lib-objs)
 	$(AR) rc $@ $^
 	$(RANLIB) $@
 
-$(math-host-tools): HOST_LDLIBS += -lm -lmpfr -lmpc
-$(math-tools): LDLIBS += $(math-ldlibs) -lm
-# math-sve-cflags should be empty if WANT_SVE_MATH is not enabled
-$(math-tools): CFLAGS_ALL += $(math-sve-cflags)
+$(math-host-tools): HOST_LDLIBS += $(libm-libs) $(mpfr-libs) $(mpc-libs)
+$(math-tools): LDLIBS += $(math-ldlibs) $(libm-libs)
+
+ifneq ($(OS),Darwin)
+  $(math-tools): LDFLAGS += -static
+endif
 
 build/bin/rtest: $(math-host-objs)
 	$(HOST_CC) $(HOST_CFLAGS) $(HOST_LDFLAGS) -o $@ $^ $(HOST_LDLIBS)
 
-build/bin/mathtest: $(B)/test/mathtest.o build/lib/libmathlib.a
-	$(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)
+build/bin/mathtest: $(math-build-dir)/test/mathtest.o build/lib/libmathlib.a
+	$(CC) $(CFLAGS_ALL) $(LDFLAGS) -o $@ $^ $(libm-libs)
 
-build/bin/mathbench: $(B)/test/mathbench.o build/lib/libmathlib.a
-	$(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)
+build/bin/mathbench: $(math-build-dir)/test/mathbench.o build/lib/libmathlib.a
+	$(CC) $(CFLAGS_ALL) $(LDFLAGS) -o $@ $^ $(libm-libs)
 
 # This is not ideal, but allows custom symbols in mathbench to get resolved.
-build/bin/mathbench_libc: $(B)/test/mathbench.o build/lib/libmathlib.a
-	$(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $< $(LDLIBS) -lc build/lib/libmathlib.a -lm
-
-build/bin/ulp: $(B)/test/ulp.o build/lib/libmathlib.a
-	$(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)
+build/bin/mathbench_libc: $(math-build-dir)/test/mathbench.o build/lib/libmathlib.a
+	$(CC) $(CFLAGS_ALL) $(LDFLAGS) -o $@ $< $(libm-libs) $(libc-libs) build/lib/libmathlib.a $(libm-libs)
 
-build/include/%.h: $(S)/include/%.h
-	cp $< $@
+build/bin/ulp: $(math-build-dir)/test/ulp.o build/lib/libmathlib.a
+	$(CC) $(CFLAGS_ALL) $(LDFLAGS) -o $@ $^ $(LDLIBS)
 
-build/include/test/%.h: $(S)/test/%.h
+build/include/%.h: $(math-src-dir)/include/%.h
 	cp $< $@
 
-build/bin/%.sh: $(S)/test/%.sh
+build/bin/%.sh: $(math-src-dir)/test/%.sh
 	cp $< $@
 
-math-tests := $(wildcard $(S)/test/testcases/directed/*.tst)
-math-rtests := $(wildcard $(S)/test/testcases/random/*.tst)
+math-tests := $(wildcard $(math-src-dir)/test/testcases/directed/*.tst)
+ifneq ($(WANT_EXP10_TESTS),1)
+math-tests := $(filter-out %exp10.tst, $(math-tests))
+endif
+math-rtests := $(wildcard $(math-src-dir)/test/testcases/random/*.tst)
 
 check-math-test: $(math-tools)
 	cat $(math-tests) | $(EMULATOR) build/bin/mathtest $(math-testflags)
@@ -104,8 +195,88 @@ check-math-test: $(math-tools)
 check-math-rtest: $(math-host-tools) $(math-tools)
 	cat $(math-rtests) | build/bin/rtest | $(EMULATOR) build/bin/mathtest $(math-testflags)
 
+ulp-input-dir = $(math-build-dir)/test/inputs
+$(ulp-input-dir) $(ulp-input-dir)/$(ARCH) $(ulp-input-dir)/aarch64/sve $(ulp-input-dir)/aarch64/advsimd \
+$(ulp-input-dir)/aarch64/experimental $(ulp-input-dir)/aarch64/experimental/advsimd $(ulp-input-dir)/aarch64/experimental/sve:
+	mkdir -p $@
+
+math-lib-lims = $(patsubst $(math-src-dir)/%.c,$(ulp-input-dir)/%.ulp,$(math-lib-srcs))
+math-lib-lims-nn = $(patsubst $(math-src-dir)/%.c,$(ulp-input-dir)/%.ulp_nn,$(math-lib-srcs))
+math-lib-fenvs = $(patsubst $(math-src-dir)/%.c,$(ulp-input-dir)/%.fenv,$(math-lib-srcs))
+math-lib-itvs = $(patsubst $(math-src-dir)/%.c,$(ulp-input-dir)/%.itv,$(math-lib-srcs))
+math-lib-cvals = $(patsubst $(math-src-dir)/%.c,$(ulp-input-dir)/%.cval,$(math-lib-srcs))
+
+ulp-inputs = $(math-lib-lims) $(math-lib-lims-nn) $(math-lib-fenvs) $(math-lib-itvs) $(math-lib-cvals)
+$(ulp-inputs): CFLAGS = -I$(math-src-dir)/test -I$(math-src-dir)/include -I$(math-src-dir) $(math-cflags)\
+                        -I$(math-src-dir)/aarch64/advsimd -I$(math-src-dir)/aarch64/sve
+
+$(ulp-input-dir)/%.ulp.i: $(math-src-dir)/%.c | $$(@D)
+	$(CC) $(CFLAGS) $< -E -o $@
+
+$(ulp-input-dir)/%.ulp: $(ulp-input-dir)/%.ulp.i
+	{ grep "TEST_ULP " $< || true; } > $@
+
+$(ulp-input-dir)/%.ulp_nn.i: $(math-src-dir)/%.c | $$(@D)
+	$(CC) $(CFLAGS) $< -E -o $@
+
+$(ulp-input-dir)/%.ulp_nn: $(ulp-input-dir)/%.ulp_nn.i
+	{ grep "TEST_ULP_NONNEAREST " $< || true; } > $@
+
+$(ulp-input-dir)/%.fenv.i: $(math-src-dir)/%.c | $$(@D)
+	$(CC) $(CFLAGS) $< -E -o $@
+
+$(ulp-input-dir)/%.fenv: $(ulp-input-dir)/%.fenv.i
+	{ grep "TEST_DISABLE_FENV " $< || true; } > $@
+
+$(ulp-input-dir)/%.itv.i: $(math-src-dir)/%.c | $$(@D)
+	$(CC) $(CFLAGS) $< -E -o $@
+
+$(ulp-input-dir)/%.itv: $(ulp-input-dir)/%.itv.i
+	{ grep "TEST_INTERVAL " $< || true; } | sed "s/ TEST_INTERVAL/\nTEST_INTERVAL/g" > $@
+
+$(ulp-input-dir)/%.cval.i: $(math-src-dir)/%.c | $$(@D)
+	$(CC) $(CFLAGS) $< -E -o $@
+
+$(ulp-input-dir)/%.cval: $(ulp-input-dir)/%.cval.i
+	{ grep "TEST_CONTROL_VALUE " $< || true; } > $@
+
+ulp-lims = $(ulp-input-dir)/limits
+$(ulp-lims): $(math-lib-lims)
+
+ulp-lims-nn = $(ulp-input-dir)/limits_nn
+$(ulp-lims-nn): $(math-lib-lims-nn)
+
+fenv-exps := $(ulp-input-dir)/fenv
+$(fenv-exps): $(math-lib-fenvs)
+
+generic-itvs = $(ulp-input-dir)/itvs
+$(generic-itvs): $(filter-out $(ulp-input-dir)/$(ARCH)/%,$(math-lib-itvs))
+
+arch-itvs = $(ulp-input-dir)/$(ARCH)/itvs
+$(arch-itvs): $(filter $(ulp-input-dir)/$(ARCH)/%,$(math-lib-itvs))
+
+ulp-cvals := $(ulp-input-dir)/cvals
+$(ulp-cvals): $(math-lib-cvals)
+
+# Remove first word, which will be TEST directive
+$(ulp-lims) $(ulp-lims-nn) $(fenv-exps) $(arch-itvs) $(generic-itvs) $(ulp-cvals): | $$(@D)
+	sed "s/TEST_[^ ]* //g" $^ | sort -u > $@
+
+check-math-ulp: $(ulp-lims) $(ulp-lims-nn)
+check-math-ulp: $(fenv-exps) $(ulp-cvals)
+check-math-ulp: $(generic-itvs) $(arch-itvs)
 check-math-ulp: $(math-tools)
-	ULPFLAGS="$(math-ulpflags)" WANT_SIMD_EXCEPT="$(WANT_SIMD_EXCEPT)" build/bin/runulp.sh $(EMULATOR)
+	ULPFLAGS="$(math-ulpflags)" \
+	LIMITS=../../$(ulp-lims) \
+	ARCH_ITVS=../../$(arch-itvs) \
+	GEN_ITVS=../../$(generic-itvs) \
+	DISABLE_FENV=../../$(fenv-exps) \
+	CVALS=../../$(ulp-cvals) \
+	FUNC=$(func) \
+	WANT_EXPERIMENTAL_MATH=$(WANT_EXPERIMENTAL_MATH) \
+	WANT_SVE_TESTS=$(WANT_SVE_TESTS) \
+	USE_MPFR=$(USE_MPFR) \
+	build/bin/runulp.sh $(EMULATOR)
 
 check-math: check-math-test check-math-rtest check-math-ulp
 
diff --git a/math/README.contributors b/math/README.contributors
index 33e7ba376e4193..58a04fa4759d15 100644
--- a/math/README.contributors
+++ b/math/README.contributors
@@ -1,8 +1,9 @@
 STYLE REQUIREMENTS
 ==================
 
-1. Most code in this sub-directory is expected to be upstreamed into glibc so
-   the GNU Coding Standard and glibc specific conventions should be followed
+1. With the exception of math/aarch64/experimental/, most code in this
+   sub-directory is expected to be upstreamed into glibc so the GNU
+   Coding Standard and glibc specific conventions should be followed
    to ease upstreaming.
 
 2. ABI and symbols: the code should be written so it is suitable for inclusion
diff --git a/pl/math/v_acos_2u.c b/math/aarch64/advsimd/acos.c
similarity index 85%
rename from pl/math/v_acos_2u.c
rename to math/aarch64/advsimd/acos.c
index 581f8506c0d6f5..7873a07e6f56eb 100644
--- a/pl/math/v_acos_2u.c
+++ b/math/aarch64/advsimd/acos.c
@@ -1,14 +1,14 @@
 /*
  * Double-precision vector acos(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "poly_advsimd_f64.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "v_poly_f64.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -30,8 +30,8 @@ static const struct data
 };
 
 #define AllMask v_u64 (0xffffffffffffffff)
-#define Oneu (0x3ff0000000000000)
-#define Small (0x3e50000000000000) /* 2^-53.  */
+#define Oneu 0x3ff0000000000000
+#define Small 0x3e50000000000000 /* 2^-26.  */
 
 #if WANT_SIMD_EXCEPT
 static float64x2_t VPCS_ATTR NOINLINE
@@ -111,12 +111,12 @@ float64x2_t VPCS_ATTR V_NAME_D1 (acos) (float64x2_t x)
   return vfmaq_f64 (add, mul, y);
 }
 
-PL_SIG (V, D, 1, acos, -1.0, 1.0)
-PL_TEST_ULP (V_NAME_D1 (acos), 1.02)
-PL_TEST_EXPECT_FENV (V_NAME_D1 (acos), WANT_SIMD_EXCEPT)
-PL_TEST_INTERVAL (V_NAME_D1 (acos), 0, Small, 5000)
-PL_TEST_INTERVAL (V_NAME_D1 (acos), Small, 0.5, 50000)
-PL_TEST_INTERVAL (V_NAME_D1 (acos), 0.5, 1.0, 50000)
-PL_TEST_INTERVAL (V_NAME_D1 (acos), 1.0, 0x1p11, 50000)
-PL_TEST_INTERVAL (V_NAME_D1 (acos), 0x1p11, inf, 20000)
-PL_TEST_INTERVAL (V_NAME_D1 (acos), -0, -inf, 20000)
+TEST_SIG (V, D, 1, acos, -1.0, 1.0)
+TEST_ULP (V_NAME_D1 (acos), 1.02)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (acos), WANT_SIMD_EXCEPT)
+TEST_INTERVAL (V_NAME_D1 (acos), 0, Small, 5000)
+TEST_INTERVAL (V_NAME_D1 (acos), Small, 0.5, 50000)
+TEST_INTERVAL (V_NAME_D1 (acos), 0.5, 1.0, 50000)
+TEST_INTERVAL (V_NAME_D1 (acos), 1.0, 0x1p11, 50000)
+TEST_INTERVAL (V_NAME_D1 (acos), 0x1p11, inf, 20000)
+TEST_INTERVAL (V_NAME_D1 (acos), -0, -inf, 20000)
diff --git a/pl/math/v_acosf_1u4.c b/math/aarch64/advsimd/acosf.c
similarity index 82%
rename from pl/math/v_acosf_1u4.c
rename to math/aarch64/advsimd/acosf.c
index bb17b1df18f355..e200f792c76436 100644
--- a/pl/math/v_acosf_1u4.c
+++ b/math/aarch64/advsimd/acosf.c
@@ -1,14 +1,14 @@
 /*
  * Single-precision vector acos(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "poly_advsimd_f32.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "v_poly_f32.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -57,8 +57,8 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
 
    The largest observed error in this region is 1.32 ulps,
    _ZGVnN4v_acosf (0x1.15ba56p-1) got 0x1.feb33p-1
-			   want 0x1.feb32ep-1.  */
-float32x4_t VPCS_ATTR V_NAME_F1 (acos) (float32x4_t x)
+				 want 0x1.feb32ep-1.  */
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (acos) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
 
@@ -102,12 +102,14 @@ float32x4_t VPCS_ATTR V_NAME_F1 (acos) (float32x4_t x)
   return vfmaq_f32 (add, mul, y);
 }
 
-PL_SIG (V, F, 1, acos, -1.0, 1.0)
-PL_TEST_ULP (V_NAME_F1 (acos), 0.82)
-PL_TEST_EXPECT_FENV (V_NAME_F1 (acos), WANT_SIMD_EXCEPT)
-PL_TEST_INTERVAL (V_NAME_F1 (acos), 0, 0x1p-26, 5000)
-PL_TEST_INTERVAL (V_NAME_F1 (acos), 0x1p-26, 0.5, 50000)
-PL_TEST_INTERVAL (V_NAME_F1 (acos), 0.5, 1.0, 50000)
-PL_TEST_INTERVAL (V_NAME_F1 (acos), 1.0, 0x1p11, 50000)
-PL_TEST_INTERVAL (V_NAME_F1 (acos), 0x1p11, inf, 20000)
-PL_TEST_INTERVAL (V_NAME_F1 (acos), -0, -inf, 20000)
+HALF_WIDTH_ALIAS_F1 (acos)
+
+TEST_SIG (V, F, 1, acos, -1.0, 1.0)
+TEST_ULP (V_NAME_F1 (acos), 0.82)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (acos), WANT_SIMD_EXCEPT)
+TEST_INTERVAL (V_NAME_F1 (acos), 0, 0x1p-26, 5000)
+TEST_INTERVAL (V_NAME_F1 (acos), 0x1p-26, 0.5, 50000)
+TEST_INTERVAL (V_NAME_F1 (acos), 0.5, 1.0, 50000)
+TEST_INTERVAL (V_NAME_F1 (acos), 1.0, 0x1p11, 50000)
+TEST_INTERVAL (V_NAME_F1 (acos), 0x1p11, inf, 20000)
+TEST_INTERVAL (V_NAME_F1 (acos), -0, -inf, 20000)
diff --git a/pl/math/v_acosh_3u5.c b/math/aarch64/advsimd/acosh.c
similarity index 72%
rename from pl/math/v_acosh_3u5.c
rename to math/aarch64/advsimd/acosh.c
index 42fa2616d562bb..55d8ed5a421ecd 100644
--- a/pl/math/v_acosh_3u5.c
+++ b/math/aarch64/advsimd/acosh.c
@@ -1,12 +1,12 @@
 /*
- * Single-precision vector acosh(x) function.
- * Copyright (c) 2023, Arm Limited.
+ * Double-precision vector acosh(x) function.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define WANT_V_LOG1P_K0_SHORTCUT 1
 #include "v_log1p_inline.h"
@@ -45,9 +45,8 @@ VPCS_ATTR float64x2_t V_NAME_D1 (acosh) (float64x2_t x)
     x = vbslq_f64 (special, vreinterpretq_f64_u64 (d->one), x);
 #endif
 
-  float64x2_t xm1 = vsubq_f64 (x, v_f64 (1));
-  float64x2_t y;
-  y = vaddq_f64 (x, v_f64 (1));
+  float64x2_t xm1 = vsubq_f64 (x, v_f64 (1.0));
+  float64x2_t y = vaddq_f64 (x, v_f64 (1.0));
   y = vmulq_f64 (y, xm1);
   y = vsqrtq_f64 (y);
   y = vaddq_f64 (xm1, y);
@@ -57,10 +56,10 @@ VPCS_ATTR float64x2_t V_NAME_D1 (acosh) (float64x2_t x)
   return log1p_inline (y, &d->log1p_consts);
 }
 
-PL_SIG (V, D, 1, acosh, 1.0, 10.0)
-PL_TEST_ULP (V_NAME_D1 (acosh), 2.53)
-PL_TEST_EXPECT_FENV (V_NAME_D1 (acosh), WANT_SIMD_EXCEPT)
-PL_TEST_INTERVAL (V_NAME_D1 (acosh), 1, 0x1p511, 90000)
-PL_TEST_INTERVAL (V_NAME_D1 (acosh), 0x1p511, inf, 10000)
-PL_TEST_INTERVAL (V_NAME_D1 (acosh), 0, 1, 1000)
-PL_TEST_INTERVAL (V_NAME_D1 (acosh), -0, -inf, 10000)
+TEST_SIG (V, D, 1, acosh, 1.0, 10.0)
+TEST_ULP (V_NAME_D1 (acosh), 2.53)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (acosh), WANT_SIMD_EXCEPT)
+TEST_INTERVAL (V_NAME_D1 (acosh), 1, 0x1p511, 90000)
+TEST_INTERVAL (V_NAME_D1 (acosh), 0x1p511, inf, 10000)
+TEST_INTERVAL (V_NAME_D1 (acosh), 0, 1, 1000)
+TEST_INTERVAL (V_NAME_D1 (acosh), -0, -inf, 10000)
diff --git a/pl/math/v_acoshf_3u1.c b/math/aarch64/advsimd/acoshf.c
similarity index 50%
rename from pl/math/v_acoshf_3u1.c
rename to math/aarch64/advsimd/acoshf.c
index a2ff0f02635b37..029d457cfa8aed 100644
--- a/pl/math/v_acoshf_3u1.c
+++ b/math/aarch64/advsimd/acoshf.c
@@ -1,49 +1,46 @@
 /*
  * Single-precision vector acosh(x) function.
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 #include "v_log1pf_inline.h"
 
+#define SquareLim 0x1p64
+
 const static struct data
 {
   struct v_log1pf_data log1pf_consts;
   uint32x4_t one;
-  uint16x4_t thresh;
-} data = {
-  .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE,
-  .one = V4 (0x3f800000),
-  .thresh = V4 (0x2000) /* asuint(0x1p64) - asuint(1).  */
-};
+} data = { .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE, .one = V4 (0x3f800000) };
 
-#define SignMask 0x80000000
+#define Thresh vdup_n_u16 (0x2000) /* top(asuint(SquareLim) - asuint(1)).  */
 
 static float32x4_t NOINLINE VPCS_ATTR
 special_case (float32x4_t x, float32x4_t y, uint16x4_t special,
-	      const struct v_log1pf_data d)
+	      const struct v_log1pf_data *d)
 {
   return v_call_f32 (acoshf, x, log1pf_inline (y, d), vmovl_u16 (special));
 }
 
 /* Vector approximation for single-precision acosh, based on log1p. Maximum
    error depends on WANT_SIMD_EXCEPT. With SIMD fp exceptions enabled, it
-   is 2.78 ULP:
-   __v_acoshf(0x1.07887p+0) got 0x1.ef9e9cp-3
-			   want 0x1.ef9ea2p-3.
+   is 3.00 ULP:
+   _ZGVnN4v_acoshf(0x1.01df3ap+0) got 0x1.ef0a82p-4
+				 want 0x1.ef0a7cp-4.
    With exceptions disabled, we can compute u with a shorter dependency chain,
-   which gives maximum error of 3.07 ULP:
-  __v_acoshf(0x1.01f83ep+0) got 0x1.fbc7fap-4
-			   want 0x1.fbc7f4p-4.  */
+   which gives maximum error of 3.22 ULP:
+   _ZGVnN4v_acoshf(0x1.007ef2p+0) got 0x1.fdcdccp-5
+				 want 0x1.fdcdd2p-5.  */
 
-VPCS_ATTR float32x4_t V_NAME_F1 (acosh) (float32x4_t x)
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (acosh) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
   uint32x4_t ix = vreinterpretq_u32_f32 (x);
-  uint16x4_t special = vcge_u16 (vsubhn_u32 (ix, d->one), d->thresh);
+  uint16x4_t special = vcge_u16 (vsubhn_u32 (ix, d->one), Thresh);
 
 #if WANT_SIMD_EXCEPT
   /* Mask special lanes with 1 to side-step spurious invalid or overflow. Use
@@ -54,25 +51,28 @@ VPCS_ATTR float32x4_t V_NAME_F1 (acosh) (float32x4_t x)
   float32x4_t xm1 = v_zerofy_f32 (vsubq_f32 (x, v_f32 (1)), p);
   float32x4_t u = vfmaq_f32 (vaddq_f32 (xm1, xm1), xm1, xm1);
 #else
-  float32x4_t xm1 = vsubq_f32 (x, v_f32 (1));
-  float32x4_t u = vmulq_f32 (xm1, vaddq_f32 (x, v_f32 (1.0f)));
+  float32x4_t xm1 = vsubq_f32 (x, vreinterpretq_f32_u32 (d->one));
+  float32x4_t u
+      = vmulq_f32 (xm1, vaddq_f32 (x, vreinterpretq_f32_u32 (d->one)));
 #endif
 
   float32x4_t y = vaddq_f32 (xm1, vsqrtq_f32 (u));
 
   if (unlikely (v_any_u16h (special)))
-    return special_case (x, y, special, d->log1pf_consts);
-  return log1pf_inline (y, d->log1pf_consts);
+    return special_case (x, y, special, &d->log1pf_consts);
+  return log1pf_inline (y, &d->log1pf_consts);
 }
 
-PL_SIG (V, F, 1, acosh, 1.0, 10.0)
+HALF_WIDTH_ALIAS_F1 (acosh)
+
+TEST_SIG (V, F, 1, acosh, 1.0, 10.0)
 #if WANT_SIMD_EXCEPT
-PL_TEST_ULP (V_NAME_F1 (acosh), 2.29)
+TEST_ULP (V_NAME_F1 (acosh), 2.50)
 #else
-PL_TEST_ULP (V_NAME_F1 (acosh), 2.58)
+TEST_ULP (V_NAME_F1 (acosh), 2.78)
 #endif
-PL_TEST_EXPECT_FENV (V_NAME_F1 (acosh), WANT_SIMD_EXCEPT)
-PL_TEST_INTERVAL (V_NAME_F1 (acosh), 0, 1, 500)
-PL_TEST_INTERVAL (V_NAME_F1 (acosh), 1, SquareLim, 100000)
-PL_TEST_INTERVAL (V_NAME_F1 (acosh), SquareLim, inf, 1000)
-PL_TEST_INTERVAL (V_NAME_F1 (acosh), -0, -inf, 1000)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (acosh), WANT_SIMD_EXCEPT)
+TEST_INTERVAL (V_NAME_F1 (acosh), 0, 1, 500)
+TEST_INTERVAL (V_NAME_F1 (acosh), 1, SquareLim, 100000)
+TEST_INTERVAL (V_NAME_F1 (acosh), SquareLim, inf, 1000)
+TEST_INTERVAL (V_NAME_F1 (acosh), -0, -inf, 1000)
diff --git a/pl/math/v_asin_3u.c b/math/aarch64/advsimd/asin.c
similarity index 56%
rename from pl/math/v_asin_3u.c
rename to math/aarch64/advsimd/asin.c
index 756443c6b320ba..c751d9264a1285 100644
--- a/pl/math/v_asin_3u.c
+++ b/math/aarch64/advsimd/asin.c
@@ -1,36 +1,35 @@
 /*
  * Double-precision vector asin(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "poly_advsimd_f64.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
-  float64x2_t poly[12];
+  float64x2_t c0, c2, c4, c6, c8, c10;
   float64x2_t pi_over_2;
   uint64x2_t abs_mask;
+  double c1, c3, c5, c7, c9, c11;
 } data = {
   /* Polynomial approximation of  (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))
      on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57.  */
-  .poly = { V2 (0x1.555555555554ep-3), V2 (0x1.3333333337233p-4),
-	    V2 (0x1.6db6db67f6d9fp-5), V2 (0x1.f1c71fbd29fbbp-6),
-	    V2 (0x1.6e8b264d467d6p-6), V2 (0x1.1c5997c357e9dp-6),
-	    V2 (0x1.c86a22cd9389dp-7), V2 (0x1.856073c22ebbep-7),
-	    V2 (0x1.fd1151acb6bedp-8), V2 (0x1.087182f799c1dp-6),
-	    V2 (-0x1.6602748120927p-7), V2 (0x1.cfa0dd1f9478p-6), },
-  .pi_over_2 = V2 (0x1.921fb54442d18p+0),
-  .abs_mask = V2 (0x7fffffffffffffff),
+  .c0 = V2 (0x1.555555555554ep-3),	  .c1 = 0x1.3333333337233p-4,
+  .c2 = V2 (0x1.6db6db67f6d9fp-5),	  .c3 = 0x1.f1c71fbd29fbbp-6,
+  .c4 = V2 (0x1.6e8b264d467d6p-6),	  .c5 = 0x1.1c5997c357e9dp-6,
+  .c6 = V2 (0x1.c86a22cd9389dp-7),	  .c7 = 0x1.856073c22ebbep-7,
+  .c8 = V2 (0x1.fd1151acb6bedp-8),	  .c9 = 0x1.087182f799c1dp-6,
+  .c10 = V2 (-0x1.6602748120927p-7),	  .c11 = 0x1.cfa0dd1f9478p-6,
+  .pi_over_2 = V2 (0x1.921fb54442d18p+0), .abs_mask = V2 (0x7fffffffffffffff),
 };
 
 #define AllMask v_u64 (0xffffffffffffffff)
-#define One (0x3ff0000000000000)
-#define Small (0x3e50000000000000) /* 2^-12.  */
+#define One 0x3ff0000000000000
+#define Small 0x3e50000000000000 /* 2^-12.  */
 
 #if WANT_SIMD_EXCEPT
 static float64x2_t VPCS_ATTR NOINLINE
@@ -58,12 +57,11 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
      asin(x) = pi/2 - (y + y * z * P(z)), with  z = (1-x)/2 and y = sqrt(z).
 
    The largest observed error in this region is 2.69 ulps,
-   _ZGVnN2v_asin (0x1.044ac9819f573p-1) got 0x1.110d7e85fdd5p-1
-				       want 0x1.110d7e85fdd53p-1.  */
+   _ZGVnN2v_asin (0x1.044e8cefee301p-1) got 0x1.1111dd54ddf96p-1
+				       want 0x1.1111dd54ddf99p-1.  */
 float64x2_t VPCS_ATTR V_NAME_D1 (asin) (float64x2_t x)
 {
   const struct data *d = ptr_barrier (&data);
-
   float64x2_t ax = vabsq_f64 (x);
 
 #if WANT_SIMD_EXCEPT
@@ -76,7 +74,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (asin) (float64x2_t x)
     return special_case (x, x, AllMask);
 #endif
 
-  uint64x2_t a_lt_half = vcltq_f64 (ax, v_f64 (0.5));
+  uint64x2_t a_lt_half = vcaltq_f64 (x, v_f64 (0.5));
 
   /* Evaluate polynomial Q(x) = y + y * z * P(z) with
      z = x ^ 2 and y = |x|            , if |x| < 0.5
@@ -89,7 +87,26 @@ float64x2_t VPCS_ATTR V_NAME_D1 (asin) (float64x2_t x)
   float64x2_t z4 = vmulq_f64 (z2, z2);
   float64x2_t z8 = vmulq_f64 (z4, z4);
   float64x2_t z16 = vmulq_f64 (z8, z8);
-  float64x2_t p = v_estrin_11_f64 (z2, z4, z8, z16, d->poly);
+
+  /* order-11 estrin.  */
+  float64x2_t c13 = vld1q_f64 (&d->c1);
+  float64x2_t c57 = vld1q_f64 (&d->c5);
+  float64x2_t c911 = vld1q_f64 (&d->c9);
+
+  float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0);
+  float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1);
+  float64x2_t p03 = vfmaq_f64 (p01, z4, p23);
+
+  float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0);
+  float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1);
+  float64x2_t p47 = vfmaq_f64 (p45, z4, p67);
+
+  float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0);
+  float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1);
+  float64x2_t p811 = vfmaq_f64 (p89, z4, p1011);
+
+  float64x2_t p07 = vfmaq_f64 (p03, z8, p47);
+  float64x2_t p = vfmaq_f64 (p07, z16, p811);
 
   /* Finalize polynomial: z + z * z2 * P(z2).  */
   p = vfmaq_f64 (z, vmulq_f64 (z, z2), p);
@@ -102,12 +119,12 @@ float64x2_t VPCS_ATTR V_NAME_D1 (asin) (float64x2_t x)
   return vbslq_f64 (d->abs_mask, y, x);
 }
 
-PL_SIG (V, D, 1, asin, -1.0, 1.0)
-PL_TEST_ULP (V_NAME_D1 (asin), 2.19)
-PL_TEST_EXPECT_FENV (V_NAME_D1 (asin), WANT_SIMD_EXCEPT)
-PL_TEST_INTERVAL (V_NAME_D1 (asin), 0, Small, 5000)
-PL_TEST_INTERVAL (V_NAME_D1 (asin), Small, 0.5, 50000)
-PL_TEST_INTERVAL (V_NAME_D1 (asin), 0.5, 1.0, 50000)
-PL_TEST_INTERVAL (V_NAME_D1 (asin), 1.0, 0x1p11, 50000)
-PL_TEST_INTERVAL (V_NAME_D1 (asin), 0x1p11, inf, 20000)
-PL_TEST_INTERVAL (V_NAME_D1 (asin), -0, -inf, 20000)
+TEST_SIG (V, D, 1, asin, -1.0, 1.0)
+TEST_ULP (V_NAME_D1 (asin), 2.20)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (asin), WANT_SIMD_EXCEPT)
+TEST_INTERVAL (V_NAME_D1 (asin), 0, Small, 5000)
+TEST_INTERVAL (V_NAME_D1 (asin), Small, 0.5, 50000)
+TEST_INTERVAL (V_NAME_D1 (asin), 0.5, 1.0, 50000)
+TEST_INTERVAL (V_NAME_D1 (asin), 1.0, 0x1p11, 50000)
+TEST_INTERVAL (V_NAME_D1 (asin), 0x1p11, inf, 20000)
+TEST_INTERVAL (V_NAME_D1 (asin), -0, -inf, 20000)
diff --git a/pl/math/v_asinf_2u5.c b/math/aarch64/advsimd/asinf.c
similarity index 82%
rename from pl/math/v_asinf_2u5.c
rename to math/aarch64/advsimd/asinf.c
index eb978cd956ab82..970feb37e1d592 100644
--- a/pl/math/v_asinf_2u5.c
+++ b/math/aarch64/advsimd/asinf.c
@@ -1,14 +1,14 @@
 /*
  * Single-precision vector asin(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "poly_advsimd_f32.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "v_poly_f32.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -53,7 +53,7 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
 
    The largest observed error in this region is 2.41 ulps,
      _ZGVnN4v_asinf (0x1.00203ep-1) got 0x1.0c3a64p-1 want 0x1.0c3a6p-1.  */
-float32x4_t VPCS_ATTR V_NAME_F1 (asin) (float32x4_t x)
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (asin) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
 
@@ -93,12 +93,14 @@ float32x4_t VPCS_ATTR V_NAME_F1 (asin) (float32x4_t x)
   return vbslq_f32 (v_u32 (AbsMask), y, x);
 }
 
-PL_SIG (V, F, 1, asin, -1.0, 1.0)
-PL_TEST_ULP (V_NAME_F1 (asin), 1.91)
-PL_TEST_EXPECT_FENV (V_NAME_F1 (asin), WANT_SIMD_EXCEPT)
-PL_TEST_INTERVAL (V_NAME_F1 (asin), 0, 0x1p-12, 5000)
-PL_TEST_INTERVAL (V_NAME_F1 (asin), 0x1p-12, 0.5, 50000)
-PL_TEST_INTERVAL (V_NAME_F1 (asin), 0.5, 1.0, 50000)
-PL_TEST_INTERVAL (V_NAME_F1 (asin), 1.0, 0x1p11, 50000)
-PL_TEST_INTERVAL (V_NAME_F1 (asin), 0x1p11, inf, 20000)
-PL_TEST_INTERVAL (V_NAME_F1 (asin), -0, -inf, 20000)
+HALF_WIDTH_ALIAS_F1 (asin)
+
+TEST_SIG (V, F, 1, asin, -1.0, 1.0)
+TEST_ULP (V_NAME_F1 (asin), 1.91)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (asin), WANT_SIMD_EXCEPT)
+TEST_INTERVAL (V_NAME_F1 (asin), 0, 0x1p-12, 5000)
+TEST_INTERVAL (V_NAME_F1 (asin), 0x1p-12, 0.5, 50000)
+TEST_INTERVAL (V_NAME_F1 (asin), 0.5, 1.0, 50000)
+TEST_INTERVAL (V_NAME_F1 (asin), 1.0, 0x1p11, 50000)
+TEST_INTERVAL (V_NAME_F1 (asin), 0x1p11, inf, 20000)
+TEST_INTERVAL (V_NAME_F1 (asin), -0, -inf, 20000)
diff --git a/math/aarch64/advsimd/asinh.c b/math/aarch64/advsimd/asinh.c
new file mode 100644
index 00000000000000..550302826bd92e
--- /dev/null
+++ b/math/aarch64/advsimd/asinh.c
@@ -0,0 +1,242 @@
+/*
+ * Double-precision vector asinh(x) function.
+ *
+ * Copyright (c) 2022-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "test_defs.h"
+#include "test_sig.h"
+#include "v_math.h"
+
+const static struct data
+{
+  uint64x2_t huge_bound, abs_mask, off, mask;
+#if WANT_SIMD_EXCEPT
+  float64x2_t tiny_bound;
+#endif
+  float64x2_t lc0, lc2;
+  double lc1, lc3, ln2, lc4;
+
+  float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c17;
+  double c1, c3, c5, c7, c9, c11, c13, c15;
+
+} data = {
+
+#if WANT_SIMD_EXCEPT
+  .tiny_bound = V2 (0x1p-26),
+#endif
+  /* Coefficients of polynomial s.t. asinh(x) is approximated by
+     asinh(x) ~= x + x^3 * (C0 + C1 * x^2 + C2 * x^4 + C3 * x^6 + ...).
+     Generated using Remez, f = (asinh(sqrt(x)) - sqrt(x))/x^(3/2).  */
+
+  .c0 = V2 (-0x1.55555555554a7p-3),
+  .c1 = 0x1.3333333326c7p-4,
+  .c2 = V2 (-0x1.6db6db68332e6p-5),
+  .c3 = 0x1.f1c71b26fb40dp-6,
+  .c4 = V2 (-0x1.6e8b8b654a621p-6),
+  .c5 = 0x1.1c4daa9e67871p-6,
+  .c6 = V2 (-0x1.c9871d10885afp-7),
+  .c7 = 0x1.7a16e8d9d2ecfp-7,
+  .c8 = V2 (-0x1.3ddca533e9f54p-7),
+  .c9 = 0x1.0becef748dafcp-7,
+  .c10 = V2 (-0x1.b90c7099dd397p-8),
+  .c11 = 0x1.541f2bb1ffe51p-8,
+  .c12 = V2 (-0x1.d217026a669ecp-9),
+  .c13 = 0x1.0b5c7977aaf7p-9,
+  .c14 = V2 (-0x1.e0f37daef9127p-11),
+  .c15 = 0x1.388b5fe542a6p-12,
+  .c16 = V2 (-0x1.021a48685e287p-14),
+  .c17 = V2 (0x1.93d4ba83d34dap-18),
+
+  .lc0 = V2 (-0x1.ffffffffffff7p-2),
+  .lc1 = 0x1.55555555170d4p-2,
+  .lc2 = V2 (-0x1.0000000399c27p-2),
+  .lc3 = 0x1.999b2e90e94cap-3,
+  .lc4 = -0x1.554e550bd501ep-3,
+  .ln2 = 0x1.62e42fefa39efp-1,
+
+  .off = V2 (0x3fe6900900000000),
+  .huge_bound = V2 (0x5fe0000000000000),
+  .abs_mask = V2 (0x7fffffffffffffff),
+  .mask = V2 (0xfffULL << 52),
+};
+
+static float64x2_t NOINLINE VPCS_ATTR
+special_case (float64x2_t x, float64x2_t y, uint64x2_t abs_mask,
+	      uint64x2_t special)
+{
+  /* Copy sign.  */
+  y = vbslq_f64 (abs_mask, y, x);
+  return v_call_f64 (asinh, x, y, special);
+}
+
+#define N (1 << V_LOG_TABLE_BITS)
+#define IndexMask (N - 1)
+
+struct entry
+{
+  float64x2_t invc;
+  float64x2_t logc;
+};
+
+static inline struct entry
+lookup (uint64x2_t i)
+{
+  /* Since N is a power of 2, n % N = n & (N - 1).  */
+  struct entry e;
+  uint64_t i0 = (vgetq_lane_u64 (i, 0) >> (52 - V_LOG_TABLE_BITS)) & IndexMask;
+  uint64_t i1 = (vgetq_lane_u64 (i, 1) >> (52 - V_LOG_TABLE_BITS)) & IndexMask;
+  float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc);
+  float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc);
+  e.invc = vuzp1q_f64 (e0, e1);
+  e.logc = vuzp2q_f64 (e0, e1);
+  return e;
+}
+
+static inline float64x2_t
+log_inline (float64x2_t xm, const struct data *d)
+{
+
+  uint64x2_t u = vreinterpretq_u64_f64 (xm);
+  uint64x2_t u_off = vsubq_u64 (u, d->off);
+
+  int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (u_off), 52);
+  uint64x2_t iz = vsubq_u64 (u, vandq_u64 (u_off, d->mask));
+  float64x2_t z = vreinterpretq_f64_u64 (iz);
+
+  struct entry e = lookup (u_off);
+
+  /* log(x) = log1p(z/c-1) + log(c) + k*Ln2.  */
+  float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc);
+  float64x2_t kd = vcvtq_f64_s64 (k);
+
+  /* hi = r + log(c) + k*Ln2.  */
+  float64x2_t ln2_and_lc4 = vld1q_f64 (&d->ln2);
+  float64x2_t hi = vfmaq_laneq_f64 (vaddq_f64 (e.logc, r), kd, ln2_and_lc4, 0);
+
+  /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi.  */
+  float64x2_t odd_coeffs = vld1q_f64 (&d->lc1);
+  float64x2_t r2 = vmulq_f64 (r, r);
+  float64x2_t y = vfmaq_laneq_f64 (d->lc2, r, odd_coeffs, 1);
+  float64x2_t p = vfmaq_laneq_f64 (d->lc0, r, odd_coeffs, 0);
+  y = vfmaq_laneq_f64 (y, r2, ln2_and_lc4, 1);
+  y = vfmaq_f64 (p, r2, y);
+  return vfmaq_f64 (hi, y, r2);
+}
+
+/* Double-precision implementation of vector asinh(x).
+   asinh is very sensitive around 1, so it is impractical to devise a single
+   low-cost algorithm which is sufficiently accurate on a wide range of input.
+   Instead we use two different algorithms:
+   asinh(x) = sign(x) * log(|x| + sqrt(x^2 + 1))     if |x| >= 1
+	    = sign(x) * (|x| + |x|^3 * P(x^2))       otherwise
+   where log(x) is an optimized log approximation, and P(x) is a polynomial
+   shared with the scalar routine. The greatest observed error is 2.79 ULP, in
+   |x| >= 1:
+   _ZGVnN2v_asinh(0x1.2cd9d73ea76a6p+0) got 0x1.ffffd003219dap-1
+				       want  0x1.ffffd003219ddp-1.  */
+VPCS_ATTR float64x2_t V_NAME_D1 (asinh) (float64x2_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+  float64x2_t ax = vabsq_f64 (x);
+
+  uint64x2_t gt1 = vcgeq_f64 (ax, v_f64 (1));
+
+#if WANT_SIMD_EXCEPT
+  uint64x2_t iax = vreinterpretq_u64_f64 (ax);
+  uint64x2_t special = vcgeq_u64 (iax, (d->huge_bound));
+  uint64x2_t tiny = vcltq_f64 (ax, d->tiny_bound);
+  special = vorrq_u64 (special, tiny);
+#else
+  uint64x2_t special = vcgeq_f64 (ax, vreinterpretq_f64_u64 (d->huge_bound));
+#endif
+
+  /* Option 1: |x| >= 1.
+     Compute asinh(x) according to asinh(x) = log(x + sqrt(x^2 + 1)).
+     If WANT_SIMD_EXCEPT is enabled, sidestep special values, which will
+     overflow, by setting special lanes to 1. These will be fixed later.  */
+  float64x2_t option_1 = v_f64 (0);
+  if (likely (v_any_u64 (gt1)))
+    {
+#if WANT_SIMD_EXCEPT
+      float64x2_t xm = v_zerofy_f64 (ax, special);
+#else
+      float64x2_t xm = ax;
+#endif
+      option_1 = log_inline (
+	  vaddq_f64 (xm, vsqrtq_f64 (vfmaq_f64 (v_f64 (1), xm, xm))), d);
+    }
+
+  /* Option 2: |x| < 1.
+     Compute asinh(x) using a polynomial.
+     If WANT_SIMD_EXCEPT is enabled, sidestep special lanes, which will
+     overflow, and tiny lanes, which will underflow, by setting them to 0. They
+     will be fixed later, either by selecting x or falling back to the scalar
+     special-case. The largest observed error in this region is 1.47 ULPs:
+     _ZGVnN2v_asinh(0x1.fdfcd00cc1e6ap-1) got 0x1.c1d6bf874019bp-1
+					 want 0x1.c1d6bf874019cp-1.  */
+  float64x2_t option_2 = v_f64 (0);
+
+  if (likely (v_any_u64 (vceqzq_u64 (gt1))))
+    {
+
+#if WANT_SIMD_EXCEPT
+      ax = v_zerofy_f64 (ax, vorrq_u64 (tiny, gt1));
+#endif
+      float64x2_t x2 = vmulq_f64 (ax, ax), z2 = vmulq_f64 (x2, x2);
+      /* Order-17 Pairwise Horner scheme.  */
+      float64x2_t c13 = vld1q_f64 (&d->c1);
+      float64x2_t c57 = vld1q_f64 (&d->c5);
+      float64x2_t c911 = vld1q_f64 (&d->c9);
+      float64x2_t c1315 = vld1q_f64 (&d->c13);
+
+      float64x2_t p01 = vfmaq_laneq_f64 (d->c0, x2, c13, 0);
+      float64x2_t p23 = vfmaq_laneq_f64 (d->c2, x2, c13, 1);
+      float64x2_t p45 = vfmaq_laneq_f64 (d->c4, x2, c57, 0);
+      float64x2_t p67 = vfmaq_laneq_f64 (d->c6, x2, c57, 1);
+      float64x2_t p89 = vfmaq_laneq_f64 (d->c8, x2, c911, 0);
+      float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, x2, c911, 1);
+      float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, x2, c1315, 0);
+      float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, x2, c1315, 1);
+      float64x2_t p1617 = vfmaq_f64 (d->c16, x2, d->c17);
+
+      float64x2_t p = vfmaq_f64 (p1415, z2, p1617);
+      p = vfmaq_f64 (p1213, z2, p);
+      p = vfmaq_f64 (p1011, z2, p);
+      p = vfmaq_f64 (p89, z2, p);
+
+      p = vfmaq_f64 (p67, z2, p);
+      p = vfmaq_f64 (p45, z2, p);
+
+      p = vfmaq_f64 (p23, z2, p);
+
+      p = vfmaq_f64 (p01, z2, p);
+      option_2 = vfmaq_f64 (ax, p, vmulq_f64 (ax, x2));
+#if WANT_SIMD_EXCEPT
+      option_2 = vbslq_f64 (tiny, x, option_2);
+#endif
+    }
+
+  /* Choose the right option for each lane.  */
+  float64x2_t y = vbslq_f64 (gt1, option_1, option_2);
+  if (unlikely (v_any_u64 (special)))
+    {
+      return special_case (x, y, d->abs_mask, special);
+    }
+  /* Copy sign.  */
+  return vbslq_f64 (d->abs_mask, y, x);
+}
+
+TEST_SIG (V, D, 1, asinh, -10.0, 10.0)
+TEST_ULP (V_NAME_D1 (asinh), 2.29)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (asinh), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_D1 (asinh), 0, 0x1p-26, 50000)
+TEST_SYM_INTERVAL (V_NAME_D1 (asinh), 0x1p-26, 1, 50000)
+TEST_SYM_INTERVAL (V_NAME_D1 (asinh), 1, 0x1p511, 50000)
+TEST_SYM_INTERVAL (V_NAME_D1 (asinh), 0x1p511, inf, 40000)
+/* Test vector asinh 3 times, with control lane < 1, > 1 and special.
+   Ensures the lane select is choosing the right option in all cases.  */
+TEST_CONTROL_VALUE (V_NAME_D1 (asinh), 0.5)
+TEST_CONTROL_VALUE (V_NAME_D1 (asinh), 2)
+TEST_CONTROL_VALUE (V_NAME_D1 (asinh), 0x1p600)
diff --git a/math/aarch64/advsimd/asinhf.c b/math/aarch64/advsimd/asinhf.c
new file mode 100644
index 00000000000000..6a96f6ee9f4b9b
--- /dev/null
+++ b/math/aarch64/advsimd/asinhf.c
@@ -0,0 +1,89 @@
+/*
+ * Single-precision vector asinh(x) function.
+ *
+ * Copyright (c) 2022-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "v_log1pf_inline.h"
+
+const static struct data
+{
+  struct v_log1pf_data log1pf_consts;
+  float32x4_t one;
+  uint32x4_t big_bound;
+#if WANT_SIMD_EXCEPT
+  uint32x4_t tiny_bound;
+#endif
+} data = {
+  .one = V4 (1),
+  .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE,
+  .big_bound = V4 (0x5f800000), /* asuint(0x1p64).  */
+#if WANT_SIMD_EXCEPT
+  .tiny_bound = V4 (0x30800000) /* asuint(0x1p-30).  */
+#endif
+};
+
+static float32x4_t NOINLINE VPCS_ATTR
+special_case (float32x4_t x, uint32x4_t sign, float32x4_t y,
+	      uint32x4_t special, const struct data *d)
+{
+  return v_call_f32 (
+      asinhf, x,
+      vreinterpretq_f32_u32 (veorq_u32 (
+	  sign, vreinterpretq_u32_f32 (log1pf_inline (y, &d->log1pf_consts)))),
+      special);
+}
+
+/* Single-precision implementation of vector asinh(x), using vector log1p.
+   Worst-case error is 2.59 ULP:
+   _ZGVnN4v_asinhf(0x1.d86124p-3) got 0x1.d449bep-3
+				 want 0x1.d449c4p-3.  */
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (asinh) (float32x4_t x)
+{
+  const struct data *dat = ptr_barrier (&data);
+  float32x4_t ax = vabsq_f32 (x);
+  uint32x4_t iax = vreinterpretq_u32_f32 (ax);
+  uint32x4_t special = vcgeq_u32 (iax, dat->big_bound);
+  uint32x4_t sign = veorq_u32 (vreinterpretq_u32_f32 (x), iax);
+  float32x4_t special_arg = x;
+
+#if WANT_SIMD_EXCEPT
+  /* Sidestep tiny and large values to avoid inadvertently triggering
+     under/overflow.  */
+  special = vorrq_u32 (special, vcltq_u32 (iax, dat->tiny_bound));
+  if (unlikely (v_any_u32 (special)))
+    {
+      ax = v_zerofy_f32 (ax, special);
+      x = v_zerofy_f32 (x, special);
+    }
+#endif
+
+  /* asinh(x) = log(x + sqrt(x * x + 1)).
+     For positive x, asinh(x) = log1p(x + x * x / (1 + sqrt(x * x + 1))).  */
+  float32x4_t d
+      = vaddq_f32 (v_f32 (1), vsqrtq_f32 (vfmaq_f32 (dat->one, ax, ax)));
+  float32x4_t y = vaddq_f32 (ax, vdivq_f32 (vmulq_f32 (ax, ax), d));
+
+  if (unlikely (v_any_u32 (special)))
+    return special_case (special_arg, sign, y, special, dat);
+  return vreinterpretq_f32_u32 (veorq_u32 (
+      sign, vreinterpretq_u32_f32 (log1pf_inline (y, &dat->log1pf_consts))));
+}
+
+HALF_WIDTH_ALIAS_F1 (asinh)
+
+TEST_SIG (V, F, 1, asinh, -10.0, 10.0)
+TEST_ULP (V_NAME_F1 (asinh), 2.10)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (asinh), WANT_SIMD_EXCEPT)
+TEST_INTERVAL (V_NAME_F1 (asinh), 0, 0x1p-12, 40000)
+TEST_INTERVAL (V_NAME_F1 (asinh), 0x1p-12, 1.0, 40000)
+TEST_INTERVAL (V_NAME_F1 (asinh), 1.0, 0x1p11, 40000)
+TEST_INTERVAL (V_NAME_F1 (asinh), 0x1p11, inf, 40000)
+TEST_INTERVAL (V_NAME_F1 (asinh), -0, -0x1p-12, 20000)
+TEST_INTERVAL (V_NAME_F1 (asinh), -0x1p-12, -1.0, 20000)
+TEST_INTERVAL (V_NAME_F1 (asinh), -1.0, -0x1p11, 20000)
+TEST_INTERVAL (V_NAME_F1 (asinh), -0x1p11, -inf, 20000)
diff --git a/pl/math/v_atan_2u5.c b/math/aarch64/advsimd/atan.c
similarity index 51%
rename from pl/math/v_atan_2u5.c
rename to math/aarch64/advsimd/atan.c
index ba68cc3cc720bf..26d2643210685a 100644
--- a/pl/math/v_atan_2u5.c
+++ b/math/aarch64/advsimd/atan.c
@@ -1,32 +1,32 @@
 /*
  * Double-precision vector atan(x) function.
  *
- * Copyright (c) 2021-2023, Arm Limited.
+ * Copyright (c) 2021-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_advsimd_f64.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
+  float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18;
   float64x2_t pi_over_2;
-  float64x2_t poly[20];
+  double c1, c3, c5, c7, c9, c11, c13, c15, c17, c19;
 } data = {
   /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
 	      [2**-1022, 1.0].  */
-  .poly = { V2 (-0x1.5555555555555p-2),	 V2 (0x1.99999999996c1p-3),
-	    V2 (-0x1.2492492478f88p-3),	 V2 (0x1.c71c71bc3951cp-4),
-	    V2 (-0x1.745d160a7e368p-4),	 V2 (0x1.3b139b6a88ba1p-4),
-	    V2 (-0x1.11100ee084227p-4),	 V2 (0x1.e1d0f9696f63bp-5),
-	    V2 (-0x1.aebfe7b418581p-5),	 V2 (0x1.842dbe9b0d916p-5),
-	    V2 (-0x1.5d30140ae5e99p-5),	 V2 (0x1.338e31eb2fbbcp-5),
-	    V2 (-0x1.00e6eece7de8p-5),	 V2 (0x1.860897b29e5efp-6),
-	    V2 (-0x1.0051381722a59p-6),	 V2 (0x1.14e9dc19a4a4ep-7),
-	    V2 (-0x1.d0062b42fe3bfp-9),	 V2 (0x1.17739e210171ap-10),
-	    V2 (-0x1.ab24da7be7402p-13), V2 (0x1.358851160a528p-16), },
+  .c0 = V2 (-0x1.5555555555555p-2),	  .c1 = 0x1.99999999996c1p-3,
+  .c2 = V2 (-0x1.2492492478f88p-3),	  .c3 = 0x1.c71c71bc3951cp-4,
+  .c4 = V2 (-0x1.745d160a7e368p-4),	  .c5 = 0x1.3b139b6a88ba1p-4,
+  .c6 = V2 (-0x1.11100ee084227p-4),	  .c7 = 0x1.e1d0f9696f63bp-5,
+  .c8 = V2 (-0x1.aebfe7b418581p-5),	  .c9 = 0x1.842dbe9b0d916p-5,
+  .c10 = V2 (-0x1.5d30140ae5e99p-5),	  .c11 = 0x1.338e31eb2fbbcp-5,
+  .c12 = V2 (-0x1.00e6eece7de8p-5),	  .c13 = 0x1.860897b29e5efp-6,
+  .c14 = V2 (-0x1.0051381722a59p-6),	  .c15 = 0x1.14e9dc19a4a4ep-7,
+  .c16 = V2 (-0x1.d0062b42fe3bfp-9),	  .c17 = 0x1.17739e210171ap-10,
+  .c18 = V2 (-0x1.ab24da7be7402p-13),	  .c19 = 0x1.358851160a528p-16,
   .pi_over_2 = V2 (0x1.921fb54442d18p+0),
 };
 
@@ -42,6 +42,11 @@ static const struct data
 float64x2_t VPCS_ATTR V_NAME_D1 (atan) (float64x2_t x)
 {
   const struct data *d = ptr_barrier (&data);
+  float64x2_t c13 = vld1q_f64 (&d->c1);
+  float64x2_t c57 = vld1q_f64 (&d->c5);
+  float64x2_t c911 = vld1q_f64 (&d->c9);
+  float64x2_t c1315 = vld1q_f64 (&d->c13);
+  float64x2_t c1719 = vld1q_f64 (&d->c17);
 
   /* Small cases, infs and nans are supported by our approximation technique,
      but do not set fenv flags correctly. Only trigger special case if we need
@@ -80,9 +85,35 @@ float64x2_t VPCS_ATTR V_NAME_D1 (atan) (float64x2_t x)
   float64x2_t x2 = vmulq_f64 (z2, z2);
   float64x2_t x4 = vmulq_f64 (x2, x2);
   float64x2_t x8 = vmulq_f64 (x4, x4);
-  float64x2_t y
-      = vfmaq_f64 (v_estrin_7_f64 (z2, x2, x4, d->poly),
-		   v_estrin_11_f64 (z2, x2, x4, x8, d->poly + 8), x8);
+
+  /* estrin_7.  */
+  float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0);
+  float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1);
+  float64x2_t p03 = vfmaq_f64 (p01, x2, p23);
+
+  float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0);
+  float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1);
+  float64x2_t p47 = vfmaq_f64 (p45, x2, p67);
+
+  float64x2_t p07 = vfmaq_f64 (p03, x4, p47);
+
+  /* estrin_11.  */
+  float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0);
+  float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1);
+  float64x2_t p811 = vfmaq_f64 (p89, x2, p1011);
+
+  float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, z2, c1315, 0);
+  float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, z2, c1315, 1);
+  float64x2_t p1215 = vfmaq_f64 (p1213, x2, p1415);
+
+  float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, z2, c1719, 0);
+  float64x2_t p1819 = vfmaq_laneq_f64 (d->c18, z2, c1719, 1);
+  float64x2_t p1619 = vfmaq_f64 (p1617, x2, p1819);
+
+  float64x2_t p815 = vfmaq_f64 (p811, x4, p1215);
+  float64x2_t p819 = vfmaq_f64 (p815, x8, p1619);
+
+  float64x2_t y = vfmaq_f64 (p07, p819, x8);
 
   /* Finalize. y = shift + z + z^3 * P(z^2).  */
   y = vfmaq_f64 (az, y, vmulq_f64 (z2, az));
@@ -93,12 +124,12 @@ float64x2_t VPCS_ATTR V_NAME_D1 (atan) (float64x2_t x)
   return y;
 }
 
-PL_SIG (V, D, 1, atan, -10.0, 10.0)
-PL_TEST_ULP (V_NAME_D1 (atan), 1.78)
-PL_TEST_EXPECT_FENV (V_NAME_D1 (atan), WANT_SIMD_EXCEPT)
-PL_TEST_INTERVAL (V_NAME_D1 (atan), 0, 0x1p-30, 10000)
-PL_TEST_INTERVAL (V_NAME_D1 (atan), -0, -0x1p-30, 1000)
-PL_TEST_INTERVAL (V_NAME_D1 (atan), 0x1p-30, 0x1p53, 900000)
-PL_TEST_INTERVAL (V_NAME_D1 (atan), -0x1p-30, -0x1p53, 90000)
-PL_TEST_INTERVAL (V_NAME_D1 (atan), 0x1p53, inf, 10000)
-PL_TEST_INTERVAL (V_NAME_D1 (atan), -0x1p53, -inf, 1000)
+TEST_SIG (V, D, 1, atan, -10.0, 10.0)
+TEST_ULP (V_NAME_D1 (atan), 1.78)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (atan), WANT_SIMD_EXCEPT)
+TEST_INTERVAL (V_NAME_D1 (atan), 0, 0x1p-30, 10000)
+TEST_INTERVAL (V_NAME_D1 (atan), -0, -0x1p-30, 1000)
+TEST_INTERVAL (V_NAME_D1 (atan), 0x1p-30, 0x1p53, 900000)
+TEST_INTERVAL (V_NAME_D1 (atan), -0x1p-30, -0x1p53, 90000)
+TEST_INTERVAL (V_NAME_D1 (atan), 0x1p53, inf, 10000)
+TEST_INTERVAL (V_NAME_D1 (atan), -0x1p53, -inf, 1000)
diff --git a/math/aarch64/advsimd/atan2.c b/math/aarch64/advsimd/atan2.c
new file mode 100644
index 00000000000000..18c4b70b92f6a6
--- /dev/null
+++ b/math/aarch64/advsimd/atan2.c
@@ -0,0 +1,171 @@
+/*
+ * Double-precision vector atan2(y, x) function.
+ *
+ * Copyright (c) 2021-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+
+static const struct data
+{
+  float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18;
+  float64x2_t pi_over_2;
+  double c1, c3, c5, c7, c9, c11, c13, c15, c17, c19;
+  uint64x2_t zeroinfnan, minustwo;
+} data = {
+  /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
+	      [2**-1022, 1.0].  */
+  .c0 = V2 (-0x1.5555555555555p-2),
+  .c1 = 0x1.99999999996c1p-3,
+  .c2 = V2 (-0x1.2492492478f88p-3),
+  .c3 = 0x1.c71c71bc3951cp-4,
+  .c4 = V2 (-0x1.745d160a7e368p-4),
+  .c5 = 0x1.3b139b6a88ba1p-4,
+  .c6 = V2 (-0x1.11100ee084227p-4),
+  .c7 = 0x1.e1d0f9696f63bp-5,
+  .c8 = V2 (-0x1.aebfe7b418581p-5),
+  .c9 = 0x1.842dbe9b0d916p-5,
+  .c10 = V2 (-0x1.5d30140ae5e99p-5),
+  .c11 = 0x1.338e31eb2fbbcp-5,
+  .c12 = V2 (-0x1.00e6eece7de8p-5),
+  .c13 = 0x1.860897b29e5efp-6,
+  .c14 = V2 (-0x1.0051381722a59p-6),
+  .c15 = 0x1.14e9dc19a4a4ep-7,
+  .c16 = V2 (-0x1.d0062b42fe3bfp-9),
+  .c17 = 0x1.17739e210171ap-10,
+  .c18 = V2 (-0x1.ab24da7be7402p-13),
+  .c19 = 0x1.358851160a528p-16,
+  .pi_over_2 = V2 (0x1.921fb54442d18p+0),
+  .zeroinfnan = V2 (2 * 0x7ff0000000000000ul - 1),
+  .minustwo = V2 (0xc000000000000000),
+};
+
+#define SignMask v_u64 (0x8000000000000000)
+
+/* Special cases i.e. 0, infinity, NaN (fall back to scalar calls).  */
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t y, float64x2_t x, float64x2_t ret,
+	      uint64x2_t sign_xy, uint64x2_t cmp)
+{
+  /* Account for the sign of x and y.  */
+  ret = vreinterpretq_f64_u64 (
+      veorq_u64 (vreinterpretq_u64_f64 (ret), sign_xy));
+  return v_call2_f64 (atan2, y, x, ret, cmp);
+}
+
+/* Returns 1 if input is the bit representation of 0, infinity or nan.  */
+static inline uint64x2_t
+zeroinfnan (uint64x2_t i, const struct data *d)
+{
+  /* (2 * i - 1) >= (2 * asuint64 (INFINITY) - 1).  */
+  return vcgeq_u64 (vsubq_u64 (vaddq_u64 (i, i), v_u64 (1)), d->zeroinfnan);
+}
+
+/* Fast implementation of vector atan2.
+   Maximum observed error is 2.8 ulps:
+   _ZGVnN2vv_atan2 (0x1.9651a429a859ap+5, 0x1.953075f4ee26p+5)
+	got 0x1.92d628ab678ccp-1
+       want 0x1.92d628ab678cfp-1.  */
+float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  uint64x2_t ix = vreinterpretq_u64_f64 (x);
+  uint64x2_t iy = vreinterpretq_u64_f64 (y);
+
+  uint64x2_t special_cases
+      = vorrq_u64 (zeroinfnan (ix, d), zeroinfnan (iy, d));
+
+  uint64x2_t sign_x = vandq_u64 (ix, SignMask);
+  uint64x2_t sign_y = vandq_u64 (iy, SignMask);
+  uint64x2_t sign_xy = veorq_u64 (sign_x, sign_y);
+
+  float64x2_t ax = vabsq_f64 (x);
+  float64x2_t ay = vabsq_f64 (y);
+
+  uint64x2_t pred_xlt0 = vcltzq_f64 (x);
+  uint64x2_t pred_aygtax = vcagtq_f64 (y, x);
+
+  /* Set up z for call to atan.  */
+  float64x2_t n = vbslq_f64 (pred_aygtax, vnegq_f64 (ax), ay);
+  float64x2_t q = vbslq_f64 (pred_aygtax, ay, ax);
+  float64x2_t z = vdivq_f64 (n, q);
+
+  /* Work out the correct shift.  */
+  float64x2_t shift
+      = vreinterpretq_f64_u64 (vandq_u64 (pred_xlt0, d->minustwo));
+  shift = vbslq_f64 (pred_aygtax, vaddq_f64 (shift, v_f64 (1.0)), shift);
+  shift = vmulq_f64 (shift, d->pi_over_2);
+
+  /* Calculate the polynomial approximation.
+     Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of
+     full scheme to avoid underflow in x^16.
+     The order 19 polynomial P approximates
+     (atan(sqrt(x))-sqrt(x))/x^(3/2).  */
+  float64x2_t z2 = vmulq_f64 (z, z);
+  float64x2_t x2 = vmulq_f64 (z2, z2);
+  float64x2_t x4 = vmulq_f64 (x2, x2);
+  float64x2_t x8 = vmulq_f64 (x4, x4);
+
+  float64x2_t c13 = vld1q_f64 (&d->c1);
+  float64x2_t c57 = vld1q_f64 (&d->c5);
+  float64x2_t c911 = vld1q_f64 (&d->c9);
+  float64x2_t c1315 = vld1q_f64 (&d->c13);
+  float64x2_t c1719 = vld1q_f64 (&d->c17);
+
+  /* estrin_7.  */
+  float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0);
+  float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1);
+  float64x2_t p03 = vfmaq_f64 (p01, x2, p23);
+
+  float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0);
+  float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1);
+  float64x2_t p47 = vfmaq_f64 (p45, x2, p67);
+
+  float64x2_t p07 = vfmaq_f64 (p03, x4, p47);
+
+  /* estrin_11.  */
+  float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0);
+  float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1);
+  float64x2_t p811 = vfmaq_f64 (p89, x2, p1011);
+
+  float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, z2, c1315, 0);
+  float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, z2, c1315, 1);
+  float64x2_t p1215 = vfmaq_f64 (p1213, x2, p1415);
+
+  float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, z2, c1719, 0);
+  float64x2_t p1819 = vfmaq_laneq_f64 (d->c18, z2, c1719, 1);
+  float64x2_t p1619 = vfmaq_f64 (p1617, x2, p1819);
+
+  float64x2_t p815 = vfmaq_f64 (p811, x4, p1215);
+  float64x2_t p819 = vfmaq_f64 (p815, x8, p1619);
+
+  float64x2_t ret = vfmaq_f64 (p07, p819, x8);
+
+  /* Finalize. y = shift + z + z^3 * P(z^2).  */
+  ret = vfmaq_f64 (z, ret, vmulq_f64 (z2, z));
+  ret = vaddq_f64 (ret, shift);
+
+  if (unlikely (v_any_u64 (special_cases)))
+    return special_case (y, x, ret, sign_xy, special_cases);
+
+  /* Account for the sign of x and y.  */
+  ret = vreinterpretq_f64_u64 (
+      veorq_u64 (vreinterpretq_u64_f64 (ret), sign_xy));
+
+  return ret;
+}
+
+/* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h.  */
+TEST_SIG (V, D, 2, atan2)
+// TODO tighten this once __v_atan2 is fixed
+TEST_ULP (V_NAME_D2 (atan2), 2.9)
+TEST_DISABLE_FENV (V_NAME_D2 (atan2))
+TEST_INTERVAL (V_NAME_D2 (atan2), -10.0, 10.0, 50000)
+TEST_INTERVAL (V_NAME_D2 (atan2), -1.0, 1.0, 40000)
+TEST_INTERVAL (V_NAME_D2 (atan2), 0.0, 1.0, 40000)
+TEST_INTERVAL (V_NAME_D2 (atan2), 1.0, 100.0, 40000)
+TEST_INTERVAL (V_NAME_D2 (atan2), 1e6, 1e32, 40000)
diff --git a/pl/math/v_atan2f_3u.c b/math/aarch64/advsimd/atan2f.c
similarity index 54%
rename from pl/math/v_atan2f_3u.c
rename to math/aarch64/advsimd/atan2f.c
index bbfc3cb552f69d..632014249ab031 100644
--- a/pl/math/v_atan2f_3u.c
+++ b/math/aarch64/advsimd/atan2f.c
@@ -1,59 +1,64 @@
 /*
  * Single-precision vector atan2(x) function.
  *
- * Copyright (c) 2021-2023, Arm Limited.
+ * Copyright (c) 2021-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_advsimd_f32.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
-  float32x4_t poly[8];
-  float32x4_t pi_over_2;
+  float32x4_t c0, pi_over_2, c4, c6, c2;
+  float c1, c3, c5, c7;
+  uint32x4_t comp_const;
 } data = {
   /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
      [2**-128, 1.0].
      Generated using fpminimax between FLT_MIN and 1.  */
-  .poly = { V4 (-0x1.55555p-2f), V4 (0x1.99935ep-3f), V4 (-0x1.24051ep-3f),
-	    V4 (0x1.bd7368p-4f), V4 (-0x1.491f0ep-4f), V4 (0x1.93a2c0p-5f),
-	    V4 (-0x1.4c3c60p-6f), V4 (0x1.01fd88p-8f) },
-  .pi_over_2 = V4 (0x1.921fb6p+0f),
+  .c0 = V4 (-0x1.55555p-2f),	    .c1 = 0x1.99935ep-3f,
+  .c2 = V4 (-0x1.24051ep-3f),	    .c3 = 0x1.bd7368p-4f,
+  .c4 = V4 (-0x1.491f0ep-4f),	    .c5 = 0x1.93a2c0p-5f,
+  .c6 = V4 (-0x1.4c3c60p-6f),	    .c7 = 0x1.01fd88p-8f,
+  .pi_over_2 = V4 (0x1.921fb6p+0f), .comp_const = V4 (2 * 0x7f800000lu - 1),
 };
 
 #define SignMask v_u32 (0x80000000)
 
 /* Special cases i.e. 0, infinity and nan (fall back to scalar calls).  */
 static float32x4_t VPCS_ATTR NOINLINE
-special_case (float32x4_t y, float32x4_t x, float32x4_t ret, uint32x4_t cmp)
+special_case (float32x4_t y, float32x4_t x, float32x4_t ret,
+	      uint32x4_t sign_xy, uint32x4_t cmp)
 {
+  /* Account for the sign of x and y.  */
+  ret = vreinterpretq_f32_u32 (
+      veorq_u32 (vreinterpretq_u32_f32 (ret), sign_xy));
   return v_call2_f32 (atan2f, y, x, ret, cmp);
 }
 
 /* Returns 1 if input is the bit representation of 0, infinity or nan.  */
 static inline uint32x4_t
-zeroinfnan (uint32x4_t i)
+zeroinfnan (uint32x4_t i, const struct data *d)
 {
   /* 2 * i - 1 >= 2 * 0x7f800000lu - 1.  */
-  return vcgeq_u32 (vsubq_u32 (vmulq_n_u32 (i, 2), v_u32 (1)),
-		    v_u32 (2 * 0x7f800000lu - 1));
+  return vcgeq_u32 (vsubq_u32 (vmulq_n_u32 (i, 2), v_u32 (1)), d->comp_const);
 }
 
 /* Fast implementation of vector atan2f. Maximum observed error is
    2.95 ULP in [0x1.9300d6p+6 0x1.93c0c6p+6] x [0x1.8c2dbp+6 0x1.8cea6p+6]:
    _ZGVnN4vv_atan2f (0x1.93836cp+6, 0x1.8cae1p+6) got 0x1.967f06p-1
 						 want 0x1.967f00p-1.  */
-float32x4_t VPCS_ATTR V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x)
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x)
 {
-  const struct data *data_ptr = ptr_barrier (&data);
+  const struct data *d = ptr_barrier (&data);
 
   uint32x4_t ix = vreinterpretq_u32_f32 (x);
   uint32x4_t iy = vreinterpretq_u32_f32 (y);
 
-  uint32x4_t special_cases = vorrq_u32 (zeroinfnan (ix), zeroinfnan (iy));
+  uint32x4_t special_cases
+      = vorrq_u32 (zeroinfnan (ix, d), zeroinfnan (iy, d));
 
   uint32x4_t sign_x = vandq_u32 (ix, SignMask);
   uint32x4_t sign_y = vandq_u32 (iy, SignMask);
@@ -67,14 +72,14 @@ float32x4_t VPCS_ATTR V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x)
 
   /* Set up z for call to atanf.  */
   float32x4_t n = vbslq_f32 (pred_aygtax, vnegq_f32 (ax), ay);
-  float32x4_t d = vbslq_f32 (pred_aygtax, ay, ax);
-  float32x4_t z = vdivq_f32 (n, d);
+  float32x4_t q = vbslq_f32 (pred_aygtax, ay, ax);
+  float32x4_t z = vdivq_f32 (n, q);
 
   /* Work out the correct shift.  */
   float32x4_t shift = vreinterpretq_f32_u32 (
       vandq_u32 (pred_xlt0, vreinterpretq_u32_f32 (v_f32 (-2.0f))));
   shift = vbslq_f32 (pred_aygtax, vaddq_f32 (shift, v_f32 (1.0f)), shift);
-  shift = vmulq_f32 (shift, data_ptr->pi_over_2);
+  shift = vmulq_f32 (shift, d->pi_over_2);
 
   /* Calculate the polynomial approximation.
      Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However,
@@ -86,30 +91,37 @@ float32x4_t VPCS_ATTR V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x)
   float32x4_t z2 = vmulq_f32 (z, z);
   float32x4_t z4 = vmulq_f32 (z2, z2);
 
-  float32x4_t ret = vfmaq_f32 (
-      v_pairwise_poly_3_f32 (z2, z4, data_ptr->poly), z4,
-      vmulq_f32 (z4, v_pairwise_poly_3_f32 (z2, z4, data_ptr->poly + 4)));
+  float32x4_t c1357 = vld1q_f32 (&d->c1);
+  float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, c1357, 0);
+  float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, c1357, 1);
+  float32x4_t p45 = vfmaq_laneq_f32 (d->c4, z2, c1357, 2);
+  float32x4_t p67 = vfmaq_laneq_f32 (d->c6, z2, c1357, 3);
+  float32x4_t p03 = vfmaq_f32 (p01, z4, p23);
+  float32x4_t p47 = vfmaq_f32 (p45, z4, p67);
+
+  float32x4_t ret = vfmaq_f32 (p03, z4, vmulq_f32 (z4, p47));
 
   /* y = shift + z * P(z^2).  */
   ret = vaddq_f32 (vfmaq_f32 (z, ret, vmulq_f32 (z2, z)), shift);
 
-  /* Account for the sign of y.  */
-  ret = vreinterpretq_f32_u32 (
-      veorq_u32 (vreinterpretq_u32_f32 (ret), sign_xy));
-
   if (unlikely (v_any_u32 (special_cases)))
     {
-      return special_case (y, x, ret, special_cases);
+      return special_case (y, x, ret, sign_xy, special_cases);
     }
 
-  return ret;
+  /* Account for the sign of x and y.  */
+  return vreinterpretq_f32_u32 (
+      veorq_u32 (vreinterpretq_u32_f32 (ret), sign_xy));
 }
 
+HALF_WIDTH_ALIAS_F2 (atan2)
+
 /* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h.  */
-PL_SIG (V, F, 2, atan2)
-PL_TEST_ULP (V_NAME_F2 (atan2), 2.46)
-PL_TEST_INTERVAL (V_NAME_F2 (atan2), -10.0, 10.0, 50000)
-PL_TEST_INTERVAL (V_NAME_F2 (atan2), -1.0, 1.0, 40000)
-PL_TEST_INTERVAL (V_NAME_F2 (atan2), 0.0, 1.0, 40000)
-PL_TEST_INTERVAL (V_NAME_F2 (atan2), 1.0, 100.0, 40000)
-PL_TEST_INTERVAL (V_NAME_F2 (atan2), 1e6, 1e32, 40000)
+TEST_SIG (V, F, 2, atan2)
+TEST_DISABLE_FENV (V_NAME_F2 (atan2))
+TEST_ULP (V_NAME_F2 (atan2), 2.46)
+TEST_INTERVAL (V_NAME_F2 (atan2), -10.0, 10.0, 50000)
+TEST_INTERVAL (V_NAME_F2 (atan2), -1.0, 1.0, 40000)
+TEST_INTERVAL (V_NAME_F2 (atan2), 0.0, 1.0, 40000)
+TEST_INTERVAL (V_NAME_F2 (atan2), 1.0, 100.0, 40000)
+TEST_INTERVAL (V_NAME_F2 (atan2), 1e6, 1e32, 40000)
diff --git a/pl/math/v_atanf_3u.c b/math/aarch64/advsimd/atanf.c
similarity index 85%
rename from pl/math/v_atanf_3u.c
rename to math/aarch64/advsimd/atanf.c
index f522d957c1cc30..61927c9b261a45 100644
--- a/pl/math/v_atanf_3u.c
+++ b/math/aarch64/advsimd/atanf.c
@@ -1,14 +1,14 @@
 /*
  * Single-precision vector atan(x) function.
  *
- * Copyright (c) 2021-2023, Arm Limited.
+ * Copyright (c) 2021-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_advsimd_f32.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "v_poly_f32.h"
 
 static const struct data
 {
@@ -43,7 +43,7 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
    atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1]
    using z=-1/x and shift = pi/2. Maximum observed error is 2.9ulps:
    _ZGVnN4v_atanf (0x1.0468f6p+0) got 0x1.967f06p-1 want 0x1.967fp-1.  */
-float32x4_t VPCS_ATTR V_NAME_F1 (atan) (float32x4_t x)
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (atan) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
 
@@ -98,10 +98,12 @@ float32x4_t VPCS_ATTR V_NAME_F1 (atan) (float32x4_t x)
   return y;
 }
 
-PL_SIG (V, F, 1, atan, -10.0, 10.0)
-PL_TEST_ULP (V_NAME_F1 (atan), 2.5)
-PL_TEST_EXPECT_FENV (V_NAME_F1 (atan), WANT_SIMD_EXCEPT)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (atan), 0, 0x1p-30, 5000)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (atan), 0x1p-30, 1, 40000)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (atan), 1, 0x1p30, 40000)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (atan), 0x1p30, inf, 1000)
+HALF_WIDTH_ALIAS_F1 (atan)
+
+TEST_SIG (V, F, 1, atan, -10.0, 10.0)
+TEST_ULP (V_NAME_F1 (atan), 2.5)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (atan), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_F1 (atan), 0, 0x1p-30, 5000)
+TEST_SYM_INTERVAL (V_NAME_F1 (atan), 0x1p-30, 1, 40000)
+TEST_SYM_INTERVAL (V_NAME_F1 (atan), 1, 0x1p30, 40000)
+TEST_SYM_INTERVAL (V_NAME_F1 (atan), 0x1p30, inf, 1000)
diff --git a/pl/math/v_atanh_3u5.c b/math/aarch64/advsimd/atanh.c
similarity index 55%
rename from pl/math/v_atanh_3u5.c
rename to math/aarch64/advsimd/atanh.c
index f282826a3f3214..c2f9585dd29b18 100644
--- a/pl/math/v_atanh_3u5.c
+++ b/math/aarch64/advsimd/atanh.c
@@ -1,13 +1,13 @@
 /*
  * Double-precision vector atanh(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define WANT_V_LOG1P_K0_SHORTCUT 0
 #include "v_log1p_inline.h"
@@ -15,15 +15,19 @@
 const static struct data
 {
   struct v_log1p_data log1p_consts;
-  uint64x2_t one, half;
+  uint64x2_t one;
+  uint64x2_t sign_mask;
 } data = { .log1p_consts = V_LOG1P_CONSTANTS_TABLE,
 	   .one = V2 (0x3ff0000000000000),
-	   .half = V2 (0x3fe0000000000000) };
+	   .sign_mask = V2 (0x8000000000000000) };
 
 static float64x2_t VPCS_ATTR NOINLINE
-special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
+special_case (float64x2_t x, float64x2_t halfsign, float64x2_t y,
+	      uint64x2_t special, const struct data *d)
 {
-  return v_call_f64 (atanh, x, y, special);
+  y = log1p_inline (y, &d->log1p_consts);
+  return v_call_f64 (atanh, vbslq_f64 (d->sign_mask, halfsign, x),
+		     vmulq_f64 (halfsign, y), special);
 }
 
 /* Approximation for vector double-precision atanh(x) using modified log1p.
@@ -35,11 +39,10 @@ float64x2_t V_NAME_D1 (atanh) (float64x2_t x)
 {
   const struct data *d = ptr_barrier (&data);
 
+  float64x2_t halfsign = vbslq_f64 (d->sign_mask, x, v_f64 (0.5));
   float64x2_t ax = vabsq_f64 (x);
   uint64x2_t ia = vreinterpretq_u64_f64 (ax);
-  uint64x2_t sign = veorq_u64 (vreinterpretq_u64_f64 (x), ia);
   uint64x2_t special = vcgeq_u64 (ia, d->one);
-  float64x2_t halfsign = vreinterpretq_f64_u64 (vorrq_u64 (sign, d->half));
 
 #if WANT_SIMD_EXCEPT
   ax = v_zerofy_f64 (ax, special);
@@ -47,20 +50,26 @@ float64x2_t V_NAME_D1 (atanh) (float64x2_t x)
 
   float64x2_t y;
   y = vaddq_f64 (ax, ax);
-  y = vdivq_f64 (y, vsubq_f64 (v_f64 (1), ax));
-  y = log1p_inline (y, &d->log1p_consts);
+  y = vdivq_f64 (y, vsubq_f64 (vreinterpretq_f64_u64 (d->one), ax));
 
   if (unlikely (v_any_u64 (special)))
-    return special_case (x, vmulq_f64 (y, halfsign), special);
+#if WANT_SIMD_EXCEPT
+    return special_case (x, halfsign, y, special, d);
+#else
+    return special_case (ax, halfsign, y, special, d);
+#endif
+
+  y = log1p_inline (y, &d->log1p_consts);
   return vmulq_f64 (y, halfsign);
 }
 
-PL_SIG (V, D, 1, atanh, -1.0, 1.0)
-PL_TEST_EXPECT_FENV (V_NAME_D1 (atanh), WANT_SIMD_EXCEPT)
-PL_TEST_ULP (V_NAME_D1 (atanh), 3.32)
+TEST_SIG (V, D, 1, atanh, -1.0, 1.0)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (atanh), WANT_SIMD_EXCEPT)
+TEST_ULP (V_NAME_D1 (atanh), 3.32)
+TEST_SYM_INTERVAL (V_NAME_D1 (atanh), 0, 0x1p-23, 10000)
+TEST_SYM_INTERVAL (V_NAME_D1 (atanh), 0x1p-23, 1, 90000)
+TEST_SYM_INTERVAL (V_NAME_D1 (atanh), 1, inf, 100)
 /* atanh is asymptotic at 1, which is the default control value - have to set
    -c 0 specially to ensure fp exceptions are triggered correctly (choice of
    control lane is irrelevant if fp exceptions are disabled).  */
-PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (atanh), 0, 0x1p-23, 10000, 0)
-PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (atanh), 0x1p-23, 1, 90000, 0)
-PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (atanh), 1, inf, 100, 0)
+TEST_CONTROL_VALUE (V_NAME_D1 (atanh), 0)
diff --git a/pl/math/v_atanhf_3u1.c b/math/aarch64/advsimd/atanhf.c
similarity index 54%
rename from pl/math/v_atanhf_3u1.c
rename to math/aarch64/advsimd/atanhf.c
index f6a5f25eca9a8c..313d15ca63910d 100644
--- a/pl/math/v_atanhf_3u1.c
+++ b/math/aarch64/advsimd/atanhf.c
@@ -1,13 +1,13 @@
 /*
  * Single-precision vector atanh(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 #include "v_log1pf_inline.h"
 
 const static struct data
@@ -30,16 +30,18 @@ const static struct data
 #define Half v_u32 (0x3f000000)
 
 static float32x4_t NOINLINE VPCS_ATTR
-special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+special_case (float32x4_t x, float32x4_t halfsign, float32x4_t y,
+	      uint32x4_t special)
 {
-  return v_call_f32 (atanhf, x, y, special);
+  return v_call_f32 (atanhf, vbslq_f32 (AbsMask, x, halfsign),
+		     vmulq_f32 (halfsign, y), special);
 }
 
 /* Approximation for vector single-precision atanh(x) using modified log1p.
-   The maximum error is 3.08 ULP:
-   __v_atanhf(0x1.ff215p-5) got 0x1.ffcb7cp-5
-			   want 0x1.ffcb82p-5.  */
-VPCS_ATTR float32x4_t V_NAME_F1 (atanh) (float32x4_t x)
+   The maximum error is 2.93 ULP:
+   _ZGVnN4v_atanhf(0x1.f43d7p-5) got 0x1.f4dcfep-5
+				want 0x1.f4dcf8p-5.  */
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (atanh) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
 
@@ -58,20 +60,31 @@ VPCS_ATTR float32x4_t V_NAME_F1 (atanh) (float32x4_t x)
   uint32x4_t special = vcgeq_u32 (iax, d->one);
 #endif
 
-  float32x4_t y = vdivq_f32 (vaddq_f32 (ax, ax), vsubq_f32 (v_f32 (1), ax));
-  y = log1pf_inline (y, d->log1pf_consts);
+  float32x4_t y = vdivq_f32 (vaddq_f32 (ax, ax),
+			     vsubq_f32 (vreinterpretq_f32_u32 (d->one), ax));
+  y = log1pf_inline (y, &d->log1pf_consts);
 
+  /* If exceptions not required, pass ax to special-case for shorter dependency
+     chain. If exceptions are required ax will have been zerofied, so have to
+     pass x.  */
   if (unlikely (v_any_u32 (special)))
-    return special_case (x, vmulq_f32 (halfsign, y), special);
+#if WANT_SIMD_EXCEPT
+    return special_case (x, halfsign, y, special);
+#else
+    return special_case (ax, halfsign, y, special);
+#endif
   return vmulq_f32 (halfsign, y);
 }
 
-PL_SIG (V, F, 1, atanh, -1.0, 1.0)
-PL_TEST_ULP (V_NAME_F1 (atanh), 2.59)
-PL_TEST_EXPECT_FENV (V_NAME_F1 (atanh), WANT_SIMD_EXCEPT)
+HALF_WIDTH_ALIAS_F1 (atanh)
+
+TEST_SIG (V, F, 1, atanh, -1.0, 1.0)
+TEST_ULP (V_NAME_F1 (atanh), 2.44)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (atanh), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_F1 (atanh), 0, 0x1p-12, 500)
+TEST_SYM_INTERVAL (V_NAME_F1 (atanh), 0x1p-12, 1, 200000)
+TEST_SYM_INTERVAL (V_NAME_F1 (atanh), 1, inf, 1000)
 /* atanh is asymptotic at 1, which is the default control value - have to set
  -c 0 specially to ensure fp exceptions are triggered correctly (choice of
  control lane is irrelevant if fp exceptions are disabled).  */
-PL_TEST_SYM_INTERVAL_C (V_NAME_F1 (atanh), 0, 0x1p-12, 500, 0)
-PL_TEST_SYM_INTERVAL_C (V_NAME_F1 (atanh), 0x1p-12, 1, 200000, 0)
-PL_TEST_SYM_INTERVAL_C (V_NAME_F1 (atanh), 1, inf, 1000, 0)
+TEST_CONTROL_VALUE (V_NAME_F1 (atanh), 0)
diff --git a/pl/math/v_cbrt_2u.c b/math/aarch64/advsimd/cbrt.c
similarity index 76%
rename from pl/math/v_cbrt_2u.c
rename to math/aarch64/advsimd/cbrt.c
index cc7cff15dc0fa5..8e72e5b566fc88 100644
--- a/pl/math/v_cbrt_2u.c
+++ b/math/aarch64/advsimd/cbrt.c
@@ -1,14 +1,14 @@
 /*
  * Double-precision vector cbrt(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_advsimd_f64.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "v_poly_f64.h"
 
 const static struct data
 {
@@ -40,13 +40,20 @@ special_case (float64x2_t x, float64x2_t y, uint32x2_t special)
   return v_call_f64 (cbrt, x, y, vmovl_u32 (special));
 }
 
-/* Approximation for double-precision vector cbrt(x), using low-order polynomial
-   and two Newton iterations. Greatest observed error is 1.79 ULP. Errors repeat
+/* Approximation for double-precision vector cbrt(x), using low-order
+   polynomial and two Newton iterations.
+
+   The vector version of frexp does not handle subnormals
+   correctly. As a result these need to be handled by the scalar
+   fallback, where accuracy may be worse than that of the vector code
+   path.
+
+   Greatest observed error in the normal range is 1.79 ULP. Errors repeat
    according to the exponent, for instance an error observed for double value
    m * 2^e will be observed for any input m * 2^(e + 3*i), where i is an
    integer.
-   __v_cbrt(0x1.fffff403f0bc6p+1) got 0x1.965fe72821e9bp+0
-				 want 0x1.965fe72821e99p+0.  */
+   _ZGVnN2v_cbrt (0x1.fffff403f0bc6p+1) got 0x1.965fe72821e9bp+0
+				       want 0x1.965fe72821e99p+0.  */
 VPCS_ATTR float64x2_t V_NAME_D1 (cbrt) (float64x2_t x)
 {
   const struct data *d = ptr_barrier (&data);
@@ -64,8 +71,8 @@ VPCS_ATTR float64x2_t V_NAME_D1 (cbrt) (float64x2_t x)
   uint64x2_t ia12 = vshrq_n_u64 (iax, 52);
   int64x2_t e = vsubq_s64 (vreinterpretq_s64_u64 (ia12), exp_bias);
 
-  /* Calculate rough approximation for cbrt(m) in [0.5, 1.0], starting point for
-     Newton iterations.  */
+  /* Calculate rough approximation for cbrt(m) in [0.5, 1.0], starting point
+     for Newton iterations.  */
   float64x2_t p = v_pairwise_poly_3_f64 (m, vmulq_f64 (m, m), d->poly);
   float64x2_t one_third = d->one_third;
   /* Two iterations of Newton's method for iteratively approximating cbrt.  */
@@ -84,8 +91,8 @@ VPCS_ATTR float64x2_t V_NAME_D1 (cbrt) (float64x2_t x)
 
      Let q = 2 ^ round(e / 3), then t = 2 ^ (e / 3) / q.
 
-     Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which is
-     an integer in [-2, 2], and can be looked up in the table T. Hence the
+     Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which
+     is an integer in [-2, 2], and can be looked up in the table T. Hence the
      result is assembled as:
 
      cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign.  */
@@ -110,7 +117,11 @@ VPCS_ATTR float64x2_t V_NAME_D1 (cbrt) (float64x2_t x)
   return vbslq_f64 (d->abs_mask, y, x);
 }
 
-PL_TEST_ULP (V_NAME_D1 (cbrt), 1.30)
-PL_SIG (V, D, 1, cbrt, -10.0, 10.0)
-PL_TEST_EXPECT_FENV_ALWAYS (V_NAME_D1 (cbrt))
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (cbrt), 0, inf, 1000000)
+/* Worst-case ULP error assumes that scalar fallback is GLIBC 2.40 cbrt, which
+   has ULP error of 3.67 at 0x1.7a337e1ba1ec2p-257 [1]. Largest observed error
+   in the vector path is 1.79 ULP.
+   [1] Innocente, V., & Zimmermann, P. (2024). Accuracy of Mathematical
+   Functions in Single, Double, Double Extended, and Quadruple Precision.  */
+TEST_ULP (V_NAME_D1 (cbrt), 3.17)
+TEST_SIG (V, D, 1, cbrt, -10.0, 10.0)
+TEST_SYM_INTERVAL (V_NAME_D1 (cbrt), 0, inf, 1000000)
diff --git a/pl/math/v_cbrtf_1u7.c b/math/aarch64/advsimd/cbrtf.c
similarity index 91%
rename from pl/math/v_cbrtf_1u7.c
rename to math/aarch64/advsimd/cbrtf.c
index 74918765209f9a..4e76feb2dd8b62 100644
--- a/pl/math/v_cbrtf_1u7.c
+++ b/math/aarch64/advsimd/cbrtf.c
@@ -1,14 +1,14 @@
 /*
  * Single-precision vector cbrt(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_advsimd_f32.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "v_poly_f32.h"
 
 const static struct data
 {
@@ -49,7 +49,7 @@ shifted_lookup (const float *table, int32x4_t i)
    0x1.85a2aa and the exponent is a multiple of 3, for example:
    _ZGVnN4v_cbrtf(0x1.85a2aap+3) got 0x1.267936p+1
 				want 0x1.267932p+1.  */
-VPCS_ATTR float32x4_t V_NAME_F1 (cbrt) (float32x4_t x)
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (cbrt) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
   uint32x4_t iax = vreinterpretq_u32_f32 (vabsq_f32 (x));
@@ -110,7 +110,8 @@ VPCS_ATTR float32x4_t V_NAME_F1 (cbrt) (float32x4_t x)
   return vbslq_f32 (SignMask, x, y);
 }
 
-PL_SIG (V, F, 1, cbrt, -10.0, 10.0)
-PL_TEST_ULP (V_NAME_F1 (cbrt), 1.15)
-PL_TEST_EXPECT_FENV_ALWAYS (V_NAME_F1 (cbrt))
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (cbrt), 0, inf, 1000000)
+HALF_WIDTH_ALIAS_F1 (cbrt)
+
+TEST_SIG (V, F, 1, cbrt, -10.0, 10.0)
+TEST_ULP (V_NAME_F1 (cbrt), 1.15)
+TEST_SYM_INTERVAL (V_NAME_F1 (cbrt), 0, inf, 1000000)
diff --git a/pl/math/v_cexpi_3u5.c b/math/aarch64/advsimd/cexpi.c
similarity index 79%
rename from pl/math/v_cexpi_3u5.c
rename to math/aarch64/advsimd/cexpi.c
index 5163b15926b899..40ba5ff31f20ea 100644
--- a/pl/math/v_cexpi_3u5.c
+++ b/math/aarch64/advsimd/cexpi.c
@@ -1,13 +1,13 @@
 /*
  * Double-precision vector sincos function - return-by-value interface.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_sincos_common.h"
 #include "v_math.h"
-#include "pl_test.h"
+#include "test_defs.h"
 
 static float64x2x2_t VPCS_ATTR NOINLINE
 special_case (float64x2_t x, uint64x2_t special, float64x2x2_t y)
@@ -34,11 +34,13 @@ _ZGVnN2v_cexpi (float64x2_t x)
   return sc;
 }
 
-PL_TEST_ULP (_ZGVnN2v_cexpi_sin, 2.73)
-PL_TEST_ULP (_ZGVnN2v_cexpi_cos, 2.73)
+TEST_DISABLE_FENV (_ZGVnN2v_cexpi_cos)
+TEST_DISABLE_FENV (_ZGVnN2v_cexpi_sin)
+TEST_ULP (_ZGVnN2v_cexpi_sin, 2.73)
+TEST_ULP (_ZGVnN2v_cexpi_cos, 2.73)
 #define V_CEXPI_INTERVAL(lo, hi, n)                                           \
-  PL_TEST_INTERVAL (_ZGVnN2v_cexpi_sin, lo, hi, n)                            \
-  PL_TEST_INTERVAL (_ZGVnN2v_cexpi_cos, lo, hi, n)
+  TEST_INTERVAL (_ZGVnN2v_cexpi_sin, lo, hi, n)                               \
+  TEST_INTERVAL (_ZGVnN2v_cexpi_cos, lo, hi, n)
 V_CEXPI_INTERVAL (0, 0x1p23, 500000)
 V_CEXPI_INTERVAL (-0, -0x1p23, 500000)
 V_CEXPI_INTERVAL (0x1p23, inf, 10000)
diff --git a/pl/math/v_cexpif_1u8.c b/math/aarch64/advsimd/cexpif.c
similarity index 80%
rename from pl/math/v_cexpif_1u8.c
rename to math/aarch64/advsimd/cexpif.c
index 4897018d30908b..e55d99653a668c 100644
--- a/pl/math/v_cexpif_1u8.c
+++ b/math/aarch64/advsimd/cexpif.c
@@ -1,13 +1,13 @@
 /*
  * Single-precision vector cexpi function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_sincosf_common.h"
 #include "v_math.h"
-#include "pl_test.h"
+#include "test_defs.h"
 
 static float32x4x2_t VPCS_ATTR NOINLINE
 special_case (float32x4_t x, uint32x4_t special, float32x4x2_t y)
@@ -36,11 +36,13 @@ _ZGVnN4v_cexpif (float32x4_t x)
   return sc;
 }
 
-PL_TEST_ULP (_ZGVnN4v_cexpif_sin, 1.17)
-PL_TEST_ULP (_ZGVnN4v_cexpif_cos, 1.31)
+TEST_DISABLE_FENV (_ZGVnN4v_cexpif_sin)
+TEST_DISABLE_FENV (_ZGVnN4v_cexpif_cos)
+TEST_ULP (_ZGVnN4v_cexpif_sin, 1.17)
+TEST_ULP (_ZGVnN4v_cexpif_cos, 1.31)
 #define V_CEXPIF_INTERVAL(lo, hi, n)                                          \
-  PL_TEST_INTERVAL (_ZGVnN4v_cexpif_sin, lo, hi, n)                           \
-  PL_TEST_INTERVAL (_ZGVnN4v_cexpif_cos, lo, hi, n)
+  TEST_INTERVAL (_ZGVnN4v_cexpif_sin, lo, hi, n)                              \
+  TEST_INTERVAL (_ZGVnN4v_cexpif_cos, lo, hi, n)
 V_CEXPIF_INTERVAL (0, 0x1p20, 500000)
 V_CEXPIF_INTERVAL (-0, -0x1p20, 500000)
 V_CEXPIF_INTERVAL (0x1p20, inf, 10000)
diff --git a/math/aarch64/v_cos.c b/math/aarch64/advsimd/cos.c
similarity index 80%
rename from math/aarch64/v_cos.c
rename to math/aarch64/advsimd/cos.c
index 9a73575bce896a..9f3de4dd5c3690 100644
--- a/math/aarch64/v_cos.c
+++ b/math/aarch64/advsimd/cos.c
@@ -1,17 +1,19 @@
 /*
  * Double-precision vector cos function.
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "mathlib.h"
 #include "v_math.h"
+#include "test_defs.h"
+#include "test_sig.h"
 
 static const struct data
 {
   float64x2_t poly[7];
-  float64x2_t range_val, shift, inv_pi, half_pi, pi_1, pi_2, pi_3;
+  float64x2_t range_val, inv_pi, pi_1, pi_2, pi_3;
 } data = {
   /* Worst-case error is 3.3 ulp in [-pi/2, pi/2].  */
   .poly = { V2 (-0x1.555555555547bp-3), V2 (0x1.1111111108a4dp-7),
@@ -19,11 +21,9 @@ static const struct data
 	    V2 (-0x1.ae633919987c6p-26), V2 (0x1.60e277ae07cecp-33),
 	    V2 (-0x1.9e9540300a1p-41) },
   .inv_pi = V2 (0x1.45f306dc9c883p-2),
-  .half_pi = V2 (0x1.921fb54442d18p+0),
   .pi_1 = V2 (0x1.921fb54442d18p+1),
   .pi_2 = V2 (0x1.1a62633145c06p-53),
   .pi_3 = V2 (0x1.c1cd129024e09p-106),
-  .shift = V2 (0x1.8p52),
   .range_val = V2 (0x1p23)
 };
 
@@ -57,10 +57,9 @@ float64x2_t VPCS_ATTR V_NAME_D1 (cos) (float64x2_t x)
 #endif
 
   /* n = rint((|x|+pi/2)/pi) - 0.5.  */
-  n = vfmaq_f64 (d->shift, d->inv_pi, vaddq_f64 (r, d->half_pi));
-  odd = vshlq_n_u64 (vreinterpretq_u64_f64 (n), 63);
-  n = vsubq_f64 (n, d->shift);
-  n = vsubq_f64 (n, v_f64 (0.5));
+  n = vrndaq_f64 (vfmaq_f64 (v_f64 (0.5), r, d->inv_pi));
+  odd = vshlq_n_u64 (vreinterpretq_u64_s64 (vcvtq_s64_f64 (n)), 63);
+  n = vsubq_f64 (n, v_f64 (0.5f));
 
   /* r = |x| - n*pi  (range reduction into -pi/2 .. pi/2).  */
   r = vfmsq_f64 (r, d->pi_1, n);
@@ -85,3 +84,9 @@ float64x2_t VPCS_ATTR V_NAME_D1 (cos) (float64x2_t x)
     return special_case (x, y, odd, cmp);
   return vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd));
 }
+
+TEST_SIG (V, D, 1, cos, -3.1, 3.1)
+TEST_ULP (V_NAME_D1 (cos), 3.0)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (cos), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_D1 (cos), 0, 0x1p23, 500000)
+TEST_SYM_INTERVAL (V_NAME_D1 (cos), 0x1p23, inf, 10000)
diff --git a/math/aarch64/v_cosf.c b/math/aarch64/advsimd/cosf.c
similarity index 76%
rename from math/aarch64/v_cosf.c
rename to math/aarch64/advsimd/cosf.c
index b9890b2998ad3c..d2844e44e19662 100644
--- a/math/aarch64/v_cosf.c
+++ b/math/aarch64/advsimd/cosf.c
@@ -1,17 +1,19 @@
 /*
  * Single-precision vector cos function.
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "mathlib.h"
 #include "v_math.h"
+#include "test_defs.h"
+#include "test_sig.h"
 
 static const struct data
 {
   float32x4_t poly[4];
-  float32x4_t range_val, inv_pi, half_pi, shift, pi_1, pi_2, pi_3;
+  float32x4_t range_val, inv_pi, pi_1, pi_2, pi_3;
 } data = {
   /* 1.886 ulp error.  */
   .poly = { V4 (-0x1.555548p-3f), V4 (0x1.110df4p-7f), V4 (-0x1.9f42eap-13f),
@@ -22,8 +24,6 @@ static const struct data
   .pi_3 = V4 (-0x1.ee59dap-49f),
 
   .inv_pi = V4 (0x1.45f306p-2f),
-  .shift = V4 (0x1.8p+23f),
-  .half_pi = V4 (0x1.921fb6p0f),
   .range_val = V4 (0x1p20f)
 };
 
@@ -37,7 +37,7 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t odd, uint32x4_t cmp)
   return v_call_f32 (cosf, x, y, cmp);
 }
 
-float32x4_t VPCS_ATTR V_NAME_F1 (cos) (float32x4_t x)
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (cos) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
   float32x4_t n, r, r2, r3, y;
@@ -58,9 +58,8 @@ float32x4_t VPCS_ATTR V_NAME_F1 (cos) (float32x4_t x)
 #endif
 
   /* n = rint((|x|+pi/2)/pi) - 0.5.  */
-  n = vfmaq_f32 (d->shift, d->inv_pi, vaddq_f32 (r, d->half_pi));
-  odd = vshlq_n_u32 (vreinterpretq_u32_f32 (n), 31);
-  n = vsubq_f32 (n, d->shift);
+  n = vrndaq_f32 (vfmaq_f32 (v_f32 (0.5), r, d->inv_pi));
+  odd = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 31);
   n = vsubq_f32 (n, v_f32 (0.5f));
 
   /* r = |x| - n*pi  (range reduction into -pi/2 .. pi/2).  */
@@ -80,3 +79,11 @@ float32x4_t VPCS_ATTR V_NAME_F1 (cos) (float32x4_t x)
     return special_case (x, y, odd, cmp);
   return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));
 }
+
+HALF_WIDTH_ALIAS_F1 (cos)
+
+TEST_SIG (V, F, 1, cos, -3.1, 3.1)
+TEST_ULP (V_NAME_F1 (cos), 1.4)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (cos), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_F1 (cos), 0, 0x1p20, 500000)
+TEST_SYM_INTERVAL (V_NAME_F1 (cos), 0x1p20, inf, 10000)
diff --git a/pl/math/v_cosh_2u.c b/math/aarch64/advsimd/cosh.c
similarity index 84%
rename from pl/math/v_cosh_2u.c
rename to math/aarch64/advsimd/cosh.c
index 649c390f4622d6..54407b23aa9dce 100644
--- a/pl/math/v_cosh_2u.c
+++ b/math/aarch64/advsimd/cosh.c
@@ -1,18 +1,20 @@
 /*
  * Double-precision vector cosh(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
   float64x2_t poly[3];
-  float64x2_t inv_ln2, ln2, shift, thres;
+  float64x2_t inv_ln2;
+  double ln2[2];
+  float64x2_t shift, thres;
   uint64x2_t index_mask, special_bound;
 } data = {
   .poly = { V2 (0x1.fffffffffffd4p-2), V2 (0x1.5555571d6b68cp-3),
@@ -48,8 +50,9 @@ exp_inline (float64x2_t x)
   float64x2_t n = vsubq_f64 (z, d->shift);
 
   /* r = x - n*ln2/N.  */
-  float64x2_t r = vfmaq_laneq_f64 (x, n, d->ln2, 0);
-  r = vfmaq_laneq_f64 (r, n, d->ln2, 1);
+  float64x2_t ln2 = vld1q_f64 (d->ln2);
+  float64x2_t r = vfmaq_laneq_f64 (x, n, ln2, 0);
+  r = vfmaq_laneq_f64 (r, n, ln2, 1);
 
   uint64x2_t e = vshlq_n_u64 (u, 52 - V_EXP_TAIL_TABLE_BITS);
   uint64x2_t i = vandq_u64 (u, d->index_mask);
@@ -97,8 +100,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (cosh) (float64x2_t x)
   return vaddq_f64 (half_t, half_over_t);
 }
 
-PL_SIG (V, D, 1, cosh, -10.0, 10.0)
-PL_TEST_ULP (V_NAME_D1 (cosh), 1.43)
-PL_TEST_EXPECT_FENV_ALWAYS (V_NAME_D1 (cosh))
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (cosh), 0, 0x1.6p9, 100000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (cosh), 0x1.6p9, inf, 1000)
+TEST_SIG (V, D, 1, cosh, -10.0, 10.0)
+TEST_ULP (V_NAME_D1 (cosh), 1.43)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (cosh), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_D1 (cosh), 0, 0x1.6p9, 100000)
+TEST_SYM_INTERVAL (V_NAME_D1 (cosh), 0x1.6p9, inf, 1000)
diff --git a/pl/math/v_coshf_2u4.c b/math/aarch64/advsimd/coshf.c
similarity index 64%
rename from pl/math/v_coshf_2u4.c
rename to math/aarch64/advsimd/coshf.c
index c622b0b183f1dc..f1ed3e5161fdc8 100644
--- a/pl/math/v_coshf_2u4.c
+++ b/math/aarch64/advsimd/coshf.c
@@ -1,32 +1,39 @@
 /*
  * Single-precision vector cosh(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_expf_inline.h"
 #include "v_math.h"
-#include "mathlib.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
   struct v_expf_data expf_consts;
-  uint32x4_t tiny_bound, special_bound;
+  uint32x4_t tiny_bound;
+  float32x4_t bound;
+#if WANT_SIMD_EXCEPT
+  uint32x4_t special_bound;
+#endif
 } data = {
   .expf_consts = V_EXPF_DATA,
   .tiny_bound = V4 (0x20000000), /* 0x1p-63: Round to 1 below this.  */
   /* 0x1.5a92d8p+6: expf overflows above this, so have to use special case.  */
+  .bound = V4 (0x1.5a92d8p+6),
+#if WANT_SIMD_EXCEPT
   .special_bound = V4 (0x42ad496c),
+#endif
 };
 
 #if !WANT_SIMD_EXCEPT
 static float32x4_t NOINLINE VPCS_ATTR
-special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+special_case (float32x4_t x, float32x4_t half_t, float32x4_t half_over_t,
+	      uint32x4_t special)
 {
-  return v_call_f32 (coshf, x, y, special);
+  return v_call_f32 (coshf, x, vaddq_f32 (half_t, half_over_t), special);
 }
 #endif
 
@@ -34,18 +41,17 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
    Maximum error is 2.38 ULP:
    _ZGVnN4v_coshf (0x1.e8001ep+1) got 0x1.6a491ep+4
 				 want 0x1.6a4922p+4.  */
-float32x4_t VPCS_ATTR V_NAME_F1 (cosh) (float32x4_t x)
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (cosh) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
 
-  float32x4_t ax = vabsq_f32 (x);
-  uint32x4_t iax = vreinterpretq_u32_f32 (ax);
-  uint32x4_t special = vcgeq_u32 (iax, d->special_bound);
-
 #if WANT_SIMD_EXCEPT
   /* If fp exceptions are to be triggered correctly, fall back to the scalar
      variant for all inputs if any input is a special value or above the bound
      at which expf overflows.  */
+  float32x4_t ax = vabsq_f32 (x);
+  uint32x4_t iax = vreinterpretq_u32_f32 (ax);
+  uint32x4_t special = vcgeq_u32 (iax, d->special_bound);
   if (unlikely (v_any_u32 (special)))
     return v_call_f32 (coshf, x, x, v_u32 (-1));
 
@@ -54,10 +60,13 @@ float32x4_t VPCS_ATTR V_NAME_F1 (cosh) (float32x4_t x)
      input to 0, which will generate no exceptions.  */
   if (unlikely (v_any_u32 (tiny)))
     ax = v_zerofy_f32 (ax, tiny);
+  float32x4_t t = v_expf_inline (ax, &d->expf_consts);
+#else
+  uint32x4_t special = vcageq_f32 (x, d->bound);
+  float32x4_t t = v_expf_inline (x, &d->expf_consts);
 #endif
 
   /* Calculate cosh by exp(x) / 2 + exp(-x) / 2.  */
-  float32x4_t t = v_expf_inline (ax, &d->expf_consts);
   float32x4_t half_t = vmulq_n_f32 (t, 0.5);
   float32x4_t half_over_t = vdivq_f32 (v_f32 (0.5), t);
 
@@ -66,15 +75,18 @@ float32x4_t VPCS_ATTR V_NAME_F1 (cosh) (float32x4_t x)
     return vbslq_f32 (tiny, v_f32 (1), vaddq_f32 (half_t, half_over_t));
 #else
   if (unlikely (v_any_u32 (special)))
-    return special_case (x, vaddq_f32 (half_t, half_over_t), special);
+    return special_case (x, half_t, half_over_t, special);
 #endif
 
   return vaddq_f32 (half_t, half_over_t);
 }
 
-PL_SIG (V, F, 1, cosh, -10.0, 10.0)
-PL_TEST_ULP (V_NAME_F1 (cosh), 1.89)
-PL_TEST_EXPECT_FENV (V_NAME_F1 (cosh), WANT_SIMD_EXCEPT)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (cosh), 0, 0x1p-63, 100)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (cosh), 0, 0x1.5a92d8p+6, 80000)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (cosh), 0x1.5a92d8p+6, inf, 2000)
+HALF_WIDTH_ALIAS_F1 (cosh)
+
+TEST_SIG (V, F, 1, cosh, -10.0, 10.0)
+TEST_ULP (V_NAME_F1 (cosh), 1.89)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (cosh), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_F1 (cosh), 0, 0x1p-63, 100)
+TEST_SYM_INTERVAL (V_NAME_F1 (cosh), 0x1p-63, 1, 1000)
+TEST_SYM_INTERVAL (V_NAME_F1 (cosh), 1, 0x1.5a92d8p+6, 80000)
+TEST_SYM_INTERVAL (V_NAME_F1 (cosh), 0x1.5a92d8p+6, inf, 2000)
diff --git a/pl/math/v_cospi_3u1.c b/math/aarch64/advsimd/cospi.c
similarity index 81%
rename from pl/math/v_cospi_3u1.c
rename to math/aarch64/advsimd/cospi.c
index 3c2ee0b74c8ead..e63201a5578611 100644
--- a/pl/math/v_cospi_3u1.c
+++ b/math/aarch64/advsimd/cospi.c
@@ -1,15 +1,15 @@
 /*
  * Double-precision vector cospi function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "mathlib.h"
 #include "v_math.h"
-#include "poly_advsimd_f64.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "v_poly_f64.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -31,7 +31,7 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t odd, uint64x2_t cmp)
 {
   /* Fall back to scalar code.  */
   y = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd));
-  return v_call_f64 (cospi, x, y, cmp);
+  return v_call_f64 (arm_math_cospi, x, y, cmp);
 }
 
 /* Approximation for vector double-precision cospi(x).
@@ -77,10 +77,11 @@ float64x2_t VPCS_ATTR V_NAME_D1 (cospi) (float64x2_t x)
   return vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd));
 }
 
-PL_SIG (V, D, 1, cospi, -0.9, 0.9)
-PL_TEST_ULP (V_NAME_D1 (cospi), 2.56)
-PL_TEST_EXPECT_FENV (V_NAME_D1 (cospi), WANT_SIMD_EXCEPT)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (cospi), 0, 0x1p-63, 5000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (cospi), 0x1p-63, 0.5, 10000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (cospi), 0.5, 0x1p51, 10000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (cospi), 0x1p51, inf, 10000)
+#if WANT_TRIGPI_TESTS
+TEST_ULP (V_NAME_D1 (cospi), 2.56)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (cospi), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_D1 (cospi), 0, 0x1p-63, 5000)
+TEST_SYM_INTERVAL (V_NAME_D1 (cospi), 0x1p-63, 0.5, 10000)
+TEST_SYM_INTERVAL (V_NAME_D1 (cospi), 0.5, 0x1p51, 10000)
+TEST_SYM_INTERVAL (V_NAME_D1 (cospi), 0x1p51, inf, 10000)
+#endif
diff --git a/pl/math/v_cospif_3u2.c b/math/aarch64/advsimd/cospif.c
similarity index 76%
rename from pl/math/v_cospif_3u2.c
rename to math/aarch64/advsimd/cospif.c
index d88aa828439d15..62f4b8122b2cff 100644
--- a/pl/math/v_cospif_3u2.c
+++ b/math/aarch64/advsimd/cospif.c
@@ -1,15 +1,15 @@
 /*
  * Single-precision vector cospi function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "mathlib.h"
 #include "v_math.h"
-#include "poly_advsimd_f32.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "v_poly_f32.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -26,14 +26,14 @@ static float32x4_t VPCS_ATTR NOINLINE
 special_case (float32x4_t x, float32x4_t y, uint32x4_t odd, uint32x4_t cmp)
 {
   y = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));
-  return v_call_f32 (cospif, x, y, cmp);
+  return v_call_f32 (arm_math_cospif, x, y, cmp);
 }
 
 /* Approximation for vector single-precision cospi(x)
     Maximum Error: 3.17 ULP:
     _ZGVnN4v_cospif(0x1.d341a8p-5) got 0x1.f7cd56p-1
 				  want 0x1.f7cd5p-1.  */
-float32x4_t VPCS_ATTR V_NAME_F1 (cospi) (float32x4_t x)
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (cospi) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
 
@@ -74,10 +74,13 @@ float32x4_t VPCS_ATTR V_NAME_F1 (cospi) (float32x4_t x)
   return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));
 }
 
-PL_SIG (V, F, 1, cospi, -0.9, 0.9)
-PL_TEST_ULP (V_NAME_F1 (cospi), 2.67)
-PL_TEST_EXPECT_FENV (V_NAME_F1 (cospi), WANT_SIMD_EXCEPT)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (cospi), 0, 0x1p-31, 5000)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (cospi), 0x1p-31, 0.5, 10000)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (cospi), 0.5, 0x1p32f, 10000)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (cospi), 0x1p32f, inf, 10000)
+HALF_WIDTH_ALIAS_F1 (cospi)
+
+#if WANT_TRIGPI_TESTS
+TEST_ULP (V_NAME_F1 (cospi), 2.67)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (cospi), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_F1 (cospi), 0, 0x1p-31, 5000)
+TEST_SYM_INTERVAL (V_NAME_F1 (cospi), 0x1p-31, 0.5, 10000)
+TEST_SYM_INTERVAL (V_NAME_F1 (cospi), 0.5, 0x1p32f, 10000)
+TEST_SYM_INTERVAL (V_NAME_F1 (cospi), 0x1p32f, inf, 10000)
+#endif
diff --git a/pl/math/v_erf_2u5.c b/math/aarch64/advsimd/erf.c
similarity index 77%
rename from pl/math/v_erf_2u5.c
rename to math/aarch64/advsimd/erf.c
index e581ec5bb8a73c..40717a660ce2fb 100644
--- a/pl/math/v_erf_2u5.c
+++ b/math/aarch64/advsimd/erf.c
@@ -1,30 +1,32 @@
 /*
  * Double-precision vector erf(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
   float64x2_t third;
-  float64x2_t tenth, two_over_five, two_over_fifteen;
-  float64x2_t two_over_nine, two_over_fortyfive;
+  float64x2_t tenth, two_over_five, two_over_nine;
+  double two_over_fifteen, two_over_fortyfive;
   float64x2_t max, shift;
+  uint64x2_t max_idx;
 #if WANT_SIMD_EXCEPT
   float64x2_t tiny_bound, huge_bound, scale_minus_one;
 #endif
 } data = {
+  .max_idx = V2 (768),
   .third = V2 (0x1.5555555555556p-2), /* used to compute 2/3 and 1/6 too.  */
-  .two_over_fifteen = V2 (0x1.1111111111111p-3),
+  .two_over_fifteen = 0x1.1111111111111p-3,
   .tenth = V2 (-0x1.999999999999ap-4),
   .two_over_five = V2 (-0x1.999999999999ap-2),
   .two_over_nine = V2 (-0x1.c71c71c71c71cp-3),
-  .two_over_fortyfive = V2 (0x1.6c16c16c16c17p-5),
+  .two_over_fortyfive = 0x1.6c16c16c16c17p-5,
   .max = V2 (5.9921875), /* 6 - 1/128.  */
   .shift = V2 (0x1p45),
 #if WANT_SIMD_EXCEPT
@@ -46,8 +48,8 @@ static inline struct entry
 lookup (uint64x2_t i)
 {
   struct entry e;
-  float64x2_t e1 = vld1q_f64 ((float64_t *) (__erf_data.tab + i[0])),
-	      e2 = vld1q_f64 ((float64_t *) (__erf_data.tab + i[1]));
+  float64x2_t e1 = vld1q_f64 (&__v_erf_data.tab[vgetq_lane_u64 (i, 0)].erf),
+	      e2 = vld1q_f64 (&__v_erf_data.tab[vgetq_lane_u64 (i, 1)].erf);
   e.erf = vuzp1q_f64 (e1, e2);
   e.scale = vuzp2q_f64 (e1, e2);
   return e;
@@ -77,8 +79,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (erf) (float64x2_t x)
   float64x2_t a = vabsq_f64 (x);
   /* Reciprocal conditions that do not catch NaNs so they can be used in BSLs
      to return expected results.  */
-  uint64x2_t a_le_max = vcleq_f64 (a, dat->max);
-  uint64x2_t a_gt_max = vcgtq_f64 (a, dat->max);
+  uint64x2_t a_le_max = vcaleq_f64 (x, dat->max);
+  uint64x2_t a_gt_max = vcagtq_f64 (x, dat->max);
 
 #if WANT_SIMD_EXCEPT
   /* |x| huge or tiny.  */
@@ -105,7 +107,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (erf) (float64x2_t x)
      segfault.  */
   uint64x2_t i
       = vsubq_u64 (vreinterpretq_u64_f64 (z), vreinterpretq_u64_f64 (shift));
-  i = vbslq_u64 (a_le_max, i, v_u64 (768));
+  i = vbslq_u64 (a_le_max, i, dat->max_idx);
   struct entry e = lookup (i);
 
   float64x2_t r = vsubq_f64 (z, shift);
@@ -115,14 +117,19 @@ float64x2_t VPCS_ATTR V_NAME_D1 (erf) (float64x2_t x)
   float64x2_t d2 = vmulq_f64 (d, d);
   float64x2_t r2 = vmulq_f64 (r, r);
 
+  float64x2_t two_over_fifteen_and_fortyfive
+      = vld1q_f64 (&dat->two_over_fifteen);
+
   /* poly (d, r) = 1 + p1(r) * d + p2(r) * d^2 + ... + p5(r) * d^5.  */
   float64x2_t p1 = r;
   float64x2_t p2
       = vfmsq_f64 (dat->third, r2, vaddq_f64 (dat->third, dat->third));
   float64x2_t p3 = vmulq_f64 (r, vfmaq_f64 (v_f64 (-0.5), r2, dat->third));
-  float64x2_t p4 = vfmaq_f64 (dat->two_over_five, r2, dat->two_over_fifteen);
+  float64x2_t p4 = vfmaq_laneq_f64 (dat->two_over_five, r2,
+				    two_over_fifteen_and_fortyfive, 0);
   p4 = vfmsq_f64 (dat->tenth, r2, p4);
-  float64x2_t p5 = vfmaq_f64 (dat->two_over_nine, r2, dat->two_over_fortyfive);
+  float64x2_t p5 = vfmaq_laneq_f64 (dat->two_over_nine, r2,
+				    two_over_fifteen_and_fortyfive, 1);
   p5 = vmulq_f64 (r, vfmaq_f64 (vmulq_f64 (v_f64 (0.5), dat->third), r2, p5));
 
   float64x2_t p34 = vfmaq_f64 (p3, d, p4);
@@ -150,9 +157,10 @@ float64x2_t VPCS_ATTR V_NAME_D1 (erf) (float64x2_t x)
   return y;
 }
 
-PL_SIG (V, D, 1, erf, -6.0, 6.0)
-PL_TEST_ULP (V_NAME_D1 (erf), 1.79)
-PL_TEST_EXPECT_FENV (V_NAME_D1 (erf), WANT_SIMD_EXCEPT)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (erf), 0, 5.9921875, 40000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (erf), 5.9921875, inf, 40000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (erf), 0, inf, 40000)
+TEST_SIG (V, D, 1, erf, -6.0, 6.0)
+TEST_ULP (V_NAME_D1 (erf), 1.79)
+/* WANT_SIMD_EXCEPT blocks miss some cases.  */
+TEST_DISABLE_FENV (V_NAME_D1 (erf))
+TEST_SYM_INTERVAL (V_NAME_D1 (erf), 0, 5.9921875, 40000)
+TEST_SYM_INTERVAL (V_NAME_D1 (erf), 5.9921875, inf, 40000)
+TEST_SYM_INTERVAL (V_NAME_D1 (erf), 0, inf, 40000)
diff --git a/pl/math/v_erfc_1u8.c b/math/aarch64/advsimd/erfc.c
similarity index 77%
rename from pl/math/v_erfc_1u8.c
rename to math/aarch64/advsimd/erfc.c
index 10ef7e6a3c34e3..97ef09ecc113c7 100644
--- a/pl/math/v_erfc_1u8.c
+++ b/math/aarch64/advsimd/erfc.c
@@ -1,21 +1,21 @@
 /*
  * Double-precision vector erfc(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
   uint64x2_t offset, table_scale;
   float64x2_t max, shift;
-  float64x2_t p20, p40, p41, p42;
-  float64x2_t p51, p52;
-  float64x2_t qr5, qr6, qr7, qr8, qr9;
+  float64x2_t p20, p40, p41, p51;
+  double p42, p52;
+  double qr5[2], qr6[2], qr7[2], qr8[2], qr9[2];
 #if WANT_SIMD_EXCEPT
   float64x2_t uflow_bound;
 #endif
@@ -30,9 +30,9 @@ static const struct data
   .p20 = V2 (0x1.5555555555555p-2),  /* 1/3, used to compute 2/3 and 1/6.  */
   .p40 = V2 (-0x1.999999999999ap-4), /* 1/10.  */
   .p41 = V2 (-0x1.999999999999ap-2), /* 2/5.  */
-  .p42 = V2 (0x1.1111111111111p-3),  /* 2/15.  */
+  .p42 = 0x1.1111111111111p-3,	     /* 2/15.  */
   .p51 = V2 (-0x1.c71c71c71c71cp-3), /* 2/9.  */
-  .p52 = V2 (0x1.6c16c16c16c17p-5),  /* 2/45.  */
+  .p52 = 0x1.6c16c16c16c17p-5,	     /* 2/45.  */
   /* Qi = (i+1) / i, Ri = -2 * i / ((i+1)*(i+2)), for i = 5, ..., 9.  */
   .qr5 = { 0x1.3333333333333p0, -0x1.e79e79e79e79ep-3 },
   .qr6 = { 0x1.2aaaaaaaaaaabp0, -0x1.b6db6db6db6dbp-3 },
@@ -57,8 +57,10 @@ static inline struct entry
 lookup (uint64x2_t i)
 {
   struct entry e;
-  float64x2_t e1 = vld1q_f64 ((float64_t *) (__erfc_data.tab - Off + i[0])),
-	      e2 = vld1q_f64 ((float64_t *) (__erfc_data.tab - Off + i[1]));
+  float64x2_t e1
+      = vld1q_f64 (&__v_erfc_data.tab[vgetq_lane_u64 (i, 0) - Off].erfc);
+  float64x2_t e2
+      = vld1q_f64 (&__v_erfc_data.tab[vgetq_lane_u64 (i, 1) - Off].erfc);
   e.erfc = vuzp1q_f64 (e1, e2);
   e.scale = vuzp2q_f64 (e1, e2);
   return e;
@@ -144,22 +146,26 @@ float64x2_t V_NAME_D1 (erfc) (float64x2_t x)
   float64x2_t p1 = r;
   float64x2_t p2 = vfmsq_f64 (dat->p20, r2, vaddq_f64 (dat->p20, dat->p20));
   float64x2_t p3 = vmulq_f64 (r, vfmaq_f64 (v_f64 (-0.5), r2, dat->p20));
-  float64x2_t p4 = vfmaq_f64 (dat->p41, r2, dat->p42);
+  float64x2_t p42_p52 = vld1q_f64 (&dat->p42);
+  float64x2_t p4 = vfmaq_laneq_f64 (dat->p41, r2, p42_p52, 0);
   p4 = vfmsq_f64 (dat->p40, r2, p4);
-  float64x2_t p5 = vfmaq_f64 (dat->p51, r2, dat->p52);
+  float64x2_t p5 = vfmaq_laneq_f64 (dat->p51, r2, p42_p52, 1);
   p5 = vmulq_f64 (r, vfmaq_f64 (vmulq_f64 (v_f64 (0.5), dat->p20), r2, p5));
   /* Compute p_i using recurrence relation:
      p_{i+2} = (p_i + r * Q_{i+1} * p_{i+1}) * R_{i+1}.  */
-  float64x2_t p6 = vfmaq_f64 (p4, p5, vmulq_laneq_f64 (r, dat->qr5, 0));
-  p6 = vmulq_laneq_f64 (p6, dat->qr5, 1);
-  float64x2_t p7 = vfmaq_f64 (p5, p6, vmulq_laneq_f64 (r, dat->qr6, 0));
-  p7 = vmulq_laneq_f64 (p7, dat->qr6, 1);
-  float64x2_t p8 = vfmaq_f64 (p6, p7, vmulq_laneq_f64 (r, dat->qr7, 0));
-  p8 = vmulq_laneq_f64 (p8, dat->qr7, 1);
-  float64x2_t p9 = vfmaq_f64 (p7, p8, vmulq_laneq_f64 (r, dat->qr8, 0));
-  p9 = vmulq_laneq_f64 (p9, dat->qr8, 1);
-  float64x2_t p10 = vfmaq_f64 (p8, p9, vmulq_laneq_f64 (r, dat->qr9, 0));
-  p10 = vmulq_laneq_f64 (p10, dat->qr9, 1);
+  float64x2_t qr5 = vld1q_f64 (dat->qr5), qr6 = vld1q_f64 (dat->qr6),
+	      qr7 = vld1q_f64 (dat->qr7), qr8 = vld1q_f64 (dat->qr8),
+	      qr9 = vld1q_f64 (dat->qr9);
+  float64x2_t p6 = vfmaq_f64 (p4, p5, vmulq_laneq_f64 (r, qr5, 0));
+  p6 = vmulq_laneq_f64 (p6, qr5, 1);
+  float64x2_t p7 = vfmaq_f64 (p5, p6, vmulq_laneq_f64 (r, qr6, 0));
+  p7 = vmulq_laneq_f64 (p7, qr6, 1);
+  float64x2_t p8 = vfmaq_f64 (p6, p7, vmulq_laneq_f64 (r, qr7, 0));
+  p8 = vmulq_laneq_f64 (p8, qr7, 1);
+  float64x2_t p9 = vfmaq_f64 (p7, p8, vmulq_laneq_f64 (r, qr8, 0));
+  p9 = vmulq_laneq_f64 (p9, qr8, 1);
+  float64x2_t p10 = vfmaq_f64 (p8, p9, vmulq_laneq_f64 (r, qr9, 0));
+  p10 = vmulq_laneq_f64 (p10, qr9, 1);
   /* Compute polynomial in d using pairwise Horner scheme.  */
   float64x2_t p90 = vfmaq_f64 (p9, d, p10);
   float64x2_t p78 = vfmaq_f64 (p7, d, p8);
@@ -189,10 +195,11 @@ float64x2_t V_NAME_D1 (erfc) (float64x2_t x)
   return vfmaq_f64 (off, fac, y);
 }
 
-PL_SIG (V, D, 1, erfc, -6.0, 28.0)
-PL_TEST_ULP (V_NAME_D1 (erfc), 1.21)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (erfc), 0, 0x1p-26, 40000)
-PL_TEST_INTERVAL (V_NAME_D1 (erfc), 0x1p-26, 28.0, 40000)
-PL_TEST_INTERVAL (V_NAME_D1 (erfc), -0x1p-26, -6.0, 40000)
-PL_TEST_INTERVAL (V_NAME_D1 (erfc), 28.0, inf, 40000)
-PL_TEST_INTERVAL (V_NAME_D1 (erfc), -6.0, -inf, 40000)
+TEST_SIG (V, D, 1, erfc, -6.0, 28.0)
+TEST_ULP (V_NAME_D1 (erfc), 1.21)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (erfc), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_D1 (erfc), 0, 0x1p-26, 40000)
+TEST_INTERVAL (V_NAME_D1 (erfc), 0x1p-26, 28.0, 40000)
+TEST_INTERVAL (V_NAME_D1 (erfc), -0x1p-26, -6.0, 40000)
+TEST_INTERVAL (V_NAME_D1 (erfc), 28.0, inf, 40000)
+TEST_INTERVAL (V_NAME_D1 (erfc), -6.0, -inf, 40000)
diff --git a/pl/math/v_erfcf_1u7.c b/math/aarch64/advsimd/erfcf.c
similarity index 76%
rename from pl/math/v_erfcf_1u7.c
rename to math/aarch64/advsimd/erfcf.c
index c361d070443827..f420439ef8a3b6 100644
--- a/pl/math/v_erfcf_1u7.c
+++ b/math/aarch64/advsimd/erfcf.c
@@ -1,19 +1,20 @@
 /*
  * Single-precision vector erfc(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
   uint32x4_t offset, table_scale;
   float32x4_t max, shift;
-  float32x4_t coeffs, third, two_over_five, tenth;
+  float coeffs[4];
+  float32x4_t third, two_over_five, tenth;
 #if WANT_SIMD_EXCEPT
   float32x4_t uflow_bound;
 #endif
@@ -27,7 +28,7 @@ static const struct data
   .shift = V4 (0x1p17f),
   /* Store 1/3, 2/3 and 2/15 in a single register for use with indexed muls and
      fmas.  */
-  .coeffs = (float32x4_t){ 0x1.555556p-2f, 0x1.555556p-1f, 0x1.111112p-3f, 0 },
+  .coeffs = { 0x1.555556p-2f, 0x1.555556p-1f, 0x1.111112p-3f, 0 },
   .third = V4 (0x1.555556p-2f),
   .two_over_five = V4 (-0x1.99999ap-2f),
   .tenth = V4 (-0x1.99999ap-4f),
@@ -50,12 +51,16 @@ static inline struct entry
 lookup (uint32x4_t i)
 {
   struct entry e;
-  float64_t t0 = *((float64_t *) (__erfcf_data.tab - Off + i[0]));
-  float64_t t1 = *((float64_t *) (__erfcf_data.tab - Off + i[1]));
-  float64_t t2 = *((float64_t *) (__erfcf_data.tab - Off + i[2]));
-  float64_t t3 = *((float64_t *) (__erfcf_data.tab - Off + i[3]));
-  float32x4_t e1 = vreinterpretq_f32_f64 ((float64x2_t){ t0, t1 });
-  float32x4_t e2 = vreinterpretq_f32_f64 ((float64x2_t){ t2, t3 });
+  float32x2_t t0
+      = vld1_f32 (&__v_erfcf_data.tab[vgetq_lane_u32 (i, 0) - Off].erfc);
+  float32x2_t t1
+      = vld1_f32 (&__v_erfcf_data.tab[vgetq_lane_u32 (i, 1) - Off].erfc);
+  float32x2_t t2
+      = vld1_f32 (&__v_erfcf_data.tab[vgetq_lane_u32 (i, 2) - Off].erfc);
+  float32x2_t t3
+      = vld1_f32 (&__v_erfcf_data.tab[vgetq_lane_u32 (i, 3) - Off].erfc);
+  float32x4_t e1 = vcombine_f32 (t0, t1);
+  float32x4_t e2 = vcombine_f32 (t2, t3);
   e.erfc = vuzp1q_f32 (e1, e2);
   e.scale = vuzp2q_f32 (e1, e2);
   return e;
@@ -86,8 +91,7 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp)
    Maximum error: 1.63 ULP (~1.0 ULP for x < 0.0).
    _ZGVnN4v_erfcf(0x1.1dbf7ap+3) got 0x1.f51212p-120
 				want 0x1.f51216p-120.  */
-VPCS_ATTR
-float32x4_t V_NAME_F1 (erfc) (float32x4_t x)
+NOINLINE VPCS_ATTR float32x4_t V_NAME_F1 (erfc) (float32x4_t x)
 {
   const struct data *dat = ptr_barrier (&data);
 
@@ -130,10 +134,11 @@ float32x4_t V_NAME_F1 (erfc) (float32x4_t x)
   float32x4_t r2 = vmulq_f32 (r, r);
 
   float32x4_t p1 = r;
-  float32x4_t p2 = vfmsq_laneq_f32 (dat->third, r2, dat->coeffs, 1);
+  float32x4_t coeffs = vld1q_f32 (dat->coeffs);
+  float32x4_t p2 = vfmsq_laneq_f32 (dat->third, r2, coeffs, 1);
   float32x4_t p3
-      = vmulq_f32 (r, vfmaq_laneq_f32 (v_f32 (-0.5), r2, dat->coeffs, 0));
-  float32x4_t p4 = vfmaq_laneq_f32 (dat->two_over_five, r2, dat->coeffs, 2);
+      = vmulq_f32 (r, vfmaq_laneq_f32 (v_f32 (-0.5), r2, coeffs, 0));
+  float32x4_t p4 = vfmaq_laneq_f32 (dat->two_over_five, r2, coeffs, 2);
   p4 = vfmsq_f32 (dat->tenth, r2, p4);
 
   float32x4_t y = vfmaq_f32 (p3, d, p4);
@@ -157,10 +162,13 @@ float32x4_t V_NAME_F1 (erfc) (float32x4_t x)
   return vfmaq_f32 (off, fac, y);
 }
 
-PL_SIG (V, F, 1, erfc, -4.0, 10.0)
-PL_TEST_ULP (V_NAME_F1 (erfc), 1.14)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (erfc), 0, 0x1p-26, 40000)
-PL_TEST_INTERVAL (V_NAME_F1 (erfc), 0x1p-26, 10.0625, 40000)
-PL_TEST_INTERVAL (V_NAME_F1 (erfc), -0x1p-26, -4.0, 40000)
-PL_TEST_INTERVAL (V_NAME_F1 (erfc), 10.0625, inf, 40000)
-PL_TEST_INTERVAL (V_NAME_F1 (erfc), -4.0, -inf, 40000)
+HALF_WIDTH_ALIAS_F1 (erfc)
+
+TEST_SIG (V, F, 1, erfc, -4.0, 10.0)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (erfc), WANT_SIMD_EXCEPT)
+TEST_ULP (V_NAME_F1 (erfc), 1.14)
+TEST_SYM_INTERVAL (V_NAME_F1 (erfc), 0, 0x1p-26, 40000)
+TEST_INTERVAL (V_NAME_F1 (erfc), 0x1p-26, 10.0625, 40000)
+TEST_INTERVAL (V_NAME_F1 (erfc), -0x1p-26, -4.0, 40000)
+TEST_INTERVAL (V_NAME_F1 (erfc), 10.0625, inf, 40000)
+TEST_INTERVAL (V_NAME_F1 (erfc), -4.0, -inf, 40000)
diff --git a/pl/math/v_erff_2u.c b/math/aarch64/advsimd/erff.c
similarity index 76%
rename from pl/math/v_erff_2u.c
rename to math/aarch64/advsimd/erff.c
index 502526407df229..508bc4c2f5e226 100644
--- a/pl/math/v_erff_2u.c
+++ b/math/aarch64/advsimd/erff.c
@@ -1,13 +1,13 @@
 /*
  * Single-precision vector erf(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -37,12 +37,12 @@ static inline struct entry
 lookup (uint32x4_t i)
 {
   struct entry e;
-  float64_t t0 = *((float64_t *) (__erff_data.tab + i[0]));
-  float64_t t1 = *((float64_t *) (__erff_data.tab + i[1]));
-  float64_t t2 = *((float64_t *) (__erff_data.tab + i[2]));
-  float64_t t3 = *((float64_t *) (__erff_data.tab + i[3]));
-  float32x4_t e1 = vreinterpretq_f32_f64 ((float64x2_t){ t0, t1 });
-  float32x4_t e2 = vreinterpretq_f32_f64 ((float64x2_t){ t2, t3 });
+  float32x2_t t0 = vld1_f32 (&__v_erff_data.tab[vgetq_lane_u32 (i, 0)].erf);
+  float32x2_t t1 = vld1_f32 (&__v_erff_data.tab[vgetq_lane_u32 (i, 1)].erf);
+  float32x2_t t2 = vld1_f32 (&__v_erff_data.tab[vgetq_lane_u32 (i, 2)].erf);
+  float32x2_t t3 = vld1_f32 (&__v_erff_data.tab[vgetq_lane_u32 (i, 3)].erf);
+  float32x4_t e1 = vcombine_f32 (t0, t1);
+  float32x4_t e2 = vcombine_f32 (t2, t3);
   e.erf = vuzp1q_f32 (e1, e2);
   e.scale = vuzp2q_f32 (e1, e2);
   return e;
@@ -61,7 +61,7 @@ lookup (uint32x4_t i)
    Maximum error: 1.93 ULP
      _ZGVnN4v_erff(0x1.c373e6p-9) got 0x1.fd686cp-9
 				 want 0x1.fd6868p-9.  */
-float32x4_t VPCS_ATTR V_NAME_F1 (erf) (float32x4_t x)
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (erf) (float32x4_t x)
 {
   const struct data *dat = ptr_barrier (&data);
 
@@ -110,9 +110,11 @@ float32x4_t VPCS_ATTR V_NAME_F1 (erf) (float32x4_t x)
   return y;
 }
 
-PL_SIG (V, F, 1, erf, -4.0, 4.0)
-PL_TEST_ULP (V_NAME_F1 (erf), 1.43)
-PL_TEST_EXPECT_FENV (V_NAME_F1 (erf), WANT_SIMD_EXCEPT)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (erf), 0, 3.9375, 40000)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (erf), 3.9375, inf, 40000)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (erf), 0, inf, 40000)
+HALF_WIDTH_ALIAS_F1 (erf)
+
+TEST_SIG (V, F, 1, erf, -4.0, 4.0)
+TEST_ULP (V_NAME_F1 (erf), 1.43)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (erf), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_F1 (erf), 0, 3.9375, 40000)
+TEST_SYM_INTERVAL (V_NAME_F1 (erf), 3.9375, inf, 40000)
+TEST_SYM_INTERVAL (V_NAME_F1 (erf), 0, inf, 40000)
diff --git a/math/aarch64/v_exp.c b/math/aarch64/advsimd/exp.c
similarity index 90%
rename from math/aarch64/v_exp.c
rename to math/aarch64/advsimd/exp.c
index bc5609faf4fc35..a928c35c9418b7 100644
--- a/math/aarch64/v_exp.c
+++ b/math/aarch64/advsimd/exp.c
@@ -1,12 +1,14 @@
 /*
  * Double-precision vector e^x function.
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "mathlib.h"
 #include "v_math.h"
+#include "test_defs.h"
+#include "test_sig.h"
 
 #define N (1 << V_EXP_TABLE_BITS)
 #define IndexMask (N - 1)
@@ -123,3 +125,10 @@ float64x2_t VPCS_ATTR V_NAME_D1 (exp) (float64x2_t x)
 
   return vfmaq_f64 (s, y, s);
 }
+
+TEST_SIG (V, D, 1, exp, -9.9, 9.9)
+TEST_ULP (V_NAME_D1 (exp), 1.9)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (exp), WANT_SIMD_EXCEPT)
+TEST_INTERVAL (V_NAME_D1 (exp), 0, 0xffff000000000000, 10000)
+TEST_SYM_INTERVAL (V_NAME_D1 (exp), 0x1p-6, 0x1p6, 400000)
+TEST_SYM_INTERVAL (V_NAME_D1 (exp), 633.3, 733.3, 10000)
diff --git a/pl/math/v_exp10_2u.c b/math/aarch64/advsimd/exp10.c
similarity index 89%
rename from pl/math/v_exp10_2u.c
rename to math/aarch64/advsimd/exp10.c
index 29072a60fb3aac..24fdd1c7d257a1 100644
--- a/pl/math/v_exp10_2u.c
+++ b/math/aarch64/advsimd/exp10.c
@@ -1,14 +1,15 @@
 /*
  * Double-precision vector 10^x function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
+#define _GNU_SOURCE
 #include "mathlib.h"
 #include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 /* Value of |x| above which scale overflows without special treatment.  */
 #define SpecialBound 306.0 /* floor (log10 (2^1023)) - 1.  */
@@ -135,10 +136,12 @@ float64x2_t VPCS_ATTR V_NAME_D1 (exp10) (float64x2_t x)
   return vfmaq_f64 (s, y, s);
 }
 
-PL_SIG (S, D, 1, exp10, -9.9, 9.9)
-PL_SIG (V, D, 1, exp10, -9.9, 9.9)
-PL_TEST_ULP (V_NAME_D1 (exp10), 1.15)
-PL_TEST_EXPECT_FENV (V_NAME_D1 (exp10), WANT_SIMD_EXCEPT)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (exp10), 0, SpecialBound, 5000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (exp10), SpecialBound, ScaleBound, 5000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (exp10), ScaleBound, inf, 10000)
+#if WANT_EXP10_TESTS
+TEST_SIG (S, D, 1, exp10, -9.9, 9.9)
+TEST_SIG (V, D, 1, exp10, -9.9, 9.9)
+TEST_ULP (V_NAME_D1 (exp10), 1.15)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (exp10), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_D1 (exp10), 0, SpecialBound, 5000)
+TEST_SYM_INTERVAL (V_NAME_D1 (exp10), SpecialBound, ScaleBound, 5000)
+TEST_SYM_INTERVAL (V_NAME_D1 (exp10), ScaleBound, inf, 10000)
+#endif
diff --git a/pl/math/v_exp10f_2u4.c b/math/aarch64/advsimd/exp10f.c
similarity index 58%
rename from pl/math/v_exp10f_2u4.c
rename to math/aarch64/advsimd/exp10f.c
index 0e91becfa61291..eb0d5dd0d57cb4 100644
--- a/pl/math/v_exp10f_2u4.c
+++ b/math/aarch64/advsimd/exp10f.c
@@ -1,23 +1,24 @@
 /*
  * Single-precision vector 10^x function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#include "mathlib.h"
+#define _GNU_SOURCE
 #include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_advsimd_f32.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "v_poly_f32.h"
 
 #define ScaleBound 192.0f
 
 static const struct data
 {
-  float32x4_t poly[5];
-  float32x4_t log10_2_and_inv, shift;
-
+  float32x4_t c0, c1, c3;
+  float log10_2_high, log10_2_low, c2, c4;
+  float32x4_t inv_log10_2, special_bound;
+  uint32x4_t exponent_bias, special_offset, special_bias;
 #if !WANT_SIMD_EXCEPT
   float32x4_t scale_thresh;
 #endif
@@ -27,19 +28,24 @@ static const struct data
      rel error: 0x1.89dafa3p-24
      abs error: 0x1.167d55p-23 in [-log10(2)/2, log10(2)/2]
      maxerr: 1.85943 +0.5 ulp.  */
-  .poly = { V4 (0x1.26bb16p+1f), V4 (0x1.5350d2p+1f), V4 (0x1.04744ap+1f),
-	    V4 (0x1.2d8176p+0f), V4 (0x1.12b41ap-1f) },
-  .shift = V4 (0x1.8p23f),
-
-  /* Stores constants 1/log10(2), log10(2)_high, log10(2)_low, 0.  */
-  .log10_2_and_inv = { 0x1.a934fp+1, 0x1.344136p-2, -0x1.ec10cp-27, 0 },
+  .c0 = V4 (0x1.26bb16p+1f),
+  .c1 = V4 (0x1.5350d2p+1f),
+  .c2 = 0x1.04744ap+1f,
+  .c3 = V4 (0x1.2d8176p+0f),
+  .c4 = 0x1.12b41ap-1f,
+  .inv_log10_2 = V4 (0x1.a934fp+1),
+  .log10_2_high = 0x1.344136p-2,
+  .log10_2_low = 0x1.ec10cp-27,
+  /* rint (log2 (2^127 / (1 + sqrt (2)))).  */
+  .special_bound = V4 (126.0f),
+  .exponent_bias = V4 (0x3f800000),
+  .special_offset = V4 (0x82000000),
+  .special_bias = V4 (0x7f000000),
 #if !WANT_SIMD_EXCEPT
   .scale_thresh = V4 (ScaleBound)
 #endif
 };
 
-#define ExponentBias v_u32 (0x3f800000)
-
 #if WANT_SIMD_EXCEPT
 
 # define SpecialBound 38.0f	       /* rint(log10(2^127)).  */
@@ -57,17 +63,15 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp)
 
 #else
 
-# define SpecialBound 126.0f /* rint (log2 (2^127 / (1 + sqrt (2)))).  */
-# define SpecialOffset v_u32 (0x82000000)
-# define SpecialBias v_u32 (0x7f000000)
+# define SpecialBound 126.0f
 
 static float32x4_t VPCS_ATTR NOINLINE
 special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1,
 	      float32x4_t scale, const struct data *d)
 {
   /* 2^n may overflow, break it up into s1*s2.  */
-  uint32x4_t b = vandq_u32 (vclezq_f32 (n), SpecialOffset);
-  float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, SpecialBias));
+  uint32x4_t b = vandq_u32 (vclezq_f32 (n), d->special_offset);
+  float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias));
   float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b));
   uint32x4_t cmp2 = vcagtq_f32 (n, d->scale_thresh);
   float32x4_t r2 = vmulq_f32 (s1, s1);
@@ -84,7 +88,7 @@ special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1,
    Algorithm is accurate to 2.36 ULP.
    _ZGVnN4v_exp10f(0x1.be2b36p+1) got 0x1.7e79c4p+11
 				 want 0x1.7e79cp+11.  */
-float32x4_t VPCS_ATTR V_NAME_F1 (exp10) (float32x4_t x)
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp10) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
 #if WANT_SIMD_EXCEPT
@@ -102,22 +106,23 @@ float32x4_t VPCS_ATTR V_NAME_F1 (exp10) (float32x4_t x)
   /* exp10(x) = 2^n * 10^r = 2^n * (1 + poly (r)),
      with poly(r) in [1/sqrt(2), sqrt(2)] and
      x = r + n * log10 (2), with r in [-log10(2)/2, log10(2)/2].  */
-  float32x4_t z = vfmaq_laneq_f32 (d->shift, x, d->log10_2_and_inv, 0);
-  float32x4_t n = vsubq_f32 (z, d->shift);
-  float32x4_t r = vfmsq_laneq_f32 (x, n, d->log10_2_and_inv, 1);
-  r = vfmsq_laneq_f32 (r, n, d->log10_2_and_inv, 2);
-  uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23);
+  float32x4_t log10_2_c24 = vld1q_f32 (&d->log10_2_high);
+  float32x4_t n = vrndaq_f32 (vmulq_f32 (x, d->inv_log10_2));
+  float32x4_t r = vfmsq_laneq_f32 (x, n, log10_2_c24, 0);
+  r = vfmaq_laneq_f32 (r, n, log10_2_c24, 1);
+  uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (n)), 23);
 
-  float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias));
+  float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias));
 
 #if !WANT_SIMD_EXCEPT
-  uint32x4_t cmp = vcagtq_f32 (n, v_f32 (SpecialBound));
+  uint32x4_t cmp = vcagtq_f32 (n, d->special_bound);
 #endif
 
   float32x4_t r2 = vmulq_f32 (r, r);
-  float32x4_t poly
-      = vfmaq_f32 (vmulq_f32 (r, d->poly[0]),
-		   v_pairwise_poly_3_f32 (r, r2, d->poly + 1), r2);
+  float32x4_t p12 = vfmaq_laneq_f32 (d->c1, r, log10_2_c24, 2);
+  float32x4_t p34 = vfmaq_laneq_f32 (d->c3, r, log10_2_c24, 3);
+  float32x4_t p14 = vfmaq_f32 (p12, r2, p34);
+  float32x4_t poly = vfmaq_f32 (vmulq_f32 (r, d->c0), p14, r2);
 
   if (unlikely (v_any_u32 (cmp)))
 #if WANT_SIMD_EXCEPT
@@ -129,10 +134,14 @@ float32x4_t VPCS_ATTR V_NAME_F1 (exp10) (float32x4_t x)
   return vfmaq_f32 (scale, poly, scale);
 }
 
-PL_SIG (S, F, 1, exp10, -9.9, 9.9)
-PL_SIG (V, F, 1, exp10, -9.9, 9.9)
-PL_TEST_ULP (V_NAME_F1 (exp10), 1.86)
-PL_TEST_EXPECT_FENV (V_NAME_F1 (exp10), WANT_SIMD_EXCEPT)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (exp10), 0, SpecialBound, 5000)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (exp10), SpecialBound, ScaleBound, 5000)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (exp10), ScaleBound, inf, 10000)
+HALF_WIDTH_ALIAS_F1 (exp10)
+
+#if WANT_EXP10_TESTS
+TEST_SIG (S, F, 1, exp10, -9.9, 9.9)
+TEST_SIG (V, F, 1, exp10, -9.9, 9.9)
+TEST_ULP (V_NAME_F1 (exp10), 1.86)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (exp10), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_F1 (exp10), 0, SpecialBound, 5000)
+TEST_SYM_INTERVAL (V_NAME_F1 (exp10), SpecialBound, ScaleBound, 5000)
+TEST_SYM_INTERVAL (V_NAME_F1 (exp10), ScaleBound, inf, 10000)
+#endif
diff --git a/pl/math/v_exp2_2u.c b/math/aarch64/advsimd/exp2.c
similarity index 82%
rename from pl/math/v_exp2_2u.c
rename to math/aarch64/advsimd/exp2.c
index de59779689f59f..63448d806b8221 100644
--- a/pl/math/v_exp2_2u.c
+++ b/math/aarch64/advsimd/exp2.c
@@ -1,19 +1,20 @@
 /*
  * Double-precision vector 2^x function.
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "poly_advsimd_f64.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "v_poly_f64.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define N (1 << V_EXP_TABLE_BITS)
 #define IndexMask (N - 1)
 #define BigBound 1022.0
 #define UOFlowBound 1280.0
+#define TinyBound 0x2000000000000000 /* asuint64(0x1p-511).  */
 
 static const struct data
 {
@@ -38,7 +39,6 @@ lookup_sbits (uint64x2_t i)
 
 #if WANT_SIMD_EXCEPT
 
-# define TinyBound 0x2000000000000000 /* asuint64(0x1p-511).  */
 # define Thres 0x2080000000000000     /* asuint64(512.0) - TinyBound.  */
 
 /* Call scalar exp2 as a fallback.  */
@@ -62,8 +62,8 @@ special_case (float64x2_t s, float64x2_t y, float64x2_t n,
   /* 2^(n/N) may overflow, break it up into s1*s2.  */
   uint64x2_t b = vandq_u64 (vclezq_f64 (n), v_u64 (SpecialOffset));
   float64x2_t s1 = vreinterpretq_f64_u64 (vsubq_u64 (v_u64 (SpecialBias1), b));
-  float64x2_t s2 = vreinterpretq_f64_u64 (
-    vaddq_u64 (vsubq_u64 (vreinterpretq_u64_f64 (s), v_u64 (SpecialBias2)), b));
+  float64x2_t s2 = vreinterpretq_f64_u64 (vaddq_u64 (
+      vsubq_u64 (vreinterpretq_u64_f64 (s), v_u64 (SpecialBias2)), b));
   uint64x2_t cmp = vcagtq_f64 (n, d->scale_uoflow_bound);
   float64x2_t r1 = vmulq_f64 (s1, s1);
   float64x2_t r0 = vmulq_f64 (vfmaq_f64 (s2, s2, y), s1);
@@ -119,10 +119,10 @@ float64x2_t V_NAME_D1 (exp2) (float64x2_t x)
   return vfmaq_f64 (s, s, y);
 }
 
-PL_SIG (V, D, 1, exp2, -9.9, 9.9)
-PL_TEST_ULP (V_NAME_D1 (exp2), 1.15)
-PL_TEST_EXPECT_FENV (V_NAME_D1 (exp2), WANT_SIMD_EXCEPT)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (exp2), 0, TinyBound, 5000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (exp2), TinyBound, BigBound, 10000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (exp2), BigBound, UOFlowBound, 5000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (exp2), UOFlowBound, inf, 10000)
+TEST_SIG (V, D, 1, exp2, -9.9, 9.9)
+TEST_ULP (V_NAME_D1 (exp2), 1.15)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (exp2), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_D1 (exp2), 0, TinyBound, 5000)
+TEST_SYM_INTERVAL (V_NAME_D1 (exp2), TinyBound, BigBound, 10000)
+TEST_SYM_INTERVAL (V_NAME_D1 (exp2), BigBound, UOFlowBound, 5000)
+TEST_SYM_INTERVAL (V_NAME_D1 (exp2), UOFlowBound, inf, 10000)
diff --git a/math/aarch64/v_exp2f.c b/math/aarch64/advsimd/exp2f.c
similarity index 58%
rename from math/aarch64/v_exp2f.c
rename to math/aarch64/advsimd/exp2f.c
index e402205e98e6be..40f6170d3702a7 100644
--- a/math/aarch64/v_exp2f.c
+++ b/math/aarch64/advsimd/exp2f.c
@@ -1,33 +1,38 @@
 /*
  * Single-precision vector 2^x function.
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#include "mathlib.h"
 #include "v_math.h"
+#include "test_defs.h"
+#include "test_sig.h"
 
 static const struct data
 {
-  float32x4_t poly[5];
-  uint32x4_t exponent_bias;
+  float32x4_t c1, c3;
+  uint32x4_t exponent_bias, special_offset, special_bias;
 #if !WANT_SIMD_EXCEPT
-  float32x4_t special_bound, scale_thresh;
+  float32x4_t scale_thresh, special_bound;
 #endif
+  float c0, c2, c4, zero;
 } data = {
   /* maxerr: 1.962 ulp.  */
-  .poly = { V4 (0x1.59977ap-10f), V4 (0x1.3ce9e4p-7f), V4 (0x1.c6bd32p-5f),
-	    V4 (0x1.ebf9bcp-3f), V4 (0x1.62e422p-1f) },
+  .c0 = 0x1.59977ap-10f,
+  .c1 = V4 (0x1.3ce9e4p-7f),
+  .c2 = 0x1.c6bd32p-5f,
+  .c3 = V4 (0x1.ebf9bcp-3f),
+  .c4 = 0x1.62e422p-1f,
   .exponent_bias = V4 (0x3f800000),
+  .special_offset = V4 (0x82000000),
+  .special_bias = V4 (0x7f000000),
 #if !WANT_SIMD_EXCEPT
   .special_bound = V4 (126.0f),
   .scale_thresh = V4 (192.0f),
 #endif
 };
 
-#define C(i) d->poly[i]
-
 #if WANT_SIMD_EXCEPT
 
 # define TinyBound v_u32 (0x20000000)	  /* asuint (0x1p-63).  */
@@ -44,16 +49,13 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp)
 
 #else
 
-# define SpecialOffset v_u32 (0x82000000)
-# define SpecialBias v_u32 (0x7f000000)
-
 static float32x4_t VPCS_ATTR NOINLINE
 special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1,
 	      float32x4_t scale, const struct data *d)
 {
   /* 2^n may overflow, break it up into s1*s2.  */
-  uint32x4_t b = vandq_u32 (vclezq_f32 (n), SpecialOffset);
-  float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, SpecialBias));
+  uint32x4_t b = vandq_u32 (vclezq_f32 (n), d->special_offset);
+  float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias));
   float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b));
   uint32x4_t cmp2 = vcagtq_f32 (n, d->scale_thresh);
   float32x4_t r2 = vmulq_f32 (s1, s1);
@@ -66,16 +68,14 @@ special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1,
 
 #endif
 
-float32x4_t VPCS_ATTR V_NAME_F1 (exp2) (float32x4_t x)
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp2) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
-  float32x4_t n, r, r2, scale, p, q, poly;
-  uint32x4_t cmp, e;
 
 #if WANT_SIMD_EXCEPT
   /* asuint(|x|) - TinyBound >= BigBound - TinyBound.  */
   uint32x4_t ia = vreinterpretq_u32_f32 (vabsq_f32 (x));
-  cmp = vcgeq_u32 (vsubq_u32 (ia, TinyBound), SpecialBound);
+  uint32x4_t cmp = vcgeq_u32 (vsubq_u32 (ia, TinyBound), SpecialBound);
   float32x4_t xm = x;
   /* If any lanes are special, mask them with 1 and retain a copy of x to allow
      special_case to fix special lanes later. This is only necessary if fenv
@@ -84,23 +84,24 @@ float32x4_t VPCS_ATTR V_NAME_F1 (exp2) (float32x4_t x)
     x = vbslq_f32 (cmp, v_f32 (1), x);
 #endif
 
-    /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
-       x = n + r, with r in [-1/2, 1/2].  */
-  n = vrndaq_f32 (x);
-  r = vsubq_f32 (x, n);
-  e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (x)), 23);
-  scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias));
+  /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
+     x = n + r, with r in [-1/2, 1/2].  */
+  float32x4_t n = vrndaq_f32 (x);
+  float32x4_t r = vsubq_f32 (x, n);
+  uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (x)), 23);
+  float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias));
 
 #if !WANT_SIMD_EXCEPT
-  cmp = vcagtq_f32 (n, d->special_bound);
+  uint32x4_t cmp = vcagtq_f32 (n, d->special_bound);
 #endif
 
-  r2 = vmulq_f32 (r, r);
-  p = vfmaq_f32 (C (1), C (0), r);
-  q = vfmaq_f32 (C (3), C (2), r);
+  float32x4_t c024 = vld1q_f32 (&d->c0);
+  float32x4_t r2 = vmulq_f32 (r, r);
+  float32x4_t p = vfmaq_laneq_f32 (d->c1, r, c024, 0);
+  float32x4_t q = vfmaq_laneq_f32 (d->c3, r, c024, 1);
   q = vfmaq_f32 (q, p, r2);
-  p = vmulq_f32 (C (4), r);
-  poly = vfmaq_f32 (p, q, r2);
+  p = vmulq_laneq_f32 (r, c024, 2);
+  float32x4_t poly = vfmaq_f32 (p, q, r2);
 
   if (unlikely (v_any_u32 (cmp)))
 #if WANT_SIMD_EXCEPT
@@ -111,3 +112,11 @@ float32x4_t VPCS_ATTR V_NAME_F1 (exp2) (float32x4_t x)
 
   return vfmaq_f32 (scale, poly, scale);
 }
+
+HALF_WIDTH_ALIAS_F1 (exp2)
+
+TEST_SIG (V, F, 1, exp2, -9.9, 9.9)
+TEST_ULP (V_NAME_F1 (exp2), 1.49)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (exp2), WANT_SIMD_EXCEPT)
+TEST_INTERVAL (V_NAME_F1 (exp2), 0, 0xffff0000, 10000)
+TEST_SYM_INTERVAL (V_NAME_F1 (exp2), 0x1p-14, 0x1p8, 500000)
diff --git a/math/aarch64/advsimd/exp2f_1u.c b/math/aarch64/advsimd/exp2f_1u.c
new file mode 100644
index 00000000000000..1f8e89ab658fa1
--- /dev/null
+++ b/math/aarch64/advsimd/exp2f_1u.c
@@ -0,0 +1,73 @@
+/*
+ * Single-precision vector 2^x function.
+ *
+ * Copyright (c) 2019-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "test_defs.h"
+
+static const struct data
+{
+  float32x4_t c0, c1, c2, c3, c4, c5, shift;
+  uint32x4_t exponent_bias;
+  float32x4_t special_bound, scale_thresh;
+  uint32x4_t special_offset, special_bias;
+} data = {
+  .shift = V4 (0x1.8p23f),
+  .exponent_bias = V4 (0x3f800000),
+  .special_bound = V4 (126.0f),
+  .scale_thresh = V4 (192.0f),
+  .special_offset = V4 (0x82000000),
+  .special_bias = V4 (0x7f000000),
+  /*  maxerr: 0.878 ulp.  */
+  .c0 = V4 (0x1.416b5ep-13f),
+  .c1 = V4 (0x1.5f082ep-10f),
+  .c2 = V4 (0x1.3b2dep-7f),
+  .c3 = V4 (0x1.c6af7cp-5f),
+  .c4 = V4 (0x1.ebfbdcp-3f),
+  .c5 = V4 (0x1.62e43p-1f),
+};
+
+static float32x4_t VPCS_ATTR NOINLINE
+specialcase (float32x4_t p, float32x4_t n, uint32x4_t e, const struct data *d)
+{
+  /* 2^n may overflow, break it up into s1*s2.  */
+  uint32x4_t b = vandq_u32 (vclezq_f32 (n), d->special_offset);
+  float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias));
+  float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b));
+  uint32x4_t cmp = vcagtq_f32 (n, d->scale_thresh);
+  float32x4_t r1 = vmulq_f32 (s1, s1);
+  float32x4_t r0 = vmulq_f32 (vmulq_f32 (p, s1), s2);
+  return vreinterpretq_f32_u32 ((cmp & vreinterpretq_u32_f32 (r1))
+				| (~cmp & vreinterpretq_u32_f32 (r0)));
+}
+
+float32x4_t VPCS_ATTR
+_ZGVnN4v_exp2f_1u (float32x4_t x)
+{
+  /* exp2(x) = 2^n * poly(r), with poly(r) in [1/sqrt(2),sqrt(2)]
+     x = n + r, with r in [-1/2, 1/2].  */
+  const struct data *d = ptr_barrier (&data);
+  float32x4_t n = vrndaq_f32 (x);
+  float32x4_t r = x - n;
+  uint32x4_t e = vreinterpretq_u32_s32 (vcvtaq_s32_f32 (x)) << 23;
+  float32x4_t scale = vreinterpretq_f32_u32 (e + d->exponent_bias);
+  uint32x4_t cmp = vcagtq_f32 (n, d->special_bound);
+
+  float32x4_t p = vfmaq_f32 (d->c1, d->c0, r);
+  p = vfmaq_f32 (d->c2, p, r);
+  p = vfmaq_f32 (d->c3, p, r);
+  p = vfmaq_f32 (d->c4, p, r);
+  p = vfmaq_f32 (d->c5, p, r);
+  p = vfmaq_f32 (v_f32 (1.0f), p, r);
+  if (unlikely (v_any_u32 (cmp)))
+    return specialcase (p, n, e, d);
+  return scale * p;
+}
+
+TEST_ULP (_ZGVnN4v_exp2f_1u, 0.4)
+TEST_DISABLE_FENV (_ZGVnN4v_exp2f_1u)
+TEST_INTERVAL (_ZGVnN4v_exp2f_1u, 0, 0xffff0000, 10000)
+TEST_SYM_INTERVAL (_ZGVnN4v_exp2f_1u, 0x1p-14, 0x1p8, 500000)
diff --git a/math/aarch64/v_expf.c b/math/aarch64/advsimd/expf.c
similarity index 61%
rename from math/aarch64/v_expf.c
rename to math/aarch64/advsimd/expf.c
index 34e8b6081bcd94..e5b1f020d1a02a 100644
--- a/math/aarch64/v_expf.c
+++ b/math/aarch64/advsimd/expf.c
@@ -1,30 +1,34 @@
 /*
  * Single-precision vector e^x function.
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
-
-#include "mathlib.h"
 #include "v_math.h"
+#include "test_defs.h"
+#include "test_sig.h"
 
 static const struct data
 {
-  float32x4_t poly[5];
-  float32x4_t shift, inv_ln2, ln2_hi, ln2_lo;
-  uint32x4_t exponent_bias;
+  float32x4_t c1, c3, c4, inv_ln2;
+  float ln2_hi, ln2_lo, c0, c2;
+  uint32x4_t exponent_bias, special_offset, special_bias;
 #if !WANT_SIMD_EXCEPT
   float32x4_t special_bound, scale_thresh;
 #endif
 } data = {
   /* maxerr: 1.45358 +0.5 ulp.  */
-  .poly = { V4 (0x1.0e4020p-7f), V4 (0x1.573e2ep-5f), V4 (0x1.555e66p-3f),
-	    V4 (0x1.fffdb6p-2f), V4 (0x1.ffffecp-1f) },
-  .shift = V4 (0x1.8p23f),
+  .c0 = 0x1.0e4020p-7f,
+  .c1 = V4 (0x1.573e2ep-5f),
+  .c2 = 0x1.555e66p-3f,
+  .c3 = V4 (0x1.fffdb6p-2f),
+  .c4 = V4 (0x1.ffffecp-1f),
   .inv_ln2 = V4 (0x1.715476p+0f),
-  .ln2_hi = V4 (0x1.62e4p-1f),
-  .ln2_lo = V4 (0x1.7f7d1cp-20f),
+  .ln2_hi = 0x1.62e4p-1f,
+  .ln2_lo = 0x1.7f7d1cp-20f,
   .exponent_bias = V4 (0x3f800000),
+  .special_offset = V4 (0x82000000),
+  .special_bias = V4 (0x7f000000),
 #if !WANT_SIMD_EXCEPT
   .special_bound = V4 (126.0f),
   .scale_thresh = V4 (192.0f),
@@ -49,19 +53,17 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp)
 
 #else
 
-# define SpecialOffset v_u32 (0x82000000)
-# define SpecialBias v_u32 (0x7f000000)
-
 static float32x4_t VPCS_ATTR NOINLINE
 special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1,
 	      float32x4_t scale, const struct data *d)
 {
   /* 2^n may overflow, break it up into s1*s2.  */
-  uint32x4_t b = vandq_u32 (vclezq_f32 (n), SpecialOffset);
-  float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, SpecialBias));
+  uint32x4_t b = vandq_u32 (vclezq_f32 (n), d->special_offset);
+  float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias));
   float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b));
   uint32x4_t cmp2 = vcagtq_f32 (n, d->scale_thresh);
   float32x4_t r2 = vmulq_f32 (s1, s1);
+  /* r1 = (s2 + poly * s2) * s1 = s2 * (1 + poly) * s1.  */
   float32x4_t r1 = vmulq_f32 (vfmaq_f32 (s2, poly, s2), s1);
   /* Similar to r1 but avoids double rounding in the subnormal range.  */
   float32x4_t r0 = vfmaq_f32 (scale, poly, scale);
@@ -71,15 +73,14 @@ special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1,
 
 #endif
 
-float32x4_t VPCS_ATTR V_NAME_F1 (exp) (float32x4_t x)
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
-  float32x4_t n, r, r2, scale, p, q, poly, z;
-  uint32x4_t cmp, e;
+  float32x4_t ln2_c02 = vld1q_f32 (&d->ln2_hi);
 
 #if WANT_SIMD_EXCEPT
   /* asuint(x) - TinyBound >= BigBound - TinyBound.  */
-  cmp = vcgeq_u32 (
+  uint32x4_t cmp = vcgeq_u32 (
       vsubq_u32 (vandq_u32 (vreinterpretq_u32_f32 (x), v_u32 (0x7fffffff)),
 		 TinyBound),
       SpecialBound);
@@ -93,23 +94,22 @@ float32x4_t VPCS_ATTR V_NAME_F1 (exp) (float32x4_t x)
 
   /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
      x = ln2*n + r, with r in [-ln2/2, ln2/2].  */
-  z = vfmaq_f32 (d->shift, x, d->inv_ln2);
-  n = vsubq_f32 (z, d->shift);
-  r = vfmsq_f32 (x, n, d->ln2_hi);
-  r = vfmsq_f32 (r, n, d->ln2_lo);
-  e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23);
-  scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias));
+  float32x4_t n = vrndaq_f32 (vmulq_f32 (x, d->inv_ln2));
+  float32x4_t r = vfmsq_laneq_f32 (x, n, ln2_c02, 0);
+  r = vfmsq_laneq_f32 (r, n, ln2_c02, 1);
+  uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 23);
+  float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias));
 
 #if !WANT_SIMD_EXCEPT
-  cmp = vcagtq_f32 (n, d->special_bound);
+  uint32x4_t cmp = vcagtq_f32 (n, d->special_bound);
 #endif
 
-  r2 = vmulq_f32 (r, r);
-  p = vfmaq_f32 (C (1), C (0), r);
-  q = vfmaq_f32 (C (3), C (2), r);
+  float32x4_t r2 = vmulq_f32 (r, r);
+  float32x4_t p = vfmaq_laneq_f32 (d->c1, r, ln2_c02, 2);
+  float32x4_t q = vfmaq_laneq_f32 (d->c3, r, ln2_c02, 3);
   q = vfmaq_f32 (q, p, r2);
-  p = vmulq_f32 (C (4), r);
-  poly = vfmaq_f32 (p, q, r2);
+  p = vmulq_f32 (d->c4, r);
+  float32x4_t poly = vfmaq_f32 (p, q, r2);
 
   if (unlikely (v_any_u32 (cmp)))
 #if WANT_SIMD_EXCEPT
@@ -120,3 +120,11 @@ float32x4_t VPCS_ATTR V_NAME_F1 (exp) (float32x4_t x)
 
   return vfmaq_f32 (scale, poly, scale);
 }
+
+HALF_WIDTH_ALIAS_F1 (exp)
+
+TEST_SIG (V, F, 1, exp, -9.9, 9.9)
+TEST_ULP (V_NAME_F1 (exp), 1.49)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (exp), WANT_SIMD_EXCEPT)
+TEST_INTERVAL (V_NAME_F1 (exp), 0, 0xffff0000, 10000)
+TEST_SYM_INTERVAL (V_NAME_F1 (exp), 0x1p-14, 0x1p8, 500000)
diff --git a/math/aarch64/advsimd/expf_1u.c b/math/aarch64/advsimd/expf_1u.c
new file mode 100644
index 00000000000000..4e114d810e08b3
--- /dev/null
+++ b/math/aarch64/advsimd/expf_1u.c
@@ -0,0 +1,79 @@
+/*
+ * Single-precision vector e^x function.
+ *
+ * Copyright (c) 2019-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "v_math.h"
+#include "test_defs.h"
+
+static const struct data
+{
+  float32x4_t shift, inv_ln2;
+  uint32x4_t exponent_bias;
+  float32x4_t c1, c2, c3, c4;
+  float32x4_t special_bound, scale_thresh;
+  uint32x4_t special_offset, special_bias;
+  float ln2_hi, ln2_lo, c0, nothing;
+} data = {
+  .ln2_hi = 0x1.62e4p-1f,
+  .ln2_lo = 0x1.7f7d1cp-20f,
+  .shift = V4 (0x1.8p23f),
+  .inv_ln2 = V4 (0x1.715476p+0f),
+  .exponent_bias = V4 (0x3f800000),
+  .special_bound = V4 (126.0f),
+  .scale_thresh = V4 (192.0f),
+  .special_offset = V4 (0x83000000),
+  .special_bias = V4 (0x7f000000),
+  /*  maxerr: 0.36565 +0.5 ulp.  */
+  .c0 = 0x1.6a6000p-10f,
+  .c1 = V4 (0x1.12718ep-7f),
+  .c2 = V4 (0x1.555af0p-5f),
+  .c3 = V4 (0x1.555430p-3f),
+  .c4 = V4 (0x1.fffff4p-2f),
+};
+
+static float32x4_t VPCS_ATTR NOINLINE
+specialcase (float32x4_t p, float32x4_t n, uint32x4_t e, const struct data *d)
+{
+  /* 2^n may overflow, break it up into s1*s2.  */
+  uint32x4_t b = vandq_u32 (vclezq_f32 (n), d->special_offset);
+  float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias));
+  float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b));
+  uint32x4_t cmp = vcagtq_f32 (n, d->scale_thresh);
+  float32x4_t r1 = vmulq_f32 (s1, s1);
+  float32x4_t r0 = vmulq_f32 (vmulq_f32 (p, s1), s2);
+  return vreinterpretq_f32_u32 ((cmp & vreinterpretq_u32_f32 (r1))
+				| (~cmp & vreinterpretq_u32_f32 (r0)));
+}
+
+float32x4_t VPCS_ATTR
+_ZGVnN4v_expf_1u (float32x4_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+  float32x4_t ln2_c0 = vld1q_f32 (&d->ln2_hi);
+
+  /* exp(x) = 2^n * poly(r), with poly(r) in [1/sqrt(2),sqrt(2)]
+     x = ln2*n + r, with r in [-ln2/2, ln2/2].  */
+  float32x4_t z = vmulq_f32 (x, d->inv_ln2);
+  float32x4_t n = vrndaq_f32 (z);
+  float32x4_t r = vfmsq_laneq_f32 (x, n, ln2_c0, 0);
+  r = vfmsq_laneq_f32 (r, n, ln2_c0, 1);
+  uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (z)), 23);
+  float32x4_t scale = vreinterpretq_f32_u32 (e + d->exponent_bias);
+  uint32x4_t cmp = vcagtq_f32 (n, d->special_bound);
+  float32x4_t p = vfmaq_laneq_f32 (d->c1, r, ln2_c0, 2);
+  p = vfmaq_f32 (d->c2, p, r);
+  p = vfmaq_f32 (d->c3, p, r);
+  p = vfmaq_f32 (d->c4, p, r);
+  p = vfmaq_f32 (v_f32 (1.0f), p, r);
+  p = vfmaq_f32 (v_f32 (1.0f), p, r);
+  if (unlikely (v_any_u32 (cmp)))
+    return specialcase (p, n, e, d);
+  return scale * p;
+}
+
+TEST_ULP (_ZGVnN4v_expf_1u, 0.4)
+TEST_DISABLE_FENV (_ZGVnN4v_expf_1u)
+TEST_INTERVAL (_ZGVnN4v_expf_1u, 0, 0xffff0000, 10000)
+TEST_SYM_INTERVAL (_ZGVnN4v_expf_1u, 0x1p-14, 0x1p8, 500000)
diff --git a/math/aarch64/advsimd/expm1.c b/math/aarch64/advsimd/expm1.c
new file mode 100644
index 00000000000000..7535a18304277e
--- /dev/null
+++ b/math/aarch64/advsimd/expm1.c
@@ -0,0 +1,77 @@
+/*
+ * Double-precision vector exp(x) - 1 function.
+ *
+ * Copyright (c) 2022-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "v_expm1_inline.h"
+
+static const struct data
+{
+  struct v_expm1_data d;
+#if WANT_SIMD_EXCEPT
+  uint64x2_t thresh, tiny_bound;
+#else
+  float64x2_t oflow_bound;
+#endif
+} data = {
+  .d = V_EXPM1_DATA,
+#if WANT_SIMD_EXCEPT
+  /* asuint64(oflow_bound) - asuint64(0x1p-51), shifted left by 1 for abs
+     compare.  */
+  .thresh = V2 (0x78c56fa6d34b552),
+  /* asuint64(0x1p-51) << 1.  */
+  .tiny_bound = V2 (0x3cc0000000000000 << 1),
+#else
+  /* Value above which expm1(x) should overflow. Absolute value of the
+     underflow bound is greater than this, so it catches both cases - there is
+     a small window where fallbacks are triggered unnecessarily.  */
+  .oflow_bound = V2 (0x1.62b7d369a5aa9p+9),
+#endif
+};
+
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t x, uint64x2_t special, const struct data *d)
+{
+  return v_call_f64 (expm1, x, expm1_inline (v_zerofy_f64 (x, special), &d->d),
+		     special);
+}
+
+/* Double-precision vector exp(x) - 1 function.
+   The maximum observed error is 2.05 ULP:
+  _ZGVnN2v_expm1(0x1.6329669eb8c87p-2) got 0x1.a8897eef87b34p-2
+				      want 0x1.a8897eef87b32p-2.  */
+float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+#if WANT_SIMD_EXCEPT
+  uint64x2_t ix = vreinterpretq_u64_f64 (x);
+  /* If fp exceptions are to be triggered correctly, fall back to scalar for
+     |x| < 2^-51, |x| > oflow_bound, Inf & NaN. Add ix to itself for
+     shift-left by 1, and compare with thresh which was left-shifted offline -
+     this is effectively an absolute compare.  */
+  uint64x2_t special
+      = vcgeq_u64 (vsubq_u64 (vaddq_u64 (ix, ix), d->tiny_bound), d->thresh);
+#else
+  /* Large input, NaNs and Infs.  */
+  uint64x2_t special = vcageq_f64 (x, d->oflow_bound);
+#endif
+
+  if (unlikely (v_any_u64 (special)))
+    return special_case (x, special, d);
+
+  /* expm1(x) ~= p * t + (t - 1).  */
+  return expm1_inline (x, &d->d);
+}
+
+TEST_SIG (V, D, 1, expm1, -9.9, 9.9)
+TEST_ULP (V_NAME_D1 (expm1), 1.56)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (expm1), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_D1 (expm1), 0, 0x1p-51, 1000)
+TEST_SYM_INTERVAL (V_NAME_D1 (expm1), 0x1p-51, 0x1.62b7d369a5aa9p+9, 100000)
+TEST_SYM_INTERVAL (V_NAME_D1 (expm1), 0x1.62b7d369a5aa9p+9, inf, 100)
diff --git a/math/aarch64/advsimd/expm1f.c b/math/aarch64/advsimd/expm1f.c
new file mode 100644
index 00000000000000..6d4431dcd8a5e3
--- /dev/null
+++ b/math/aarch64/advsimd/expm1f.c
@@ -0,0 +1,82 @@
+/*
+ * Single-precision vector exp(x) - 1 function.
+ *
+ * Copyright (c) 2022-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "v_expm1f_inline.h"
+
+static const struct data
+{
+  struct v_expm1f_data d;
+#if WANT_SIMD_EXCEPT
+  uint32x4_t thresh;
+#else
+  float32x4_t oflow_bound;
+#endif
+} data = {
+  .d = V_EXPM1F_DATA,
+#if !WANT_SIMD_EXCEPT
+  /* Value above which expm1f(x) should overflow. Absolute value of the
+     underflow bound is greater than this, so it catches both cases - there is
+     a small window where fallbacks are triggered unnecessarily.  */
+  .oflow_bound = V4 (0x1.5ebc4p+6),
+#else
+  /* asuint(oflow_bound) - asuint(0x1p-23), shifted left by 1 for absolute
+     compare.  */
+  .thresh = V4 (0x1d5ebc40),
+#endif
+};
+
+/* asuint(0x1p-23), shifted by 1 for abs compare.  */
+#define TinyBound v_u32 (0x34000000 << 1)
+
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, uint32x4_t special, const struct data *d)
+{
+  return v_call_f32 (
+      expm1f, x, expm1f_inline (v_zerofy_f32 (x, special), &d->d), special);
+}
+
+/* Single-precision vector exp(x) - 1 function.
+   The maximum error is 1.62 ULP:
+   _ZGVnN4v_expm1f(0x1.85f83p-2) got 0x1.da9f4p-2
+				want 0x1.da9f44p-2.  */
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+#if WANT_SIMD_EXCEPT
+  uint32x4_t ix = vreinterpretq_u32_f32 (x);
+  /* If fp exceptions are to be triggered correctly, fall back to scalar for
+     |x| < 2^-23, |x| > oflow_bound, Inf & NaN. Add ix to itself for
+     shift-left by 1, and compare with thresh which was left-shifted offline -
+     this is effectively an absolute compare.  */
+  uint32x4_t special
+      = vcgeq_u32 (vsubq_u32 (vaddq_u32 (ix, ix), TinyBound), d->thresh);
+#else
+  /* Handles very large values (+ve and -ve), +/-NaN, +/-Inf.  */
+  uint32x4_t special = vcagtq_f32 (x, d->oflow_bound);
+#endif
+
+  if (unlikely (v_any_u32 (special)))
+    return special_case (x, special, d);
+
+  /* expm1(x) ~= p * t + (t - 1).  */
+  return expm1f_inline (x, &d->d);
+}
+
+HALF_WIDTH_ALIAS_F1 (expm1)
+
+TEST_SIG (V, F, 1, expm1, -9.9, 9.9)
+TEST_ULP (V_NAME_F1 (expm1), 1.13)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (expm1), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_F1 (expm1), 0, 0x1p-23, 1000)
+TEST_INTERVAL (V_NAME_F1 (expm1), -0x1p-23, 0x1.5ebc4p+6, 1000000)
+TEST_INTERVAL (V_NAME_F1 (expm1), -0x1p-23, -0x1.9bbabcp+6, 1000000)
+TEST_INTERVAL (V_NAME_F1 (expm1), 0x1.5ebc4p+6, inf, 1000)
+TEST_INTERVAL (V_NAME_F1 (expm1), -0x1.9bbabcp+6, -inf, 1000)
diff --git a/pl/math/finite_pow.h b/math/aarch64/advsimd/finite_pow.h
similarity index 94%
rename from pl/math/finite_pow.h
rename to math/aarch64/advsimd/finite_pow.h
index 8944d4fae62589..0c8350a1a77bb3 100644
--- a/pl/math/finite_pow.h
+++ b/math/aarch64/advsimd/finite_pow.h
@@ -1,7 +1,7 @@
 /*
  * Double-precision x^y function.
  *
- * Copyright (c) 2018-2023, Arm Limited.
+ * Copyright (c) 2018-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -108,7 +108,7 @@ special_case (double tmp, uint64_t sbits, uint64_t ki)
       sbits -= 1009ull << 52;
       scale = asdouble (sbits);
       y = 0x1p1009 * (scale + scale * tmp);
-      return check_oflow (eval_as_double (y));
+      return y;
     }
   /* k < 0, need special care in the subnormal range.  */
   sbits += 1022ull << 52;
@@ -128,7 +128,7 @@ special_case (double tmp, uint64_t sbits, uint64_t ki)
       lo = scale - y + scale * tmp;
       hi = one + y;
       lo = one - hi + y + lo;
-      y = eval_as_double (hi + lo) - one;
+      y = (hi + lo) - one;
       /* Fix the sign of 0.  */
       if (y == 0.0)
 	y = asdouble (sbits & 0x8000000000000000);
@@ -137,7 +137,7 @@ special_case (double tmp, uint64_t sbits, uint64_t ki)
     }
 #endif
   y = 0x1p-1022 * y;
-  return check_uflow (eval_as_double (y));
+  return y;
 }
 
 /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
@@ -192,7 +192,7 @@ exp_inline (double x, double xtail, uint32_t sign_bias)
   double scale = asdouble (sbits);
   /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
      is no spurious underflow here even without fma.  */
-  return eval_as_double (scale + scale * tmp);
+  return scale + scale * tmp;
 }
 
 /* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
@@ -239,7 +239,7 @@ exp_nosignbias (double x, double xtail)
   double scale = asdouble (sbits);
   /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
      is no spurious underflow here even without fma.  */
-  return eval_as_double (scale + scale * tmp);
+  return scale + scale * tmp;
 }
 
 /* Returns 0 if not int, 1 if odd int, 2 if even int.  The argument is
@@ -267,7 +267,7 @@ zeroinfnan (uint64_t i)
 }
 
 static double NOINLINE
-__pl_finite_pow (double x, double y)
+pow_scalar_special_case (double x, double y)
 {
   uint32_t sign_bias = 0;
   uint64_t ix, iy;
@@ -311,9 +311,7 @@ __pl_finite_pow (double x, double y)
 	  if (2 * ix == 0 && iy >> 63)
 	    return __math_divzero (sign_bias);
 #endif
-	  /* Without the barrier some versions of clang hoist the 1/x2 and
-	     thus division by zero exception can be signaled spuriously.  */
-	  return iy >> 63 ? opt_barrier_double (1 / x2) : x2;
+	  return iy >> 63 ? 1 / x2 : x2;
 	}
       /* Here x and y are non-zero finite.  */
       if (ix >> 63)
@@ -349,9 +347,7 @@ __pl_finite_pow (double x, double y)
       if (topx == 0)
 	{
 	  /* Normalize subnormal x so exponent becomes negative.  */
-	  /* Without the barrier some versions of clang evalutate the mul
-	     unconditionally causing spurious overflow exceptions.  */
-	  ix = asuint64 (opt_barrier_double (x) * 0x1p52);
+	  ix = asuint64 (x * 0x1p52);
 	  ix &= 0x7fffffffffffffff;
 	  ix -= 52ULL << 52;
 	}
diff --git a/pl/math/v_hypot_1u5.c b/math/aarch64/advsimd/hypot.c
similarity index 74%
rename from pl/math/v_hypot_1u5.c
rename to math/aarch64/advsimd/hypot.c
index d4ff7be89a8fa7..dc01ed5bac931e 100644
--- a/pl/math/v_hypot_1u5.c
+++ b/math/aarch64/advsimd/hypot.c
@@ -1,13 +1,13 @@
 /*
  * Double-precision vector hypot(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #if WANT_SIMD_EXCEPT
 static const struct data
@@ -15,7 +15,7 @@ static const struct data
   uint64x2_t tiny_bound, thres;
 } data = {
   .tiny_bound = V2 (0x2000000000000000), /* asuint (0x1p-511).  */
-  .thres = V2 (0x3fe0000000000000), /* asuint (0x1p511) - tiny_bound.  */
+  .thres = V2 (0x3fe0000000000000),	 /* asuint (0x1p511) - tiny_bound.  */
 };
 #else
 static const struct data
@@ -24,7 +24,7 @@ static const struct data
   uint32x4_t thres;
 } data = {
   .tiny_bound = V2 (0x0360000000000000), /* asuint (0x1p-969).  */
-  .thres = V4 (0x7c900000),	 /* asuint (inf) - tiny_bound.  */
+  .thres = V4 (0x7c900000),		 /* asuint (inf) - tiny_bound.  */
 };
 #endif
 
@@ -75,9 +75,9 @@ float64x2_t VPCS_ATTR V_NAME_D2 (hypot) (float64x2_t x, float64x2_t y)
 
   float64x2_t sqsum = vfmaq_f64 (vmulq_f64 (x, x), y, y);
 
-  uint32x2_t special = vcge_u32 (
-      vsubhn_u64 (vreinterpretq_u64_f64 (sqsum), d->tiny_bound),
-      vget_low_u32 (d->thres));
+  uint32x2_t special
+      = vcge_u32 (vsubhn_u64 (vreinterpretq_u64_f64 (sqsum), d->tiny_bound),
+		  vget_low_u32 (d->thres));
 
   if (unlikely (v_any_u32h (special)))
     return special_case (x, y, sqsum, special);
@@ -86,10 +86,10 @@ float64x2_t VPCS_ATTR V_NAME_D2 (hypot) (float64x2_t x, float64x2_t y)
 }
 #endif
 
-PL_SIG (V, D, 2, hypot, -10.0, 10.0)
-PL_TEST_ULP (V_NAME_D2 (hypot), 1.21)
-PL_TEST_EXPECT_FENV (V_NAME_D2 (hypot), WANT_SIMD_EXCEPT)
-PL_TEST_INTERVAL2 (V_NAME_D2 (hypot), 0, inf, 0, inf, 10000)
-PL_TEST_INTERVAL2 (V_NAME_D2 (hypot), 0, inf, -0, -inf, 10000)
-PL_TEST_INTERVAL2 (V_NAME_D2 (hypot), -0, -inf, 0, inf, 10000)
-PL_TEST_INTERVAL2 (V_NAME_D2 (hypot), -0, -inf, -0, -inf, 10000)
+TEST_SIG (V, D, 2, hypot, -10.0, 10.0)
+TEST_ULP (V_NAME_D2 (hypot), 1.21)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D2 (hypot), WANT_SIMD_EXCEPT)
+TEST_INTERVAL2 (V_NAME_D2 (hypot), 0, inf, 0, inf, 10000)
+TEST_INTERVAL2 (V_NAME_D2 (hypot), 0, inf, -0, -inf, 10000)
+TEST_INTERVAL2 (V_NAME_D2 (hypot), -0, -inf, 0, inf, 10000)
+TEST_INTERVAL2 (V_NAME_D2 (hypot), -0, -inf, -0, -inf, 10000)
diff --git a/pl/math/v_hypotf_1u5.c b/math/aarch64/advsimd/hypotf.c
similarity index 68%
rename from pl/math/v_hypotf_1u5.c
rename to math/aarch64/advsimd/hypotf.c
index 3227b0a3fd8bac..69634875be5a35 100644
--- a/pl/math/v_hypotf_1u5.c
+++ b/math/aarch64/advsimd/hypotf.c
@@ -1,13 +1,13 @@
 /*
  * Single-precision vector hypot(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #if WANT_SIMD_EXCEPT
 static const struct data
@@ -15,7 +15,7 @@ static const struct data
   uint32x4_t tiny_bound, thres;
 } data = {
   .tiny_bound = V4 (0x20000000), /* asuint (0x1p-63).  */
-  .thres = V4 (0x3f000000), /* asuint (0x1p63) - tiny_bound.  */
+  .thres = V4 (0x3f000000),	 /* asuint (0x1p63) - tiny_bound.  */
 };
 #else
 static const struct data
@@ -24,7 +24,7 @@ static const struct data
   uint16x8_t thres;
 } data = {
   .tiny_bound = V4 (0x0C800000), /* asuint (0x1p-102).  */
-  .thres = V8 (0x7300), /* asuint (inf) - tiny_bound.  */
+  .thres = V8 (0x7300),		 /* asuint (inf) - tiny_bound.  */
 };
 #endif
 
@@ -41,7 +41,7 @@ special_case (float32x4_t x, float32x4_t y, float32x4_t sqsum,
 						    want 0x1.6a41dp-13.  */
 #if WANT_SIMD_EXCEPT
 
-float32x4_t VPCS_ATTR V_NAME_F2 (hypot) (float32x4_t x, float32x4_t y)
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (hypot) (float32x4_t x, float32x4_t y)
 {
   const struct data *d = ptr_barrier (&data);
 
@@ -68,15 +68,15 @@ float32x4_t VPCS_ATTR V_NAME_F2 (hypot) (float32x4_t x, float32x4_t y)
 }
 #else
 
-float32x4_t VPCS_ATTR V_NAME_F2 (hypot) (float32x4_t x, float32x4_t y)
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (hypot) (float32x4_t x, float32x4_t y)
 {
   const struct data *d = ptr_barrier (&data);
 
   float32x4_t sqsum = vfmaq_f32 (vmulq_f32 (x, x), y, y);
 
-  uint16x4_t special = vcge_u16 (
-      vsubhn_u32 (vreinterpretq_u32_f32 (sqsum), d->tiny_bound),
-      vget_low_u16 (d->thres));
+  uint16x4_t special
+      = vcge_u16 (vsubhn_u32 (vreinterpretq_u32_f32 (sqsum), d->tiny_bound),
+		  vget_low_u16 (d->thres));
 
   if (unlikely (v_any_u16h (special)))
     return special_case (x, y, sqsum, special);
@@ -85,10 +85,12 @@ float32x4_t VPCS_ATTR V_NAME_F2 (hypot) (float32x4_t x, float32x4_t y)
 }
 #endif
 
-PL_SIG (V, F, 2, hypot, -10.0, 10.0)
-PL_TEST_ULP (V_NAME_F2 (hypot), 1.21)
-PL_TEST_EXPECT_FENV (V_NAME_F2 (hypot), WANT_SIMD_EXCEPT)
-PL_TEST_INTERVAL2 (V_NAME_F2 (hypot), 0, inf, 0, inf, 10000)
-PL_TEST_INTERVAL2 (V_NAME_F2 (hypot), 0, inf, -0, -inf, 10000)
-PL_TEST_INTERVAL2 (V_NAME_F2 (hypot), -0, -inf, 0, inf, 10000)
-PL_TEST_INTERVAL2 (V_NAME_F2 (hypot), -0, -inf, -0, -inf, 10000)
+HALF_WIDTH_ALIAS_F2 (hypot)
+
+TEST_SIG (V, F, 2, hypot, -10.0, 10.0)
+TEST_ULP (V_NAME_F2 (hypot), 1.21)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F2 (hypot), WANT_SIMD_EXCEPT)
+TEST_INTERVAL2 (V_NAME_F2 (hypot), 0, inf, 0, inf, 10000)
+TEST_INTERVAL2 (V_NAME_F2 (hypot), 0, inf, -0, -inf, 10000)
+TEST_INTERVAL2 (V_NAME_F2 (hypot), -0, -inf, 0, inf, 10000)
+TEST_INTERVAL2 (V_NAME_F2 (hypot), -0, -inf, -0, -inf, 10000)
diff --git a/math/aarch64/advsimd/log.c b/math/aarch64/advsimd/log.c
new file mode 100644
index 00000000000000..94e3f448207987
--- /dev/null
+++ b/math/aarch64/advsimd/log.c
@@ -0,0 +1,118 @@
+/*
+ * Double-precision vector log(x) function.
+ *
+ * Copyright (c) 2019-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "test_defs.h"
+#include "test_sig.h"
+
+static const struct data
+{
+  uint64x2_t off, sign_exp_mask, offset_lower_bound;
+  uint32x4_t special_bound;
+  float64x2_t c0, c2;
+  double c1, c3, ln2, c4;
+} data = {
+  /* Rel error: 0x1.6272e588p-56 in [ -0x1.fc1p-9 0x1.009p-8 ].  */
+  .c0 = V2 (-0x1.ffffffffffff7p-2),
+  .c1 = 0x1.55555555170d4p-2,
+  .c2 = V2 (-0x1.0000000399c27p-2),
+  .c3 = 0x1.999b2e90e94cap-3,
+  .c4 = -0x1.554e550bd501ep-3,
+  .ln2 = 0x1.62e42fefa39efp-1,
+  .sign_exp_mask = V2 (0xfff0000000000000),
+  .off = V2 (0x3fe6900900000000),
+  /* Lower bound is 0x0010000000000000. For
+     optimised register use subnormals are detected after offset has been
+     subtracted, so lower bound - offset (which wraps around).  */
+  .offset_lower_bound = V2 (0x0010000000000000 - 0x3fe6900900000000),
+  .special_bound = V4 (0x7fe00000), /* asuint64(inf) - asuint64(0x1p-1022).  */
+};
+
+#define N (1 << V_LOG_TABLE_BITS)
+#define IndexMask (N - 1)
+
+struct entry
+{
+  float64x2_t invc;
+  float64x2_t logc;
+};
+
+static inline struct entry
+lookup (uint64x2_t i)
+{
+  /* Since N is a power of 2, n % N = n & (N - 1).  */
+  struct entry e;
+  uint64_t i0 = (vgetq_lane_u64 (i, 0) >> (52 - V_LOG_TABLE_BITS)) & IndexMask;
+  uint64_t i1 = (vgetq_lane_u64 (i, 1) >> (52 - V_LOG_TABLE_BITS)) & IndexMask;
+  float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc);
+  float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc);
+  e.invc = vuzp1q_f64 (e0, e1);
+  e.logc = vuzp2q_f64 (e0, e1);
+  return e;
+}
+
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t hi, uint64x2_t u_off, float64x2_t y, float64x2_t r2,
+	      uint32x2_t special, const struct data *d)
+{
+  float64x2_t x = vreinterpretq_f64_u64 (vaddq_u64 (u_off, d->off));
+  return v_call_f64 (log, x, vfmaq_f64 (hi, y, r2), vmovl_u32 (special));
+}
+
+/* Double-precision vector log routine.
+   The maximum observed error is 2.17 ULP:
+   _ZGVnN2v_log(0x1.a6129884398a3p+0) got 0x1.ffffff1cca043p-2
+				     want 0x1.ffffff1cca045p-2.  */
+float64x2_t VPCS_ATTR V_NAME_D1 (log) (float64x2_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  /* To avoid having to mov x out of the way, keep u after offset has been
+     applied, and recover x by adding the offset back in the special-case
+     handler.  */
+  uint64x2_t u = vreinterpretq_u64_f64 (x);
+  uint64x2_t u_off = vsubq_u64 (u, d->off);
+
+  /* x = 2^k z; where z is in range [Off,2*Off) and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (u_off), 52);
+  uint64x2_t iz = vsubq_u64 (u, vandq_u64 (u_off, d->sign_exp_mask));
+  float64x2_t z = vreinterpretq_f64_u64 (iz);
+
+  struct entry e = lookup (u_off);
+
+  uint32x2_t special = vcge_u32 (vsubhn_u64 (u_off, d->offset_lower_bound),
+				 vget_low_u32 (d->special_bound));
+
+  /* log(x) = log1p(z/c-1) + log(c) + k*Ln2.  */
+  float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc);
+  float64x2_t kd = vcvtq_f64_s64 (k);
+
+  /* hi = r + log(c) + k*Ln2.  */
+  float64x2_t ln2_and_c4 = vld1q_f64 (&d->ln2);
+  float64x2_t hi = vfmaq_laneq_f64 (vaddq_f64 (e.logc, r), kd, ln2_and_c4, 0);
+
+  /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi.  */
+  float64x2_t odd_coeffs = vld1q_f64 (&d->c1);
+  float64x2_t r2 = vmulq_f64 (r, r);
+  float64x2_t y = vfmaq_laneq_f64 (d->c2, r, odd_coeffs, 1);
+  float64x2_t p = vfmaq_laneq_f64 (d->c0, r, odd_coeffs, 0);
+  y = vfmaq_laneq_f64 (y, r2, ln2_and_c4, 1);
+  y = vfmaq_f64 (p, r2, y);
+
+  if (unlikely (v_any_u32h (special)))
+    return special_case (hi, u_off, y, r2, special, d);
+  return vfmaq_f64 (hi, y, r2);
+}
+
+TEST_SIG (V, D, 1, log, 0.01, 11.1)
+TEST_ULP (V_NAME_D1 (log), 1.67)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (log), WANT_SIMD_EXCEPT)
+TEST_INTERVAL (V_NAME_D1 (log), 0, 0xffff000000000000, 10000)
+TEST_INTERVAL (V_NAME_D1 (log), 0x1p-4, 0x1p4, 400000)
+TEST_INTERVAL (V_NAME_D1 (log), 0, inf, 400000)
diff --git a/math/aarch64/advsimd/log10.c b/math/aarch64/advsimd/log10.c
new file mode 100644
index 00000000000000..c2b8f1c54f0e91
--- /dev/null
+++ b/math/aarch64/advsimd/log10.c
@@ -0,0 +1,132 @@
+/*
+ * Double-precision vector log10(x) function.
+ *
+ * Copyright (c) 2022-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+
+static const struct data
+{
+  uint64x2_t off, sign_exp_mask, offset_lower_bound;
+  uint32x4_t special_bound;
+  double invln10, log10_2;
+  double c1, c3;
+  float64x2_t c0, c2, c4;
+} data = {
+  /* Computed from log coefficients divided by log(10) then rounded to double
+     precision.  */
+  .c0 = V2 (-0x1.bcb7b1526e506p-3),
+  .c1 = 0x1.287a7636be1d1p-3,
+  .c2 = V2 (-0x1.bcb7b158af938p-4),
+  .c3 = 0x1.63c78734e6d07p-4,
+  .c4 = V2 (-0x1.287461742fee4p-4),
+  .invln10 = 0x1.bcb7b1526e50ep-2,
+  .log10_2 = 0x1.34413509f79ffp-2,
+  .off = V2 (0x3fe6900900000000),
+  .sign_exp_mask = V2 (0xfff0000000000000),
+  /* Lower bound is 0x0010000000000000. For
+     optimised register use subnormals are detected after offset has been
+     subtracted, so lower bound - offset (which wraps around).  */
+  .offset_lower_bound = V2 (0x0010000000000000 - 0x3fe6900900000000),
+  .special_bound = V4 (0x7fe00000), /* asuint64(inf) - 0x0010000000000000.  */
+};
+
+#define N (1 << V_LOG10_TABLE_BITS)
+#define IndexMask (N - 1)
+
+struct entry
+{
+  float64x2_t invc;
+  float64x2_t log10c;
+};
+
+static inline struct entry
+lookup (uint64x2_t i)
+{
+  struct entry e;
+  uint64_t i0
+      = (vgetq_lane_u64 (i, 0) >> (52 - V_LOG10_TABLE_BITS)) & IndexMask;
+  uint64_t i1
+      = (vgetq_lane_u64 (i, 1) >> (52 - V_LOG10_TABLE_BITS)) & IndexMask;
+  float64x2_t e0 = vld1q_f64 (&__v_log10_data.table[i0].invc);
+  float64x2_t e1 = vld1q_f64 (&__v_log10_data.table[i1].invc);
+  e.invc = vuzp1q_f64 (e0, e1);
+  e.log10c = vuzp2q_f64 (e0, e1);
+  return e;
+}
+
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t hi, uint64x2_t u_off, float64x2_t y, float64x2_t r2,
+	      uint32x2_t special, const struct data *d)
+{
+  float64x2_t x = vreinterpretq_f64_u64 (vaddq_u64 (u_off, d->off));
+  return v_call_f64 (log10, x, vfmaq_f64 (hi, y, r2), vmovl_u32 (special));
+}
+
+/* Fast implementation of double-precision vector log10
+   is a slight modification of double-precision vector log.
+   Max ULP error: < 2.5 ulp (nearest rounding.)
+   Maximum measured at 2.46 ulp for x in [0.96, 0.97]
+   _ZGVnN2v_log10(0x1.13192407fcb46p+0) got 0x1.fff6be3cae4bbp-6
+				       want 0x1.fff6be3cae4b9p-6.  */
+float64x2_t VPCS_ATTR V_NAME_D1 (log10) (float64x2_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  /* To avoid having to mov x out of the way, keep u after offset has been
+     applied, and recover x by adding the offset back in the special-case
+     handler.  */
+  uint64x2_t u = vreinterpretq_u64_f64 (x);
+  uint64x2_t u_off = vsubq_u64 (u, d->off);
+
+  /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (u_off), 52);
+  uint64x2_t iz = vsubq_u64 (u, vandq_u64 (u_off, d->sign_exp_mask));
+  float64x2_t z = vreinterpretq_f64_u64 (iz);
+
+  struct entry e = lookup (u_off);
+
+  uint32x2_t special = vcge_u32 (vsubhn_u64 (u_off, d->offset_lower_bound),
+				 vget_low_u32 (d->special_bound));
+
+  /* log10(x) = log1p(z/c-1)/log(10) + log10(c) + k*log10(2).  */
+  float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc);
+  float64x2_t kd = vcvtq_f64_s64 (k);
+
+  /* hi = r / log(10) + log10(c) + k*log10(2).
+     Constants in v_log10_data.c are computed (in extended precision) as
+     e.log10c := e.logc * invln10.  */
+  float64x2_t cte = vld1q_f64 (&d->invln10);
+  float64x2_t hi = vfmaq_laneq_f64 (e.log10c, r, cte, 0);
+
+  /* Add the k * log10(2) term to hi.  */
+  hi = vfmaq_laneq_f64 (hi, kd, cte, 1);
+
+  /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi.  */
+  float64x2_t r2 = vmulq_f64 (r, r);
+  float64x2_t odd_coeffs = vld1q_f64 (&d->c1);
+  float64x2_t y = vfmaq_laneq_f64 (d->c2, r, odd_coeffs, 1);
+  float64x2_t p = vfmaq_laneq_f64 (d->c0, r, odd_coeffs, 0);
+  y = vfmaq_f64 (y, d->c4, r2);
+  y = vfmaq_f64 (p, y, r2);
+
+  if (unlikely (v_any_u32h (special)))
+    return special_case (hi, u_off, y, r2, special, d);
+  return vfmaq_f64 (hi, y, r2);
+}
+
+TEST_SIG (V, D, 1, log10, 0.01, 11.1)
+TEST_ULP (V_NAME_D1 (log10), 1.97)
+TEST_INTERVAL (V_NAME_D1 (log10), -0.0, -inf, 1000)
+TEST_INTERVAL (V_NAME_D1 (log10), 0, 0x1p-149, 1000)
+TEST_INTERVAL (V_NAME_D1 (log10), 0x1p-149, 0x1p-126, 4000)
+TEST_INTERVAL (V_NAME_D1 (log10), 0x1p-126, 0x1p-23, 50000)
+TEST_INTERVAL (V_NAME_D1 (log10), 0x1p-23, 1.0, 50000)
+TEST_INTERVAL (V_NAME_D1 (log10), 1.0, 100, 50000)
+TEST_INTERVAL (V_NAME_D1 (log10), 100, inf, 50000)
diff --git a/math/aarch64/advsimd/log10f.c b/math/aarch64/advsimd/log10f.c
new file mode 100644
index 00000000000000..907c1051e0864c
--- /dev/null
+++ b/math/aarch64/advsimd/log10f.c
@@ -0,0 +1,106 @@
+/*
+ * Single-precision vector log10 function.
+ *
+ * Copyright (c) 2020-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+
+static const struct data
+{
+  float32x4_t c0, c2, c4, c6, inv_ln10, ln2;
+  uint32x4_t off, offset_lower_bound;
+  uint16x8_t special_bound;
+  uint32x4_t mantissa_mask;
+  float c1, c3, c5, c7;
+} data = {
+  /* Use order 9 for log10(1+x), i.e. order 8 for log10(1+x)/x, with x in
+      [-1/3, 1/3] (offset=2/3). Max. relative error: 0x1.068ee468p-25.  */
+  .c0 = V4 (-0x1.bcb79cp-3f),
+  .c1 = 0x1.2879c8p-3f,
+  .c2 = V4 (-0x1.bcd472p-4f),
+  .c3 = 0x1.6408f8p-4f,
+  .c4 = V4 (-0x1.246f8p-4f),
+  .c5 = 0x1.f0e514p-5f,
+  .c6 = V4 (-0x1.0fc92cp-4f),
+  .c7 = 0x1.f5f76ap-5f,
+  .ln2 = V4 (0x1.62e43p-1f),
+  .inv_ln10 = V4 (0x1.bcb7b2p-2f),
+  /* Lower bound is the smallest positive normal float 0x00800000. For
+     optimised register use subnormals are detected after offset has been
+     subtracted, so lower bound is 0x00800000 - offset (which wraps around).  */
+  .offset_lower_bound = V4 (0x00800000 - 0x3f2aaaab),
+  .special_bound = V8 (0x7f00), /* top16(asuint32(inf) - 0x00800000).  */
+  .off = V4 (0x3f2aaaab),	/* 0.666667.  */
+  .mantissa_mask = V4 (0x007fffff),
+};
+
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t y, uint32x4_t u_off, float32x4_t p, float32x4_t r2,
+	      uint16x4_t cmp, const struct data *d)
+{
+  /* Fall back to scalar code.  */
+  return v_call_f32 (log10f, vreinterpretq_f32_u32 (vaddq_u32 (u_off, d->off)),
+		     vfmaq_f32 (y, p, r2), vmovl_u16 (cmp));
+}
+
+/* Fast implementation of AdvSIMD log10f,
+   uses a similar approach as AdvSIMD logf with the same offset (i.e., 2/3) and
+   an order 9 polynomial.
+   Maximum error: 3.305ulps (nearest rounding.)
+   _ZGVnN4v_log10f(0x1.555c16p+0) got 0x1.ffe2fap-4
+				 want 0x1.ffe2f4p-4.  */
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log10) (float32x4_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+  float32x4_t c1357 = vld1q_f32 (&d->c1);
+  /* To avoid having to mov x out of the way, keep u after offset has been
+     applied, and recover x by adding the offset back in the special-case
+     handler.  */
+  uint32x4_t u_off = vreinterpretq_u32_f32 (x);
+
+  /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
+  u_off = vsubq_u32 (u_off, d->off);
+  float32x4_t n = vcvtq_f32_s32 (
+      vshrq_n_s32 (vreinterpretq_s32_u32 (u_off), 23)); /* signextend.  */
+
+  uint16x4_t special = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound),
+				 vget_low_u16 (d->special_bound));
+
+  uint32x4_t u = vaddq_u32 (vandq_u32 (u_off, d->mantissa_mask), d->off);
+  float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f));
+
+  /* y = log10(1+r) + n * log10(2).  */
+  float32x4_t r2 = vmulq_f32 (r, r);
+
+  float32x4_t c01 = vfmaq_laneq_f32 (d->c0, r, c1357, 0);
+  float32x4_t c23 = vfmaq_laneq_f32 (d->c2, r, c1357, 1);
+  float32x4_t c45 = vfmaq_laneq_f32 (d->c4, r, c1357, 2);
+  float32x4_t c67 = vfmaq_laneq_f32 (d->c6, r, c1357, 3);
+
+  float32x4_t p47 = vfmaq_f32 (c45, r2, c67);
+  float32x4_t p27 = vfmaq_f32 (c23, r2, p47);
+  float32x4_t poly = vfmaq_f32 (c01, r2, p27);
+
+  /* y = (n * Ln(2) + r) * InvLn(10).  */
+  float32x4_t y = vfmaq_f32 (r, d->ln2, n);
+  y = vmulq_f32 (y, d->inv_ln10);
+
+  if (unlikely (v_any_u16h (special)))
+    return special_case (y, u_off, poly, r2, special, d);
+  return vfmaq_f32 (y, poly, r2);
+}
+
+HALF_WIDTH_ALIAS_F1 (log10)
+
+TEST_SIG (V, F, 1, log10, 0.01, 11.1)
+TEST_ULP (V_NAME_F1 (log10), 2.81)
+TEST_INTERVAL (V_NAME_F1 (log10), -0.0, -inf, 100)
+TEST_INTERVAL (V_NAME_F1 (log10), 0, 0x1p-126, 100)
+TEST_INTERVAL (V_NAME_F1 (log10), 0x1p-126, 0x1p-23, 50000)
+TEST_INTERVAL (V_NAME_F1 (log10), 0x1p-23, 1.0, 50000)
+TEST_INTERVAL (V_NAME_F1 (log10), 1.0, 100, 50000)
+TEST_INTERVAL (V_NAME_F1 (log10), 100, inf, 50000)
diff --git a/math/aarch64/advsimd/log1p.c b/math/aarch64/advsimd/log1p.c
new file mode 100644
index 00000000000000..42a0c579392052
--- /dev/null
+++ b/math/aarch64/advsimd/log1p.c
@@ -0,0 +1,61 @@
+/*
+ * Double-precision vector log(1+x) function.
+ *
+ * Copyright (c) 2022-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+
+#define WANT_V_LOG1P_K0_SHORTCUT 0
+#include "v_log1p_inline.h"
+
+const static struct data
+{
+  struct v_log1p_data d;
+  uint64x2_t inf, minus_one;
+} data = { .d = V_LOG1P_CONSTANTS_TABLE,
+	   .inf = V2 (0x7ff0000000000000),
+	   .minus_one = V2 (0xbff0000000000000) };
+
+#define BottomMask v_u64 (0xffffffff)
+
+static float64x2_t NOINLINE VPCS_ATTR
+special_case (float64x2_t x, uint64x2_t cmp, const struct data *d)
+{
+  /* Side-step special lanes so fenv exceptions are not triggered
+     inadvertently.  */
+  float64x2_t x_nospecial = v_zerofy_f64 (x, cmp);
+  return v_call_f64 (log1p, x, log1p_inline (x_nospecial, &d->d), cmp);
+}
+
+/* Vector log1p approximation using polynomial on reduced interval. Routine is
+   a modification of the algorithm used in scalar log1p, with no shortcut for
+   k=0 and no narrowing for f and k. Maximum observed error is 2.45 ULP:
+   _ZGVnN2v_log1p(0x1.658f7035c4014p+11) got 0x1.fd61d0727429dp+2
+					want 0x1.fd61d0727429fp+2.  */
+VPCS_ATTR float64x2_t V_NAME_D1 (log1p) (float64x2_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+  uint64x2_t ix = vreinterpretq_u64_f64 (x);
+  uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x));
+
+  uint64x2_t special_cases
+      = vorrq_u64 (vcgeq_u64 (ia, d->inf), vcgeq_u64 (ix, d->minus_one));
+
+  if (unlikely (v_any_u64 (special_cases)))
+    return special_case (x, special_cases, d);
+
+  return log1p_inline (x, &d->d);
+}
+
+TEST_SIG (V, D, 1, log1p, -0.9, 10.0)
+TEST_ULP (V_NAME_D1 (log1p), 1.95)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (log1p), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_D1 (log1p), 0.0, 0x1p-23, 50000)
+TEST_SYM_INTERVAL (V_NAME_D1 (log1p), 0x1p-23, 0.001, 50000)
+TEST_SYM_INTERVAL (V_NAME_D1 (log1p), 0.001, 1.0, 50000)
+TEST_INTERVAL (V_NAME_D1 (log1p), 1, inf, 40000)
+TEST_INTERVAL (V_NAME_D1 (log1p), -1.0, -inf, 500)
diff --git a/math/aarch64/advsimd/log1pf.c b/math/aarch64/advsimd/log1pf.c
new file mode 100644
index 00000000000000..94b90249128fa4
--- /dev/null
+++ b/math/aarch64/advsimd/log1pf.c
@@ -0,0 +1,92 @@
+/*
+ * Single-precision vector log(1+x) function.
+ *
+ * Copyright (c) 2022-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "v_log1pf_inline.h"
+
+#if WANT_SIMD_EXCEPT
+
+const static struct data
+{
+  uint32x4_t minus_one, thresh;
+  struct v_log1pf_data d;
+} data = {
+  .d = V_LOG1PF_CONSTANTS_TABLE,
+  .thresh = V4 (0x4b800000), /* asuint32(INFINITY) - TinyBound.  */
+  .minus_one = V4 (0xbf800000),
+};
+
+/* asuint32(0x1p-23). ulp=0.5 at 0x1p-23.  */
+#  define TinyBound v_u32 (0x34000000)
+
+static float32x4_t NOINLINE VPCS_ATTR
+special_case (float32x4_t x, uint32x4_t cmp, const struct data *d)
+{
+  /* Side-step special lanes so fenv exceptions are not triggered
+     inadvertently.  */
+  float32x4_t x_nospecial = v_zerofy_f32 (x, cmp);
+  return v_call_f32 (log1pf, x, log1pf_inline (x_nospecial, &d->d), cmp);
+}
+
+/* Vector log1pf approximation using polynomial on reduced interval. Worst-case
+   error is 1.69 ULP:
+   _ZGVnN4v_log1pf(0x1.04418ap-2) got 0x1.cfcbd8p-3
+				 want 0x1.cfcbdcp-3.  */
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log1p) (float32x4_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+  uint32x4_t ix = vreinterpretq_u32_f32 (x);
+  uint32x4_t ia = vreinterpretq_u32_f32 (vabsq_f32 (x));
+
+  uint32x4_t special_cases
+      = vorrq_u32 (vcgeq_u32 (vsubq_u32 (ia, TinyBound), d->thresh),
+		   vcgeq_u32 (ix, d->minus_one));
+
+  if (unlikely (v_any_u32 (special_cases)))
+    return special_case (x, special_cases, d);
+
+  return log1pf_inline (x, &d->d);
+}
+
+#else
+
+const static struct v_log1pf_data data = V_LOG1PF_CONSTANTS_TABLE;
+
+static float32x4_t NOINLINE VPCS_ATTR
+special_case (float32x4_t x, uint32x4_t cmp)
+{
+  return v_call_f32 (log1pf, x, log1pf_inline (x, ptr_barrier (&data)), cmp);
+}
+
+/* Vector log1pf approximation using polynomial on reduced interval. Worst-case
+   error is 1.63 ULP:
+   _ZGVnN4v_log1pf(0x1.216d12p-2) got 0x1.fdcb12p-3
+				 want 0x1.fdcb16p-3.  */
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log1p) (float32x4_t x)
+{
+  uint32x4_t special_cases = vornq_u32 (vcleq_f32 (x, v_f32 (-1)),
+					vcaleq_f32 (x, v_f32 (0x1p127f)));
+
+  if (unlikely (v_any_u32 (special_cases)))
+    return special_case (x, special_cases);
+
+  return log1pf_inline (x, ptr_barrier (&data));
+}
+
+#endif
+
+HALF_WIDTH_ALIAS_F1 (log1p)
+
+TEST_SIG (V, F, 1, log1p, -0.9, 10.0)
+TEST_ULP (V_NAME_F1 (log1p), 1.20)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (log1p), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_F1 (log1p), 0.0, 0x1p-23, 30000)
+TEST_SYM_INTERVAL (V_NAME_F1 (log1p), 0x1p-23, 1, 50000)
+TEST_INTERVAL (V_NAME_F1 (log1p), 1, inf, 50000)
+TEST_INTERVAL (V_NAME_F1 (log1p), -1.0, -inf, 1000)
diff --git a/math/aarch64/advsimd/log2.c b/math/aarch64/advsimd/log2.c
new file mode 100644
index 00000000000000..7d2e44dad2c9ef
--- /dev/null
+++ b/math/aarch64/advsimd/log2.c
@@ -0,0 +1,123 @@
+/*
+ * Double-precision vector log2 function.
+ *
+ * Copyright (c) 2022-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+
+static const struct data
+{
+  uint64x2_t off, sign_exp_mask, offset_lower_bound;
+  uint32x4_t special_bound;
+  float64x2_t c0, c2;
+  double c1, c3, invln2, c4;
+} data = {
+  /* Each coefficient was generated to approximate log(r) for |r| < 0x1.fp-9
+     and N = 128, then scaled by log2(e) in extended precision and rounded back
+     to double precision.  */
+  .c0 = V2 (-0x1.71547652b8300p-1),
+  .c1 = 0x1.ec709dc340953p-2,
+  .c2 = V2 (-0x1.71547651c8f35p-2),
+  .c3 = 0x1.2777ebe12dda5p-2,
+  .c4 = -0x1.ec738d616fe26p-3,
+  .invln2 = 0x1.71547652b82fep0,
+  .off = V2 (0x3fe6900900000000),
+  .sign_exp_mask = V2 (0xfff0000000000000),
+  /* Lower bound is 0x0010000000000000. For
+     optimised register use subnormals are detected after offset has been
+     subtracted, so lower bound - offset (which wraps around).  */
+  .offset_lower_bound = V2 (0x0010000000000000 - 0x3fe6900900000000),
+  .special_bound = V4 (0x7fe00000), /* asuint64(inf) - asuint64(0x1p-1022).  */
+};
+
+#define N (1 << V_LOG2_TABLE_BITS)
+#define IndexMask (N - 1)
+
+struct entry
+{
+  float64x2_t invc;
+  float64x2_t log2c;
+};
+
+static inline struct entry
+lookup (uint64x2_t i)
+{
+  struct entry e;
+  uint64_t i0
+      = (vgetq_lane_u64 (i, 0) >> (52 - V_LOG2_TABLE_BITS)) & IndexMask;
+  uint64_t i1
+      = (vgetq_lane_u64 (i, 1) >> (52 - V_LOG2_TABLE_BITS)) & IndexMask;
+  float64x2_t e0 = vld1q_f64 (&__v_log2_data.table[i0].invc);
+  float64x2_t e1 = vld1q_f64 (&__v_log2_data.table[i1].invc);
+  e.invc = vuzp1q_f64 (e0, e1);
+  e.log2c = vuzp2q_f64 (e0, e1);
+  return e;
+}
+
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t hi, uint64x2_t u_off, float64x2_t y, float64x2_t r2,
+	      uint32x2_t special, const struct data *d)
+{
+  float64x2_t x = vreinterpretq_f64_u64 (vaddq_u64 (u_off, d->off));
+  return v_call_f64 (log2, x, vfmaq_f64 (hi, y, r2), vmovl_u32 (special));
+}
+
+/* Double-precision vector log2 routine. Implements the same algorithm as
+   vector log10, with coefficients and table entries scaled in extended
+   precision. The maximum observed error is 2.58 ULP:
+   _ZGVnN2v_log2(0x1.0b556b093869bp+0) got 0x1.fffb34198d9dap-5
+				      want 0x1.fffb34198d9ddp-5.  */
+float64x2_t VPCS_ATTR V_NAME_D1 (log2) (float64x2_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  /* To avoid having to mov x out of the way, keep u after offset has been
+     applied, and recover x by adding the offset back in the special-case
+     handler.  */
+  uint64x2_t u = vreinterpretq_u64_f64 (x);
+  uint64x2_t u_off = vsubq_u64 (u, d->off);
+
+  /* x = 2^k z; where z is in range [Off,2*Off) and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (u_off), 52);
+  uint64x2_t iz = vsubq_u64 (u, vandq_u64 (u_off, d->sign_exp_mask));
+  float64x2_t z = vreinterpretq_f64_u64 (iz);
+
+  struct entry e = lookup (u_off);
+
+  uint32x2_t special = vcge_u32 (vsubhn_u64 (u_off, d->offset_lower_bound),
+				 vget_low_u32 (d->special_bound));
+
+  /* log2(x) = log1p(z/c-1)/log(2) + log2(c) + k.  */
+  float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc);
+  float64x2_t kd = vcvtq_f64_s64 (k);
+
+  float64x2_t invln2_and_c4 = vld1q_f64 (&d->invln2);
+  float64x2_t hi
+      = vfmaq_laneq_f64 (vaddq_f64 (e.log2c, kd), r, invln2_and_c4, 0);
+
+  float64x2_t r2 = vmulq_f64 (r, r);
+  float64x2_t odd_coeffs = vld1q_f64 (&d->c1);
+  float64x2_t y = vfmaq_laneq_f64 (d->c2, r, odd_coeffs, 1);
+  float64x2_t p = vfmaq_laneq_f64 (d->c0, r, odd_coeffs, 0);
+  y = vfmaq_laneq_f64 (y, r2, invln2_and_c4, 1);
+  y = vfmaq_f64 (p, r2, y);
+
+  if (unlikely (v_any_u32h (special)))
+    return special_case (hi, u_off, y, r2, special, d);
+  return vfmaq_f64 (hi, y, r2);
+}
+
+TEST_SIG (V, D, 1, log2, 0.01, 11.1)
+TEST_ULP (V_NAME_D1 (log2), 2.09)
+TEST_INTERVAL (V_NAME_D1 (log2), -0.0, -0x1p126, 100)
+TEST_INTERVAL (V_NAME_D1 (log2), 0x1p-149, 0x1p-126, 4000)
+TEST_INTERVAL (V_NAME_D1 (log2), 0x1p-126, 0x1p-23, 50000)
+TEST_INTERVAL (V_NAME_D1 (log2), 0x1p-23, 1.0, 50000)
+TEST_INTERVAL (V_NAME_D1 (log2), 1.0, 100, 50000)
+TEST_INTERVAL (V_NAME_D1 (log2), 100, inf, 50000)
diff --git a/math/aarch64/advsimd/log2f.c b/math/aarch64/advsimd/log2f.c
new file mode 100644
index 00000000000000..3053c64bc552c4
--- /dev/null
+++ b/math/aarch64/advsimd/log2f.c
@@ -0,0 +1,102 @@
+/*
+ * Single-precision vector log2 function.
+ *
+ * Copyright (c) 2022-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+
+static const struct data
+{
+  float32x4_t c0, c2, c4, c6, c8;
+  uint32x4_t off, offset_lower_bound;
+  uint16x8_t special_bound;
+  uint32x4_t mantissa_mask;
+  float c1, c3, c5, c7;
+} data = {
+  /* Coefficients generated using Remez algorithm approximate
+     log2(1+r)/r for r in [ -1/3, 1/3 ].
+     rel error: 0x1.c4c4b0cp-26.  */
+  .c0 = V4 (0x1.715476p0f), /* (float)(1 / ln(2)).  */
+  .c1 = -0x1.715458p-1f,
+  .c2 = V4 (0x1.ec701cp-2f),
+  .c3 = -0x1.7171a4p-2f,
+  .c4 = V4 (0x1.27a0b8p-2f),
+  .c5 = -0x1.e5143ep-3f,
+  .c6 = V4 (0x1.9d8ecap-3f),
+  .c7 = -0x1.c675bp-3f,
+  .c8 = V4 (0x1.9e495p-3f),
+  /* Lower bound is the smallest positive normal float 0x00800000. For
+     optimised register use subnormals are detected after offset has been
+     subtracted, so lower bound is 0x00800000 - offset (which wraps around).  */
+  .offset_lower_bound = V4 (0x00800000 - 0x3f2aaaab),
+  .special_bound = V8 (0x7f00), /* top16(asuint32(inf) - 0x00800000).  */
+  .off = V4 (0x3f2aaaab),	/* 0.666667.  */
+  .mantissa_mask = V4 (0x007fffff),
+};
+
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t n, uint32x4_t u_off, float32x4_t p, float32x4_t r,
+	      uint16x4_t cmp, const struct data *d)
+{
+  /* Fall back to scalar code.  */
+  return v_call_f32 (log2f, vreinterpretq_f32_u32 (vaddq_u32 (u_off, d->off)),
+		     vfmaq_f32 (n, p, r), vmovl_u16 (cmp));
+}
+
+/* Fast implementation for single precision AdvSIMD log2,
+   relies on same argument reduction as AdvSIMD logf.
+   Maximum error: 2.48 ULPs
+   _ZGVnN4v_log2f(0x1.558174p+0) got 0x1.a9be84p-2
+				want 0x1.a9be8p-2.  */
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log2) (float32x4_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  /* To avoid having to mov x out of the way, keep u after offset has been
+     applied, and recover x by adding the offset back in the special-case
+     handler.  */
+  uint32x4_t u_off = vreinterpretq_u32_f32 (x);
+
+  /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
+  u_off = vsubq_u32 (u_off, d->off);
+  float32x4_t n = vcvtq_f32_s32 (
+      vshrq_n_s32 (vreinterpretq_s32_u32 (u_off), 23)); /* signextend.  */
+
+  uint16x4_t special = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound),
+				 vget_low_u16 (d->special_bound));
+
+  uint32x4_t u = vaddq_u32 (vandq_u32 (u_off, d->mantissa_mask), d->off);
+  float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f));
+
+  /* y = log2(1+r) + n.  */
+  float32x4_t r2 = vmulq_f32 (r, r);
+
+  float32x4_t c1357 = vld1q_f32 (&d->c1);
+  float32x4_t c01 = vfmaq_laneq_f32 (d->c0, r, c1357, 0);
+  float32x4_t c23 = vfmaq_laneq_f32 (d->c2, r, c1357, 1);
+  float32x4_t c45 = vfmaq_laneq_f32 (d->c4, r, c1357, 2);
+  float32x4_t c67 = vfmaq_laneq_f32 (d->c6, r, c1357, 3);
+  float32x4_t p68 = vfmaq_f32 (c67, r2, d->c8);
+  float32x4_t p48 = vfmaq_f32 (c45, r2, p68);
+  float32x4_t p28 = vfmaq_f32 (c23, r2, p48);
+  float32x4_t p = vfmaq_f32 (c01, r2, p28);
+
+  if (unlikely (v_any_u16h (special)))
+    return special_case (n, u_off, p, r, special, d);
+  return vfmaq_f32 (n, p, r);
+}
+
+HALF_WIDTH_ALIAS_F1 (log2)
+
+TEST_SIG (V, F, 1, log2, 0.01, 11.1)
+TEST_ULP (V_NAME_F1 (log2), 1.99)
+TEST_INTERVAL (V_NAME_F1 (log2), -0.0, -0x1p126, 100)
+TEST_INTERVAL (V_NAME_F1 (log2), 0x1p-149, 0x1p-126, 4000)
+TEST_INTERVAL (V_NAME_F1 (log2), 0x1p-126, 0x1p-23, 50000)
+TEST_INTERVAL (V_NAME_F1 (log2), 0x1p-23, 1.0, 50000)
+TEST_INTERVAL (V_NAME_F1 (log2), 1.0, 100, 50000)
+TEST_INTERVAL (V_NAME_F1 (log2), 100, inf, 50000)
diff --git a/math/aarch64/advsimd/logf.c b/math/aarch64/advsimd/logf.c
new file mode 100644
index 00000000000000..84705fad05eee7
--- /dev/null
+++ b/math/aarch64/advsimd/logf.c
@@ -0,0 +1,88 @@
+/*
+ * Single-precision vector log function.
+ *
+ * Copyright (c) 2019-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "v_math.h"
+#include "test_defs.h"
+#include "test_sig.h"
+
+static const struct data
+{
+  float32x4_t c2, c4, c6, ln2;
+  uint32x4_t off, offset_lower_bound, mantissa_mask;
+  uint16x8_t special_bound;
+  float c1, c3, c5, c0;
+} data = {
+  /* 3.34 ulp error.  */
+  .c0 = -0x1.3e737cp-3f,
+  .c1 = 0x1.5a9aa2p-3f,
+  .c2 = V4 (-0x1.4f9934p-3f),
+  .c3 = 0x1.961348p-3f,
+  .c4 = V4 (-0x1.00187cp-2f),
+  .c5 = 0x1.555d7cp-2f,
+  .c6 = V4 (-0x1.ffffc8p-2f),
+  .ln2 = V4 (0x1.62e43p-1f),
+  /* Lower bound is the smallest positive normal float 0x00800000. For
+     optimised register use subnormals are detected after offset has been
+     subtracted, so lower bound is 0x00800000 - offset (which wraps around).  */
+  .offset_lower_bound = V4 (0x00800000 - 0x3f2aaaab),
+  .special_bound = V8 (0x7f00), /* top16(asuint32(inf) - 0x00800000).  */
+  .off = V4 (0x3f2aaaab),	/* 0.666667.  */
+  .mantissa_mask = V4 (0x007fffff)
+};
+
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t p, uint32x4_t u_off, float32x4_t y, float32x4_t r2,
+	      uint16x4_t cmp, const struct data *d)
+{
+  /* Fall back to scalar code.  */
+  return v_call_f32 (logf, vreinterpretq_f32_u32 (vaddq_u32 (u_off, d->off)),
+		     vfmaq_f32 (p, y, r2), vmovl_u16 (cmp));
+}
+
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log) (float32x4_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+  float32x4_t c1350 = vld1q_f32 (&d->c1);
+
+  /* To avoid having to mov x out of the way, keep u after offset has been
+     applied, and recover x by adding the offset back in the special-case
+     handler.  */
+  uint32x4_t u_off = vsubq_u32 (vreinterpretq_u32_f32 (x), d->off);
+
+  /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
+  float32x4_t n = vcvtq_f32_s32 (
+      vshrq_n_s32 (vreinterpretq_s32_u32 (u_off), 23)); /* signextend.  */
+  uint16x4_t cmp = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound),
+			     vget_low_u16 (d->special_bound));
+
+  uint32x4_t u = vaddq_u32 (vandq_u32 (u_off, d->mantissa_mask), d->off);
+  float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f));
+
+  /* y = log(1+r) + n*ln2.  */
+  float32x4_t r2 = vmulq_f32 (r, r);
+  /* n*ln2 + r + r2*(P1 + r*P2 + r2*(P3 + r*P4 + r2*(P5 + r*P6 + r2*P7))).  */
+  float32x4_t p = vfmaq_laneq_f32 (d->c2, r, c1350, 0);
+  float32x4_t q = vfmaq_laneq_f32 (d->c4, r, c1350, 1);
+  float32x4_t y = vfmaq_laneq_f32 (d->c6, r, c1350, 2);
+  p = vfmaq_laneq_f32 (p, r2, c1350, 3);
+
+  q = vfmaq_f32 (q, p, r2);
+  y = vfmaq_f32 (y, q, r2);
+  p = vfmaq_f32 (r, d->ln2, n);
+
+  if (unlikely (v_any_u16h (cmp)))
+    return special_case (p, u_off, y, r2, cmp, d);
+  return vfmaq_f32 (p, y, r2);
+}
+
+HALF_WIDTH_ALIAS_F1 (log)
+
+TEST_SIG (V, F, 1, log, 0.01, 11.1)
+TEST_ULP (V_NAME_F1 (log), 2.9)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (log), WANT_SIMD_EXCEPT)
+TEST_INTERVAL (V_NAME_F1 (log), 0, 0xffff0000, 10000)
+TEST_INTERVAL (V_NAME_F1 (log), 0x1p-4, 0x1p4, 500000)
+TEST_INTERVAL (V_NAME_F1 (log), 0, inf, 50000)
diff --git a/math/aarch64/advsimd/modf.c b/math/aarch64/advsimd/modf.c
new file mode 100644
index 00000000000000..da2fcbff851497
--- /dev/null
+++ b/math/aarch64/advsimd/modf.c
@@ -0,0 +1,33 @@
+/*
+ * Double-precision vector modf(x, *y) function.
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+
+/* Vector modf: writes trunc (x) to out_int[0..1] and returns x - trunc (x).
+   Results are exact in all rounding modes.  */
+float64x2_t VPCS_ATTR V_NAME_D1_L1 (modf) (float64x2_t x, double *out_int)
+{
+  float64x2_t whole = vrndq_f64 (x);
+  vst1q_f64 (out_int, whole);
+
+  /* All-ones in lanes where x is already integral.  */
+  uint64x2_t exact = vceqq_f64 (x, whole);
+
+  /* Clear those lanes of the difference so they return +0.  */
+  uint64x2_t frac = vreinterpretq_u64_f64 (vsubq_f64 (x, whole));
+  return vreinterpretq_f64_u64 (vbicq_u64 (frac, exact));
+}
+
+TEST_ULP (_ZGVnN2vl8_modf_frac, 0.0)
+TEST_SYM_INTERVAL (_ZGVnN2vl8_modf_frac, 0, 1, 20000)
+TEST_SYM_INTERVAL (_ZGVnN2vl8_modf_frac, 1, inf, 20000)
+
+TEST_ULP (_ZGVnN2vl8_modf_int, 0.0)
+TEST_SYM_INTERVAL (_ZGVnN2vl8_modf_int, 0, 1, 20000)
+TEST_SYM_INTERVAL (_ZGVnN2vl8_modf_int, 1, inf, 20000)
diff --git a/math/aarch64/advsimd/modff.c b/math/aarch64/advsimd/modff.c
new file mode 100644
index 00000000000000..0a646b24cb1ae1
--- /dev/null
+++ b/math/aarch64/advsimd/modff.c
@@ -0,0 +1,34 @@
+/*
+ * Single-precision vector modf(x, *y) function.
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+
+/* Vector modff: writes truncf (x) to out_int[0..3] and returns x - truncf (x).
+   Results are exact in all rounding modes.  */
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1_L1 (modf) (float32x4_t x,
+						    float *out_int)
+{
+  float32x4_t whole = vrndq_f32 (x);
+  vst1q_f32 (out_int, whole);
+
+  /* All-ones in lanes where x is already integral.  */
+  uint32x4_t exact = vceqq_f32 (x, whole);
+
+  /* Clear those lanes of the difference so they return +0.  */
+  uint32x4_t frac = vreinterpretq_u32_f32 (vsubq_f32 (x, whole));
+  return vreinterpretq_f32_u32 (vbicq_u32 (frac, exact));
+}
+
+TEST_ULP (_ZGVnN4vl4_modff_frac, 0.0)
+TEST_SYM_INTERVAL (_ZGVnN4vl4_modff_frac, 0, 1, 20000)
+TEST_SYM_INTERVAL (_ZGVnN4vl4_modff_frac, 1, inf, 20000)
+
+TEST_ULP (_ZGVnN4vl4_modff_int, 0.0)
+TEST_SYM_INTERVAL (_ZGVnN4vl4_modff_int, 0, 1, 20000)
+TEST_SYM_INTERVAL (_ZGVnN4vl4_modff_int, 1, inf, 20000)
diff --git a/pl/math/v_pow_1u5.c b/math/aarch64/advsimd/pow.c
similarity index 60%
rename from pl/math/v_pow_1u5.c
rename to math/aarch64/advsimd/pow.c
index 9053347d4e3524..db9d6e9ba14bb9 100644
--- a/pl/math/v_pow_1u5.c
+++ b/math/aarch64/advsimd/pow.c
@@ -1,20 +1,17 @@
 /*
  * Double-precision vector pow function.
  *
- * Copyright (c) 2020-2023, Arm Limited.
+ * Copyright (c) 2020-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 /* Defines parameters of the approximation and scalar fallback.  */
 #include "finite_pow.h"
 
-#define VecSmallExp v_u64 (SmallExp)
-#define VecThresExp v_u64 (ThresExp)
-
 #define VecSmallPowX v_u64 (SmallPowX)
 #define VecThresPowX v_u64 (ThresPowX)
 #define VecSmallPowY v_u64 (SmallPowY)
@@ -22,34 +19,49 @@
 
 static const struct data
 {
-  float64x2_t log_poly[7];
-  float64x2_t exp_poly[3];
-  float64x2_t ln2_hi, ln2_lo;
-  float64x2_t shift, inv_ln2_n, ln2_hi_n, ln2_lo_n;
+  uint64x2_t inf;
+  float64x2_t small_powx;
+  uint64x2_t offset, mask;
+  uint64x2_t mask_sub_0, mask_sub_1;
+  float64x2_t log_c0, log_c2, log_c4, log_c5;
+  double log_c1, log_c3;
+  double ln2_lo, ln2_hi;
+  uint64x2_t small_exp, thres_exp;
+  double ln2_lo_n, ln2_hi_n;
+  double inv_ln2_n, exp_c2;
+  float64x2_t exp_c0, exp_c1;
 } data = {
+  /* Power threshold.  */
+  .inf = V2 (0x7ff0000000000000),
+  .small_powx = V2 (0x1p-126),
+  .offset = V2 (Off),
+  .mask = V2 (0xfffULL << 52),
+  .mask_sub_0 = V2 (1ULL << 52),
+  .mask_sub_1 = V2 (52ULL << 52),
   /* Coefficients copied from v_pow_log_data.c
      relative error: 0x1.11922ap-70 in [-0x1.6bp-8, 0x1.6bp-8]
      Coefficients are scaled to match the scaling during evaluation.  */
-  .log_poly = { V2 (-0x1p-1), V2 (0x1.555555555556p-2 * -2),
-		V2 (-0x1.0000000000006p-2 * -2), V2 (0x1.999999959554ep-3 * 4),
-		V2 (-0x1.555555529a47ap-3 * 4), V2 (0x1.2495b9b4845e9p-3 * -8),
-		V2 (-0x1.0002b8b263fc3p-3 * -8) },
-  .ln2_hi = V2 (0x1.62e42fefa3800p-1),
-  .ln2_lo = V2 (0x1.ef35793c76730p-45),
+  .log_c0 = V2 (0x1.555555555556p-2 * -2),
+  .log_c1 = -0x1.0000000000006p-2 * -2,
+  .log_c2 = V2 (0x1.999999959554ep-3 * 4),
+  .log_c3 = -0x1.555555529a47ap-3 * 4,
+  .log_c4 = V2 (0x1.2495b9b4845e9p-3 * -8),
+  .log_c5 = V2 (-0x1.0002b8b263fc3p-3 * -8),
+  .ln2_hi = 0x1.62e42fefa3800p-1,
+  .ln2_lo = 0x1.ef35793c76730p-45,
   /* Polynomial coefficients: abs error: 1.43*2^-58, ulp error: 0.549
      (0.550 without fma) if |x| < ln2/512.  */
-  .exp_poly = { V2 (0x1.fffffffffffd4p-2), V2 (0x1.5555571d6ef9p-3),
-		V2 (0x1.5555576a5adcep-5) },
-  .shift = V2 (0x1.8p52), /* round to nearest int. without intrinsics.  */
-  .inv_ln2_n = V2 (0x1.71547652b82fep8), /* N/ln2.  */
-  .ln2_hi_n = V2 (0x1.62e42fefc0000p-9), /* ln2/N.  */
-  .ln2_lo_n = V2 (-0x1.c610ca86c3899p-45),
+  .exp_c0 = V2 (0x1.fffffffffffd4p-2),
+  .exp_c1 = V2 (0x1.5555571d6ef9p-3),
+  .exp_c2 = 0x1.5555576a5adcep-5,
+  .small_exp = V2 (0x3c90000000000000),
+  .thres_exp = V2 (0x03f0000000000000),
+  .inv_ln2_n = 0x1.71547652b82fep8, /* N/ln2.  */
+  .ln2_hi_n = 0x1.62e42fefc0000p-9, /* ln2/N.  */
+  .ln2_lo_n = -0x1.c610ca86c3899p-45,
 };
 
-#define A(i) data.log_poly[i]
-#define C(i) data.exp_poly[i]
-
-/* This version implements an algorithm close to AOR scalar pow but
+/* This version implements an algorithm close to scalar pow but
    - does not implement the trick in the exp's specialcase subroutine to avoid
      double-rounding,
    - does not use a tail in the exponential core computation,
@@ -78,10 +90,9 @@ v_log_inline (uint64x2_t ix, float64x2_t *tail, const struct data *d)
   /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
      The range is split into N subintervals.
      The ith subinterval contains z and c is near its center.  */
-  uint64x2_t tmp = vsubq_u64 (ix, v_u64 (Off));
-  int64x2_t k
-      = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); /* arithmetic shift.  */
-  uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, v_u64 (0xfffULL << 52)));
+  uint64x2_t tmp = vsubq_u64 (ix, d->offset);
+  int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52);
+  uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, d->mask));
   float64x2_t z = vreinterpretq_f64_u64 (iz);
   float64x2_t kd = vcvtq_f64_s64 (k);
   /* log(x) = k*Ln2 + log(c) + log1p(z/c-1).  */
@@ -92,12 +103,13 @@ v_log_inline (uint64x2_t ix, float64x2_t *tail, const struct data *d)
      |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible.  */
   float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, invc);
   /* k*Ln2 + log(c) + r.  */
-  float64x2_t t1 = vfmaq_f64 (logc, kd, d->ln2_hi);
+  float64x2_t ln2 = vld1q_f64 (&d->ln2_lo);
+  float64x2_t t1 = vfmaq_laneq_f64 (logc, kd, ln2, 1);
   float64x2_t t2 = vaddq_f64 (t1, r);
-  float64x2_t lo1 = vfmaq_f64 (logctail, kd, d->ln2_lo);
+  float64x2_t lo1 = vfmaq_laneq_f64 (logctail, kd, ln2, 0);
   float64x2_t lo2 = vaddq_f64 (vsubq_f64 (t1, t2), r);
   /* Evaluation is optimized assuming superscalar pipelined execution.  */
-  float64x2_t ar = vmulq_f64 (A (0), r);
+  float64x2_t ar = vmulq_f64 (v_f64 (-0.5), r);
   float64x2_t ar2 = vmulq_f64 (r, ar);
   float64x2_t ar3 = vmulq_f64 (r, ar2);
   /* k*Ln2 + log(c) + r + A[0]*r*r.  */
@@ -105,9 +117,10 @@ v_log_inline (uint64x2_t ix, float64x2_t *tail, const struct data *d)
   float64x2_t lo3 = vfmaq_f64 (vnegq_f64 (ar2), ar, r);
   float64x2_t lo4 = vaddq_f64 (vsubq_f64 (t2, hi), ar2);
   /* p = log1p(r) - r - A[0]*r*r.  */
-  float64x2_t a56 = vfmaq_f64 (A (5), r, A (6));
-  float64x2_t a34 = vfmaq_f64 (A (3), r, A (4));
-  float64x2_t a12 = vfmaq_f64 (A (1), r, A (2));
+  float64x2_t odd_coeffs = vld1q_f64 (&d->log_c1);
+  float64x2_t a56 = vfmaq_f64 (d->log_c4, r, d->log_c5);
+  float64x2_t a34 = vfmaq_laneq_f64 (d->log_c2, r, odd_coeffs, 1);
+  float64x2_t a12 = vfmaq_laneq_f64 (d->log_c0, r, odd_coeffs, 0);
   float64x2_t p = vfmaq_f64 (a34, ar2, a56);
   p = vfmaq_f64 (a12, ar2, p);
   p = vmulq_f64 (ar3, p);
@@ -118,29 +131,37 @@ v_log_inline (uint64x2_t ix, float64x2_t *tail, const struct data *d)
   return y;
 }
 
+static float64x2_t VPCS_ATTR NOINLINE
+exp_special_case (float64x2_t x, float64x2_t xtail)
+{
+  return (float64x2_t){ exp_nosignbias (x[0], xtail[0]),
+			exp_nosignbias (x[1], xtail[1]) };
+}
+
 /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.  */
 static inline float64x2_t
-v_exp_inline (float64x2_t x, float64x2_t xtail, const struct data *d)
+v_exp_inline (float64x2_t x, float64x2_t neg_xtail, const struct data *d)
 {
   /* Fallback to scalar exp_inline for all lanes if any lane
      contains value of x s.t. |x| <= 2^-54 or >= 512.  */
-  uint64x2_t abstop
-      = vandq_u64 (vshrq_n_u64 (vreinterpretq_u64_f64 (x), 52), v_u64 (0x7ff));
-  uint64x2_t uoflowx
-      = vcgeq_u64 (vsubq_u64 (abstop, VecSmallExp), VecThresExp);
+  uint64x2_t uoflowx = vcgeq_u64 (
+      vsubq_u64 (vreinterpretq_u64_f64 (vabsq_f64 (x)), d->small_exp),
+      d->thres_exp);
   if (unlikely (v_any_u64 (uoflowx)))
-    return v_call2_f64 (exp_nosignbias, x, xtail, x, v_u64 (-1));
+    return exp_special_case (x, vnegq_f64 (neg_xtail));
+
   /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)].  */
   /* x = ln2/N*k + r, with k integer and r in [-ln2/2N, ln2/2N].  */
-  float64x2_t z = vmulq_f64 (d->inv_ln2_n, x);
   /* z - kd is in [-1, 1] in non-nearest rounding modes.  */
-  float64x2_t kd = vaddq_f64 (z, d->shift);
-  uint64x2_t ki = vreinterpretq_u64_f64 (kd);
-  kd = vsubq_f64 (kd, d->shift);
-  float64x2_t r = vfmsq_f64 (x, kd, d->ln2_hi_n);
-  r = vfmsq_f64 (r, kd, d->ln2_lo_n);
+  float64x2_t exp_consts = vld1q_f64 (&d->inv_ln2_n);
+  float64x2_t z = vmulq_laneq_f64 (x, exp_consts, 0);
+  float64x2_t kd = vrndnq_f64 (z);
+  uint64x2_t ki = vreinterpretq_u64_s64 (vcvtaq_s64_f64 (z));
+  float64x2_t ln2_n = vld1q_f64 (&d->ln2_lo_n);
+  float64x2_t r = vfmsq_laneq_f64 (x, kd, ln2_n, 1);
+  r = vfmsq_laneq_f64 (r, kd, ln2_n, 0);
   /* The code assumes 2^-200 < |xtail| < 2^-8/N.  */
-  r = vaddq_f64 (r, xtail);
+  r = vsubq_f64 (r, neg_xtail);
   /* 2^(k/N) ~= scale.  */
   uint64x2_t idx = vandq_u64 (ki, v_u64 (N_EXP - 1));
   uint64x2_t top = vshlq_n_u64 (ki, 52 - V_POW_EXP_TABLE_BITS);
@@ -149,8 +170,8 @@ v_exp_inline (float64x2_t x, float64x2_t xtail, const struct data *d)
   sbits = vaddq_u64 (sbits, top);
   /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1).  */
   float64x2_t r2 = vmulq_f64 (r, r);
-  float64x2_t tmp = vfmaq_f64 (C (1), r, C (2));
-  tmp = vfmaq_f64 (C (0), r, tmp);
+  float64x2_t tmp = vfmaq_laneq_f64 (d->exp_c1, r, exp_consts, 1);
+  tmp = vfmaq_f64 (d->exp_c0, r, tmp);
   tmp = vfmaq_f64 (r, r2, tmp);
   float64x2_t scale = vreinterpretq_f64_u64 (sbits);
   /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
@@ -158,54 +179,59 @@ v_exp_inline (float64x2_t x, float64x2_t xtail, const struct data *d)
   return vfmaq_f64 (scale, scale, tmp);
 }
 
+static float64x2_t NOINLINE VPCS_ATTR
+scalar_fallback (float64x2_t x, float64x2_t y)
+{
+  return (float64x2_t){ pow_scalar_special_case (x[0], y[0]),
+			pow_scalar_special_case (x[1], y[1]) };
+}
+
 float64x2_t VPCS_ATTR V_NAME_D2 (pow) (float64x2_t x, float64x2_t y)
 {
   const struct data *d = ptr_barrier (&data);
   /* Case of x <= 0 is too complicated to be vectorised efficiently here,
      fallback to scalar pow for all lanes if any x < 0 detected.  */
   if (v_any_u64 (vclezq_s64 (vreinterpretq_s64_f64 (x))))
-    return v_call2_f64 (__pl_finite_pow, x, y, x, v_u64 (-1));
+    return scalar_fallback (x, y);
 
   uint64x2_t vix = vreinterpretq_u64_f64 (x);
   uint64x2_t viy = vreinterpretq_u64_f64 (y);
-  uint64x2_t vtopx = vshrq_n_u64 (vix, 52);
-  uint64x2_t vtopy = vshrq_n_u64 (viy, 52);
-  uint64x2_t vabstopx = vandq_u64 (vtopx, v_u64 (0x7ff));
-  uint64x2_t vabstopy = vandq_u64 (vtopy, v_u64 (0x7ff));
+  uint64x2_t iay = vandq_u64 (viy, d->inf);
 
   /* Special cases of x or y.  */
 #if WANT_SIMD_EXCEPT
   /* Small or large.  */
+  uint64x2_t vtopx = vshrq_n_u64 (vix, 52);
+  uint64x2_t vabstopy = vshrq_n_u64 (iay, 52);
   uint64x2_t specialx
       = vcgeq_u64 (vsubq_u64 (vtopx, VecSmallPowX), VecThresPowX);
   uint64x2_t specialy
       = vcgeq_u64 (vsubq_u64 (vabstopy, VecSmallPowY), VecThresPowY);
 #else
-  /* Inf or nan.  */
-  uint64x2_t specialx = vcgeq_u64 (vabstopx, v_u64 (0x7ff));
-  uint64x2_t specialy = vcgeq_u64 (vabstopy, v_u64 (0x7ff));
   /* The case y==0 does not trigger a special case, since in this case it is
      necessary to fix the result only if x is a signalling nan, which already
      triggers a special case. We test y==0 directly in the scalar fallback.  */
+  uint64x2_t iax = vandq_u64 (vix, d->inf);
+  uint64x2_t specialx = vcgeq_u64 (iax, d->inf);
+  uint64x2_t specialy = vcgeq_u64 (iay, d->inf);
 #endif
   uint64x2_t special = vorrq_u64 (specialx, specialy);
   /* Fallback to scalar on all lanes if any lane is inf or nan.  */
   if (unlikely (v_any_u64 (special)))
-    return v_call2_f64 (__pl_finite_pow, x, y, x, v_u64 (-1));
+    return scalar_fallback (x, y);
 
   /* Small cases of x: |x| < 0x1p-126.  */
-  uint64x2_t smallx = vcltq_u64 (vabstopx, VecSmallPowX);
+  uint64x2_t smallx = vcaltq_f64 (x, d->small_powx);
   if (unlikely (v_any_u64 (smallx)))
     {
       /* Update ix if top 12 bits of x are 0.  */
-      uint64x2_t sub_x = vceqzq_u64 (vtopx);
+      uint64x2_t sub_x = vceqzq_u64 (vshrq_n_u64 (vix, 52));
       if (unlikely (v_any_u64 (sub_x)))
 	{
 	  /* Normalize subnormal x so exponent becomes negative.  */
-	  uint64x2_t vix_norm
-	      = vreinterpretq_u64_f64 (vmulq_f64 (x, v_f64 (0x1p52)));
-	  vix_norm = vandq_u64 (vix_norm, v_u64 (0x7fffffffffffffff));
-	  vix_norm = vsubq_u64 (vix_norm, v_u64 (52ULL << 52));
+	  uint64x2_t vix_norm = vreinterpretq_u64_f64 (
+	      vabsq_f64 (vmulq_f64 (x, vcvtq_f64_u64 (d->mask_sub_0))));
+	  vix_norm = vsubq_u64 (vix_norm, d->mask_sub_1);
 	  vix = vbslq_u64 (sub_x, vix_norm, vix);
 	}
     }
@@ -216,21 +242,20 @@ float64x2_t VPCS_ATTR V_NAME_D2 (pow) (float64x2_t x, float64x2_t y)
 
   /* Vector Exp(y_loghi, y_loglo).  */
   float64x2_t vehi = vmulq_f64 (y, vhi);
-  float64x2_t velo = vmulq_f64 (y, vlo);
   float64x2_t vemi = vfmsq_f64 (vehi, y, vhi);
-  velo = vsubq_f64 (velo, vemi);
-  return v_exp_inline (vehi, velo, d);
+  float64x2_t neg_velo = vfmsq_f64 (vemi, y, vlo);
+  return v_exp_inline (vehi, neg_velo, d);
 }
 
-PL_SIG (V, D, 2, pow)
-PL_TEST_ULP (V_NAME_D2 (pow), 0.55)
-PL_TEST_EXPECT_FENV (V_NAME_D2 (pow), WANT_SIMD_EXCEPT)
+TEST_SIG (V, D, 2, pow)
+TEST_ULP (V_NAME_D2 (pow), 0.55)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D2 (pow), WANT_SIMD_EXCEPT)
 /* Wide intervals spanning the whole domain but shared between x and y.  */
-#define V_POW_INTERVAL2(xlo, xhi, ylo, yhi, n)                                 \
-  PL_TEST_INTERVAL2 (V_NAME_D2 (pow), xlo, xhi, ylo, yhi, n)                   \
-  PL_TEST_INTERVAL2 (V_NAME_D2 (pow), xlo, xhi, -ylo, -yhi, n)                 \
-  PL_TEST_INTERVAL2 (V_NAME_D2 (pow), -xlo, -xhi, ylo, yhi, n)                 \
-  PL_TEST_INTERVAL2 (V_NAME_D2 (pow), -xlo, -xhi, -ylo, -yhi, n)
+#define V_POW_INTERVAL2(xlo, xhi, ylo, yhi, n)                                \
+  TEST_INTERVAL2 (V_NAME_D2 (pow), xlo, xhi, ylo, yhi, n)                     \
+  TEST_INTERVAL2 (V_NAME_D2 (pow), xlo, xhi, -ylo, -yhi, n)                   \
+  TEST_INTERVAL2 (V_NAME_D2 (pow), -xlo, -xhi, ylo, yhi, n)                   \
+  TEST_INTERVAL2 (V_NAME_D2 (pow), -xlo, -xhi, -ylo, -yhi, n)
 #define EXPAND(str) str##000000000
 #define SHL52(str) EXPAND (str)
 V_POW_INTERVAL2 (0, SHL52 (SmallPowX), 0, inf, 40000)
@@ -248,12 +273,12 @@ V_POW_INTERVAL2 (0x1.ep-1, 0x1.1p0, 0x1p8, 0x1p16, 10000)
 V_POW_INTERVAL2 (0x1p-300, 0x1p-200, 0x1p-20, 0x1p-10, 10000)
 V_POW_INTERVAL2 (0x1p50, 0x1p100, 0x1p-20, 0x1p-10, 10000)
 /* x is negative, y is odd or even integer, or y is real not integer.  */
-PL_TEST_INTERVAL2 (V_NAME_D2 (pow), -0.0, -10.0, 3.0, 3.0, 10000)
-PL_TEST_INTERVAL2 (V_NAME_D2 (pow), -0.0, -10.0, 4.0, 4.0, 10000)
-PL_TEST_INTERVAL2 (V_NAME_D2 (pow), -0.0, -10.0, 0.0, 10.0, 10000)
-PL_TEST_INTERVAL2 (V_NAME_D2 (pow), 0.0, 10.0, -0.0, -10.0, 10000)
+TEST_INTERVAL2 (V_NAME_D2 (pow), -0.0, -10.0, 3.0, 3.0, 10000)
+TEST_INTERVAL2 (V_NAME_D2 (pow), -0.0, -10.0, 4.0, 4.0, 10000)
+TEST_INTERVAL2 (V_NAME_D2 (pow), -0.0, -10.0, 0.0, 10.0, 10000)
+TEST_INTERVAL2 (V_NAME_D2 (pow), 0.0, 10.0, -0.0, -10.0, 10000)
 /* 1.0^y.  */
-PL_TEST_INTERVAL2 (V_NAME_D2 (pow), 1.0, 1.0, 0.0, 0x1p-50, 1000)
-PL_TEST_INTERVAL2 (V_NAME_D2 (pow), 1.0, 1.0, 0x1p-50, 1.0, 1000)
-PL_TEST_INTERVAL2 (V_NAME_D2 (pow), 1.0, 1.0, 1.0, 0x1p100, 1000)
-PL_TEST_INTERVAL2 (V_NAME_D2 (pow), 1.0, 1.0, -1.0, -0x1p120, 1000)
+TEST_INTERVAL2 (V_NAME_D2 (pow), 1.0, 1.0, 0.0, 0x1p-50, 1000)
+TEST_INTERVAL2 (V_NAME_D2 (pow), 1.0, 1.0, 0x1p-50, 1.0, 1000)
+TEST_INTERVAL2 (V_NAME_D2 (pow), 1.0, 1.0, 1.0, 0x1p100, 1000)
+TEST_INTERVAL2 (V_NAME_D2 (pow), 1.0, 1.0, -1.0, -0x1p120, 1000)
diff --git a/math/aarch64/advsimd/powf.c b/math/aarch64/advsimd/powf.c
new file mode 100644
index 00000000000000..47f74cf38ab09d
--- /dev/null
+++ b/math/aarch64/advsimd/powf.c
@@ -0,0 +1,209 @@
+/*
+ * Single-precision vector powf function.
+ *
+ * Copyright (c) 2019-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "test_defs.h"
+#include "test_sig.h"
+
+#define Min v_u32 (0x00800000)
+#define Max v_u32 (0x7f800000)
+#define Thresh v_u32 (0x7f000000) /* Max - Min.  */
+#define MantissaMask v_u32 (0x007fffff)
+
+#define A d->log2_poly
+#define C d->exp2f_poly
+
+/* 2.6 ulp ~ 0.5 + 2^24 (128*Ln2*relerr_log2 + relerr_exp2).  */
+#define Off v_u32 (0x3f35d000)
+
+#define V_POWF_LOG2_TABLE_BITS 5
+#define V_EXP2F_TABLE_BITS 5
+#define Log2IdxMask ((1 << V_POWF_LOG2_TABLE_BITS) - 1)
+#define Scale ((double) (1 << V_EXP2F_TABLE_BITS))
+
+static const struct data
+{
+  struct
+  {
+    double invc, logc;
+  } log2_tab[1 << V_POWF_LOG2_TABLE_BITS];
+  float64x2_t log2_poly[4];
+  uint64_t exp2f_tab[1 << V_EXP2F_TABLE_BITS];
+  float64x2_t exp2f_poly[3];
+} data = {
+  .log2_tab = {{0x1.6489890582816p+0, -0x1.e960f97b22702p-2 * Scale},
+	       {0x1.5cf19b35e3472p+0, -0x1.c993406cd4db6p-2 * Scale},
+	       {0x1.55aac0e956d65p+0, -0x1.aa711d9a7d0f3p-2 * Scale},
+	       {0x1.4eb0022977e01p+0, -0x1.8bf37bacdce9bp-2 * Scale},
+	       {0x1.47fcccda1dd1fp+0, -0x1.6e13b3519946ep-2 * Scale},
+	       {0x1.418ceabab68c1p+0, -0x1.50cb8281e4089p-2 * Scale},
+	       {0x1.3b5c788f1edb3p+0, -0x1.341504a237e2bp-2 * Scale},
+	       {0x1.3567de48e9c9ap+0, -0x1.17eaab624ffbbp-2 * Scale},
+	       {0x1.2fabc80fd19bap+0, -0x1.f88e708f8c853p-3 * Scale},
+	       {0x1.2a25200ce536bp+0, -0x1.c24b6da113914p-3 * Scale},
+	       {0x1.24d108e0152e3p+0, -0x1.8d02ee397cb1dp-3 * Scale},
+	       {0x1.1facd8ab2fbe1p+0, -0x1.58ac1223408b3p-3 * Scale},
+	       {0x1.1ab614a03efdfp+0, -0x1.253e6fd190e89p-3 * Scale},
+	       {0x1.15ea6d03af9ffp+0, -0x1.e5641882c12ffp-4 * Scale},
+	       {0x1.1147b994bb776p+0, -0x1.81fea712926f7p-4 * Scale},
+	       {0x1.0ccbf650593aap+0, -0x1.203e240de64a3p-4 * Scale},
+	       {0x1.0875408477302p+0, -0x1.8029b86a78281p-5 * Scale},
+	       {0x1.0441d42a93328p+0, -0x1.85d713190fb9p-6 * Scale},
+	       {0x1p+0, 0x0p+0 * Scale},
+	       {0x1.f1d006c855e86p-1, 0x1.4c1cc07312997p-5 * Scale},
+	       {0x1.e28c3341aa301p-1, 0x1.5e1848ccec948p-4 * Scale},
+	       {0x1.d4bdf9aa64747p-1, 0x1.04cfcb7f1196fp-3 * Scale},
+	       {0x1.c7b45a24e5803p-1, 0x1.582813d463c21p-3 * Scale},
+	       {0x1.bb5f5eb2ed60ap-1, 0x1.a936fa68760ccp-3 * Scale},
+	       {0x1.afb0bff8fe6b4p-1, 0x1.f81bc31d6cc4ep-3 * Scale},
+	       {0x1.a49badf7ab1f5p-1, 0x1.2279a09fae6b1p-2 * Scale},
+	       {0x1.9a14a111fc4c9p-1, 0x1.47ec0b6df5526p-2 * Scale},
+	       {0x1.901131f5b2fdcp-1, 0x1.6c71762280f1p-2 * Scale},
+	       {0x1.8687f73f6d865p-1, 0x1.90155070798dap-2 * Scale},
+	       {0x1.7d7067eb77986p-1, 0x1.b2e23b1d3068cp-2 * Scale},
+	       {0x1.74c2c1cf97b65p-1, 0x1.d4e21b0daa86ap-2 * Scale},
+	       {0x1.6c77f37cff2a1p-1, 0x1.f61e2a2f67f3fp-2 * Scale},},
+  .log2_poly = { /* rel err: 1.5 * 2^-30.  */
+		 V2 (-0x1.6ff5daa3b3d7cp-2 * Scale),
+		 V2 (0x1.ec81d03c01aebp-2 * Scale),
+		 V2 (-0x1.71547bb43f101p-1 * Scale),
+		 V2 (0x1.7154764a815cbp0 * Scale)},
+  .exp2f_tab = {0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f,
+		0x3fef9301d0125b51, 0x3fef72b83c7d517b, 0x3fef54873168b9aa,
+		0x3fef387a6e756238, 0x3fef1e9df51fdee1, 0x3fef06fe0a31b715,
+		0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
+		0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429,
+		0x3feea47eb03a5585, 0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74,
+		0x3feea11473eb0187, 0x3feea589994cce13, 0x3feeace5422aa0db,
+		0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
+		0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c,
+		0x3fef3720dcef9069, 0x3fef5818dcfba487, 0x3fef7c97337b9b5f,
+		0x3fefa4afa2a490da, 0x3fefd0765b6e4540,},
+  .exp2f_poly = { /* rel err: 1.69 * 2^-34.  */
+		  V2 (0x1.c6af84b912394p-5 / Scale / Scale / Scale),
+		  V2 (0x1.ebfce50fac4f3p-3 / Scale / Scale),
+		  V2 (0x1.62e42ff0c52d6p-1 / Scale)}};
+
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, float32x4_t y, float32x4_t ret, uint32x4_t cmp)
+{
+  return v_call2_f32 (powf, x, y, ret, cmp);
+}
+
+static inline float64x2_t
+ylogx_core (const struct data *d, float64x2_t iz, float64x2_t k,
+	    float64x2_t invc, float64x2_t logc, float64x2_t y)
+{
+  /* Returns y times the table-scaled log2 of the input.  With r = z/c - 1:
+     log2(x) = log1p(r)/ln2 + log2(c) + k.  */
+  float64x2_t base = vaddq_f64 (logc, k);
+  float64x2_t r = vfmaq_f64 (v_f64 (-1.0), iz, invc);
+
+  /* Horner evaluation of the polynomial approximating log1p(r)/ln2.  */
+  float64x2_t poly = vfmaq_f64 (A[1], r, A[0]);
+  poly = vfmaq_f64 (A[2], poly, r);
+  poly = vfmaq_f64 (A[3], poly, r);
+  poly = vfmaq_f64 (base, poly, r);
+
+  return vmulq_f64 (poly, y);
+}
+
+static inline float64x2_t
+log2_lookup (const struct data *d, uint32_t i)
+{
+  uint32_t idx = (i >> (23 - V_POWF_LOG2_TABLE_BITS)) & Log2IdxMask;
+  return vld1q_f64 (&d->log2_tab[idx].invc); /* Loads { invc, logc }.  */
+}
+
+static inline uint64x1_t
+exp2f_lookup (const struct data *d, uint64_t i)
+{
+  return vld1_u64 (d->exp2f_tab + (i & ((1 << V_EXP2F_TABLE_BITS) - 1)));
+}
+
+static inline float32x2_t
+powf_core (const struct data *d, float64x2_t ylogx)
+{
+  /* N*x = k + r with r in [-1/2, 1/2].  kd and ki must round the same way
+     (to nearest, ties to even); otherwise an exact tie gives ki != kd.  */
+  float64x2_t kd = vrndnq_f64 (ylogx);
+  int64x2_t ki = vcvtnq_s64_f64 (ylogx);
+  float64x2_t r = vsubq_f64 (ylogx, kd);
+  /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1).  */
+  uint64x2_t t = vcombine_u64 (exp2f_lookup (d, vgetq_lane_s64 (ki, 0)),
+			       exp2f_lookup (d, vgetq_lane_s64 (ki, 1)));
+  t = vaddq_u64 (
+      t, vreinterpretq_u64_s64 (vshlq_n_s64 (ki, 52 - V_EXP2F_TABLE_BITS)));
+  float64x2_t s = vreinterpretq_f64_u64 (t);
+  float64x2_t p = vfmaq_f64 (C[1], r, C[0]);
+  p = vfmaq_f64 (C[2], r, p);
+  p = vfmaq_f64 (s, p, vmulq_f64 (s, r));
+  return vcvt_f32_f64 (p);
+}
+
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (pow) (float32x4_t x, float32x4_t y)
+{
+  const struct data *d = ptr_barrier (&data);
+  uint32x4_t u = vreinterpretq_u32_f32 (x);
+  uint32x4_t cmp = vcgeq_u32 (vsubq_u32 (u, Min), Thresh);
+  uint32x4_t tmp = vsubq_u32 (u, Off);
+  uint32x4_t top = vbicq_u32 (tmp, MantissaMask);
+  float32x4_t iz = vreinterpretq_f32_u32 (vsubq_u32 (u, top));
+  int32x4_t k = vshrq_n_s32 (vreinterpretq_s32_u32 (top),
+			     23 - V_EXP2F_TABLE_BITS); /* arithmetic shift.  */
+
+  /* Use double precision for each lane: split input vectors into lo and hi
+     halves and promote.  */
+  float64x2_t tab0 = log2_lookup (d, vgetq_lane_u32 (tmp, 0)),
+	      tab1 = log2_lookup (d, vgetq_lane_u32 (tmp, 1)),
+	      tab2 = log2_lookup (d, vgetq_lane_u32 (tmp, 2)),
+	      tab3 = log2_lookup (d, vgetq_lane_u32 (tmp, 3));
+
+  float64x2_t iz_lo = vcvt_f64_f32 (vget_low_f32 (iz)),
+	      iz_hi = vcvt_high_f64_f32 (iz);
+
+  float64x2_t k_lo = vcvtq_f64_s64 (vmovl_s32 (vget_low_s32 (k))),
+	      k_hi = vcvtq_f64_s64 (vmovl_high_s32 (k));
+
+  float64x2_t invc_lo = vzip1q_f64 (tab0, tab1),
+	      invc_hi = vzip1q_f64 (tab2, tab3),
+	      logc_lo = vzip2q_f64 (tab0, tab1),
+	      logc_hi = vzip2q_f64 (tab2, tab3);
+
+  float64x2_t y_lo = vcvt_f64_f32 (vget_low_f32 (y)),
+	      y_hi = vcvt_high_f64_f32 (y);
+
+  float64x2_t ylogx_lo = ylogx_core (d, iz_lo, k_lo, invc_lo, logc_lo, y_lo);
+  float64x2_t ylogx_hi = ylogx_core (d, iz_hi, k_hi, invc_hi, logc_hi, y_hi);
+  /* Odd 32-bit words of ylogx (presumably the high half of each double).  */
+  uint32x4_t ylogx_top = vuzp2q_u32 (vreinterpretq_u32_f64 (ylogx_lo),
+				     vreinterpretq_u32_f64 (ylogx_hi));
+  /* Also flag lanes where the top bits of |ylogx| reach those of 126 * N.  */
+  cmp = vorrq_u32 (
+      cmp, vcgeq_u32 (vandq_u32 (vshrq_n_u32 (ylogx_top, 15), v_u32 (0xffff)),
+		      vdupq_n_u32 (asuint64 (126.0 * (1 << V_EXP2F_TABLE_BITS))
+				   >> 47)));
+
+  float32x2_t p_lo = powf_core (d, ylogx_lo);
+  float32x2_t p_hi = powf_core (d, ylogx_hi);
+  /* If any lane is flagged, pass x, y, the result and cmp to special_case.  */
+  if (unlikely (v_any_u32 (cmp)))
+    return special_case (x, y, vcombine_f32 (p_lo, p_hi), cmp);
+  return vcombine_f32 (p_lo, p_hi);
+}
+
+HALF_WIDTH_ALIAS_F2 (pow)
+
+TEST_SIG (V, F, 2, pow)
+TEST_ULP (V_NAME_F2 (pow), 2.1)
+TEST_DISABLE_FENV (V_NAME_F2 (pow))
+TEST_INTERVAL2 (V_NAME_F2 (pow), 0x1p-1, 0x1p1, 0x1p-7, 0x1p7, 50000)
+TEST_INTERVAL2 (V_NAME_F2 (pow), 0x1p-1, 0x1p1, -0x1p-7, -0x1p7, 50000)
+TEST_INTERVAL2 (V_NAME_F2 (pow), 0x1p-70, 0x1p70, 0x1p-1, 0x1p1, 50000)
+TEST_INTERVAL2 (V_NAME_F2 (pow), 0x1p-70, 0x1p70, -0x1p-1, -0x1p1, 50000)
+TEST_INTERVAL2 (V_NAME_F2 (pow), 0x1.ep-1, 0x1.1p0, 0x1p8, 0x1p14, 50000)
+TEST_INTERVAL2 (V_NAME_F2 (pow), 0x1.ep-1, 0x1.1p0, -0x1p8, -0x1p14, 50000)
diff --git a/math/aarch64/v_sin.c b/math/aarch64/advsimd/sin.c
similarity index 77%
rename from math/aarch64/v_sin.c
rename to math/aarch64/advsimd/sin.c
index 04129c31133d62..0461bbb994059a 100644
--- a/math/aarch64/v_sin.c
+++ b/math/aarch64/advsimd/sin.c
@@ -1,17 +1,19 @@
 /*
  * Double-precision vector sin function.
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
+#include "test_defs.h"
+#include "test_sig.h"
 #include "mathlib.h"
 #include "v_math.h"
 
 static const struct data
 {
   float64x2_t poly[7];
-  float64x2_t range_val, inv_pi, shift, pi_1, pi_2, pi_3;
+  float64x2_t range_val, inv_pi, pi_1, pi_2, pi_3;
 } data = {
   .poly = { V2 (-0x1.555555555547bp-3), V2 (0x1.1111111108a4dp-7),
 	    V2 (-0x1.a01a019936f27p-13), V2 (0x1.71de37a97d93ep-19),
@@ -23,12 +25,13 @@ static const struct data
   .pi_1 = V2 (0x1.921fb54442d18p+1),
   .pi_2 = V2 (0x1.1a62633145c06p-53),
   .pi_3 = V2 (0x1.c1cd129024e09p-106),
-  .shift = V2 (0x1.8p52),
 };
 
 #if WANT_SIMD_EXCEPT
-# define TinyBound v_u64 (0x3000000000000000) /* asuint64 (0x1p-255).  */
-# define Thresh v_u64 (0x1160000000000000)    /* RangeVal - TinyBound.  */
+/* asuint64 (0x1p-253), below which multiply by inv_pi underflows.  */
+# define TinyBound v_u64 (0x3020000000000000)
+/* RangeVal - TinyBound.  */
+# define Thresh v_u64 (0x1160000000000000)
 #endif
 
 #define C(i) d->poly[i]
@@ -61,16 +64,15 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x)
      fenv). These lanes will be fixed by special-case handler later.  */
   uint64x2_t ir = vreinterpretq_u64_f64 (vabsq_f64 (x));
   cmp = vcgeq_u64 (vsubq_u64 (ir, TinyBound), Thresh);
-  r = vbslq_f64 (cmp, vreinterpretq_f64_u64 (cmp), x);
+  r = vreinterpretq_f64_u64 (vbicq_u64 (vreinterpretq_u64_f64 (x), cmp));
 #else
   r = x;
   cmp = vcageq_f64 (x, d->range_val);
 #endif
 
   /* n = rint(|x|/pi).  */
-  n = vfmaq_f64 (d->shift, d->inv_pi, r);
-  odd = vshlq_n_u64 (vreinterpretq_u64_f64 (n), 63);
-  n = vsubq_f64 (n, d->shift);
+  n = vrndaq_f64 (vmulq_f64 (r, d->inv_pi));
+  odd = vshlq_n_u64 (vreinterpretq_u64_s64 (vcvtq_s64_f64 (n)), 63);
 
   /* r = |x| - n*pi  (range reduction into -pi/2 .. pi/2).  */
   r = vfmsq_f64 (r, d->pi_1, n);
@@ -95,3 +97,9 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x)
     return special_case (x, y, odd, cmp);
   return vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd));
 }
+
+TEST_SIG (V, D, 1, sin, -3.1, 3.1)
+TEST_ULP (V_NAME_D1 (sin), 3.0)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (sin), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_D1 (sin), 0, 0x1p23, 500000)
+TEST_SYM_INTERVAL (V_NAME_D1 (sin), 0x1p23, inf, 10000)
diff --git a/pl/math/v_sincos_3u5.c b/math/aarch64/advsimd/sincos.c
similarity index 70%
rename from pl/math/v_sincos_3u5.c
rename to math/aarch64/advsimd/sincos.c
index 6fc014c120b866..83bfa45efa982c 100644
--- a/pl/math/v_sincos_3u5.c
+++ b/math/aarch64/advsimd/sincos.c
@@ -1,7 +1,7 @@
 /*
  * Double-precision vector sincos function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -10,12 +10,21 @@
    be linked against the scalar sincosf from math/.  */
 #define _GNU_SOURCE
 #include <math.h>
-#undef _GNU_SOURCE
 
 #include "v_math.h"
-#include "pl_test.h"
+#include "test_defs.h"
 #include "v_sincos_common.h"
 
+/* sincos not available for all scalar libm implementations.  */
+#if defined(_MSC_VER) || !defined(__GLIBC__)
+static void
+sincos (double x, double *out_sin, double *out_cos)
+{
+  *out_sin = sin (x);
+  *out_cos = cos (x);
+}
+#endif
+
 static void VPCS_ATTR NOINLINE
 special_case (float64x2_t x, uint64x2_t special, double *out_sin,
 	      double *out_cos)
@@ -46,12 +55,13 @@ _ZGVnN2vl8l8_sincos (float64x2_t x, double *out_sin, double *out_cos)
     special_case (x, special, out_sin, out_cos);
 }
 
-PL_TEST_ULP (_ZGVnN2v_sincos_sin, 2.73)
-PL_TEST_ULP (_ZGVnN2v_sincos_cos, 2.73)
+TEST_DISABLE_FENV (_ZGVnN2v_sincos_cos)
+TEST_DISABLE_FENV (_ZGVnN2v_sincos_sin)
+TEST_ULP (_ZGVnN2v_sincos_sin, 2.73)
+TEST_ULP (_ZGVnN2v_sincos_cos, 2.73)
 #define V_SINCOS_INTERVAL(lo, hi, n)                                          \
-  PL_TEST_INTERVAL (_ZGVnN2v_sincos_sin, lo, hi, n)                           \
-  PL_TEST_INTERVAL (_ZGVnN2v_sincos_cos, lo, hi, n)
-V_SINCOS_INTERVAL (0, 0x1p23, 500000)
-V_SINCOS_INTERVAL (-0, -0x1p23, 500000)
+  TEST_INTERVAL (_ZGVnN2v_sincos_sin, lo, hi, n)                              \
+  TEST_INTERVAL (_ZGVnN2v_sincos_cos, lo, hi, n)
+V_SINCOS_INTERVAL (0, 0x1p-31, 50000)
+V_SINCOS_INTERVAL (0x1p-31, 0x1p23, 500000)
 V_SINCOS_INTERVAL (0x1p23, inf, 10000)
-V_SINCOS_INTERVAL (-0x1p23, -inf, 10000)
diff --git a/pl/math/v_sincosf_1u8.c b/math/aarch64/advsimd/sincosf.c
similarity index 70%
rename from pl/math/v_sincosf_1u8.c
rename to math/aarch64/advsimd/sincosf.c
index bf77afaa14db02..cd482f38d5f645 100644
--- a/pl/math/v_sincosf_1u8.c
+++ b/math/aarch64/advsimd/sincosf.c
@@ -1,7 +1,7 @@
 /*
  * Single-precision vector sincos function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -10,11 +10,20 @@
    be linked against the scalar sincosf from math/.  */
 #define _GNU_SOURCE
 #include <math.h>
-#undef _GNU_SOURCE
 
 #include "v_sincosf_common.h"
 #include "v_math.h"
-#include "pl_test.h"
+#include "test_defs.h"
+
+/* sincos not available for all scalar libm implementations.  */
+#if defined(_MSC_VER) || !defined(__GLIBC__)
+static void
+sincosf (float x, float *out_sin, float *out_cos)
+{
+  *out_sin = sinf (x);
+  *out_cos = cosf (x);
+}
+#endif
 
 static void VPCS_ATTR NOINLINE
 special_case (float32x4_t x, uint32x4_t special, float *out_sin,
@@ -47,12 +56,13 @@ _ZGVnN4vl4l4_sincosf (float32x4_t x, float *out_sin, float *out_cos)
     special_case (x, special, out_sin, out_cos);
 }
 
-PL_TEST_ULP (_ZGVnN4v_sincosf_sin, 1.17)
-PL_TEST_ULP (_ZGVnN4v_sincosf_cos, 1.31)
+TEST_DISABLE_FENV (_ZGVnN4v_sincosf_sin)
+TEST_DISABLE_FENV (_ZGVnN4v_sincosf_cos)
+TEST_ULP (_ZGVnN4v_sincosf_sin, 1.17)
+TEST_ULP (_ZGVnN4v_sincosf_cos, 1.31)
 #define V_SINCOSF_INTERVAL(lo, hi, n)                                         \
-  PL_TEST_INTERVAL (_ZGVnN4v_sincosf_sin, lo, hi, n)                          \
-  PL_TEST_INTERVAL (_ZGVnN4v_sincosf_cos, lo, hi, n)
-V_SINCOSF_INTERVAL (0, 0x1p20, 500000)
-V_SINCOSF_INTERVAL (-0, -0x1p20, 500000)
+  TEST_INTERVAL (_ZGVnN4v_sincosf_sin, lo, hi, n)                             \
+  TEST_INTERVAL (_ZGVnN4v_sincosf_cos, lo, hi, n)
+V_SINCOSF_INTERVAL (0, 0x1p-31, 50000)
+V_SINCOSF_INTERVAL (0x1p-31, 0x1p20, 500000)
 V_SINCOSF_INTERVAL (0x1p20, inf, 10000)
-V_SINCOSF_INTERVAL (-0x1p20, -inf, 10000)
diff --git a/math/aarch64/advsimd/sincospi.c b/math/aarch64/advsimd/sincospi.c
new file mode 100644
index 00000000000000..fd425202ce6706
--- /dev/null
+++ b/math/aarch64/advsimd/sincospi.c
@@ -0,0 +1,44 @@
+/*
+ * Double-precision vector sincospi function.
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "v_sincospi_common.h"
+#include "v_math.h"
+#include "test_defs.h"
+
+/* Double-precision vector function allowing calculation of both sin and cos in
+   one function call, using separate argument reduction and shared low-order
+   polynomials.
+   Approximation for vector double-precision sincospi(x).
+   Maximum Error 3.09 ULP:
+  _ZGVnN2v_sincospi_sin(0x1.7a41deb4b21e1p+14) got 0x1.fd54d0b327cf1p-1
+					      want 0x1.fd54d0b327cf4p-1
+   Maximum Error 3.16 ULP:
+  _ZGVnN2v_sincospi_cos(-0x1.11e3c7e284adep-5) got 0x1.fd2da484ff3ffp-1
+					      want 0x1.fd2da484ff402p-1.  */
+VPCS_ATTR void
+_ZGVnN2vl8l8_sincospi (float64x2_t x, double *out_sin, double *out_cos)
+{
+  const struct v_sincospi_data *d = ptr_barrier (&v_sincospi_data);
+
+  float64x2x2_t sc = v_sincospi_inline (x, d);
+
+  vst1q_f64 (out_sin, sc.val[0]);
+  vst1q_f64 (out_cos, sc.val[1]);
+}
+
+#if WANT_TRIGPI_TESTS
+TEST_DISABLE_FENV (_ZGVnN2v_sincospi_cos)
+TEST_DISABLE_FENV (_ZGVnN2v_sincospi_sin)
+TEST_ULP (_ZGVnN2v_sincospi_sin, 2.59)
+TEST_ULP (_ZGVnN2v_sincospi_cos, 2.66)
+#  define V_SINCOSPI_INTERVAL(lo, hi, n)                                      \
+    TEST_SYM_INTERVAL (_ZGVnN2v_sincospi_sin, lo, hi, n)                      \
+    TEST_SYM_INTERVAL (_ZGVnN2v_sincospi_cos, lo, hi, n)
+V_SINCOSPI_INTERVAL (0, 0x1p-63, 10000)
+V_SINCOSPI_INTERVAL (0x1p-63, 0.5, 50000)
+V_SINCOSPI_INTERVAL (0.5, 0x1p63, 50000)
+V_SINCOSPI_INTERVAL (0x1p63, inf, 10000)
+#endif
diff --git a/math/aarch64/advsimd/sincospif.c b/math/aarch64/advsimd/sincospif.c
new file mode 100644
index 00000000000000..760ea3d4f5e181
--- /dev/null
+++ b/math/aarch64/advsimd/sincospif.c
@@ -0,0 +1,43 @@
+/*
+ * Single-precision vector sincospi function.
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_sincospif_common.h"
+#include "v_math.h"
+#include "test_defs.h"
+#include "mathlib.h"
+
+/* Single-precision vector function allowing calculation of both sinpi and
+   cospi in one function call, using shared argument reduction and polynomials.
+   Worst-case error for sin is 3.04 ULP:
+   _ZGVnN4v_sincospif_sin(0x1.1d341ap-1) got 0x1.f7cd56p-1 want 0x1.f7cd5p-1.
+   Worst-case error for cos is 3.18 ULP:
+   _ZGVnN4v_sincospif_cos(0x1.d341a8p-5) got 0x1.f7cd56p-1 want 0x1.f7cd5p-1.
+ */
+VPCS_ATTR void
+_ZGVnN4vl4l4_sincospif (float32x4_t x, float *out_sin, float *out_cos)
+{
+  const struct v_sincospif_data *d = ptr_barrier (&v_sincospif_data);
+
+  float32x4x2_t sc = v_sincospif_inline (x, d);
+
+  vst1q_f32 (out_sin, sc.val[0]);
+  vst1q_f32 (out_cos, sc.val[1]);
+}
+
+#if WANT_TRIGPI_TESTS
+TEST_DISABLE_FENV (_ZGVnN4v_sincospif_sin)
+TEST_DISABLE_FENV (_ZGVnN4v_sincospif_cos)
+TEST_ULP (_ZGVnN4v_sincospif_sin, 2.54)
+TEST_ULP (_ZGVnN4v_sincospif_cos, 2.68)
+#  define V_SINCOSPIF_INTERVAL(lo, hi, n)                                     \
+    TEST_SYM_INTERVAL (_ZGVnN4v_sincospif_sin, lo, hi, n)                     \
+    TEST_SYM_INTERVAL (_ZGVnN4v_sincospif_cos, lo, hi, n)
+V_SINCOSPIF_INTERVAL (0, 0x1p-63, 10000)
+V_SINCOSPIF_INTERVAL (0x1p-63, 0.5, 50000)
+V_SINCOSPIF_INTERVAL (0.5, 0x1p31, 50000)
+V_SINCOSPIF_INTERVAL (0x1p31, inf, 10000)
+#endif
diff --git a/math/aarch64/v_sinf.c b/math/aarch64/advsimd/sinf.c
similarity index 65%
rename from math/aarch64/v_sinf.c
rename to math/aarch64/advsimd/sinf.c
index 336879844459f7..0764434039a073 100644
--- a/math/aarch64/v_sinf.c
+++ b/math/aarch64/advsimd/sinf.c
@@ -1,17 +1,19 @@
 /*
  * Single-precision vector sin function.
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "mathlib.h"
 #include "v_math.h"
+#include "test_defs.h"
+#include "test_sig.h"
 
 static const struct data
 {
   float32x4_t poly[4];
-  float32x4_t range_val, inv_pi, shift, pi_1, pi_2, pi_3;
+  float32x4_t range_val, inv_pi, pi_1, pi_2, pi_3;
 } data = {
   /* 1.886 ulp error.  */
   .poly = { V4 (-0x1.555548p-3f), V4 (0x1.110df4p-7f), V4 (-0x1.9f42eap-13f),
@@ -22,13 +24,14 @@ static const struct data
   .pi_3 = V4 (-0x1.ee59dap-49f),
 
   .inv_pi = V4 (0x1.45f306p-2f),
-  .shift = V4 (0x1.8p+23f),
   .range_val = V4 (0x1p20f)
 };
 
 #if WANT_SIMD_EXCEPT
-# define TinyBound v_u32 (0x21000000) /* asuint32(0x1p-61f).  */
-# define Thresh v_u32 (0x28800000)    /* RangeVal - TinyBound.  */
+/* asuint32(0x1p-59f), below which multiply by inv_pi underflows.  */
+# define TinyBound v_u32 (0x22000000)
+/* RangeVal - TinyBound.  */
+# define Thresh v_u32 (0x27800000)
 #endif
 
 #define C(i) d->poly[i]
@@ -41,7 +44,7 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t odd, uint32x4_t cmp)
   return v_call_f32 (sinf, x, y, cmp);
 }
 
-float32x4_t VPCS_ATTR V_NAME_F1 (sin) (float32x4_t x)
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sin) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
   float32x4_t n, r, r2, y;
@@ -53,23 +56,22 @@ float32x4_t VPCS_ATTR V_NAME_F1 (sin) (float32x4_t x)
   /* If fenv exceptions are to be triggered correctly, set any special lanes
-     to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by
+     to 0 (which is neutral w.r.t. fenv). These lanes will be fixed by
      special-case handler later.  */
-  r = vbslq_f32 (cmp, vreinterpretq_f32_u32 (cmp), x);
+  r = vreinterpretq_f32_u32 (vbicq_u32 (vreinterpretq_u32_f32 (x), cmp));
 #else
   r = x;
   cmp = vcageq_f32 (x, d->range_val);
 #endif
 
-  /* n = rint(|x|/pi) */
-  n = vfmaq_f32 (d->shift, d->inv_pi, r);
-  odd = vshlq_n_u32 (vreinterpretq_u32_f32 (n), 31);
-  n = vsubq_f32 (n, d->shift);
+  /* n = rint(|x|/pi).  */
+  n = vrndaq_f32 (vmulq_f32 (r, d->inv_pi));
+  odd = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 31);
 
-  /* r = |x| - n*pi  (range reduction into -pi/2 .. pi/2) */
+  /* r = |x| - n*pi  (range reduction into -pi/2 .. pi/2).  */
   r = vfmsq_f32 (r, d->pi_1, n);
   r = vfmsq_f32 (r, d->pi_2, n);
   r = vfmsq_f32 (r, d->pi_3, n);
 
-  /* y = sin(r) */
+  /* y = sin(r).  */
   r2 = vmulq_f32 (r, r);
   y = vfmaq_f32 (C (2), C (3), r2);
   y = vfmaq_f32 (C (1), y, r2);
@@ -80,3 +82,11 @@ float32x4_t VPCS_ATTR V_NAME_F1 (sin) (float32x4_t x)
     return special_case (x, y, odd, cmp);
   return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));
 }
+
+HALF_WIDTH_ALIAS_F1 (sin)
+
+TEST_SIG (V, F, 1, sin, -3.1, 3.1)
+TEST_ULP (V_NAME_F1 (sin), 1.4)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (sin), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_F1 (sin), 0, 0x1p20, 500000)
+TEST_SYM_INTERVAL (V_NAME_F1 (sin), 0x1p20, inf, 10000)
diff --git a/math/aarch64/advsimd/sinh.c b/math/aarch64/advsimd/sinh.c
new file mode 100644
index 00000000000000..f65ccd0c627005
--- /dev/null
+++ b/math/aarch64/advsimd/sinh.c
@@ -0,0 +1,80 @@
+/*
+ * Double-precision vector sinh(x) function.
+ *
+ * Copyright (c) 2022-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "v_expm1_inline.h"
+
+static const struct data
+{
+  struct v_expm1_data d;
+  uint64x2_t halff;
+#if WANT_SIMD_EXCEPT
+  uint64x2_t tiny_bound, thresh;
+#else
+  float64x2_t large_bound;
+#endif
+} data = {
+  .d = V_EXPM1_DATA,
+  .halff = V2 (0x3fe0000000000000),
+#if WANT_SIMD_EXCEPT
+  /* 2^-26, below which sinh(x) rounds to x.  */
+  .tiny_bound = V2 (0x3e50000000000000),
+  /* asuint(large_bound) - asuint(tiny_bound).  */
+  .thresh = V2 (0x0230000000000000),
+#else
+  /* 2^9. expm1 helper overflows for large input.  */
+  .large_bound = V2 (0x1p+9),
+#endif
+};
+
+static float64x2_t NOINLINE VPCS_ATTR
+special_case (float64x2_t x)
+{
+  return v_call_f64 (sinh, x, x, v_u64 (-1));
+}
+
+/* Approximation for vector double-precision sinh(x) using expm1.
+   sinh(x) = (exp(x) - exp(-x)) / 2.
+   The greatest observed error is 2.52 ULP:
+   _ZGVnN2v_sinh(-0x1.a098a2177a2b9p-2) got -0x1.ac2f05bb66fccp-2
+				       want -0x1.ac2f05bb66fc9p-2.  */
+float64x2_t VPCS_ATTR V_NAME_D1 (sinh) (float64x2_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  float64x2_t ax = vabsq_f64 (x);
+  uint64x2_t ix = vreinterpretq_u64_f64 (x);
+  float64x2_t halfsign = vreinterpretq_f64_u64 (
+      vbslq_u64 (v_u64 (0x8000000000000000), ix, d->halff));
+
+#if WANT_SIMD_EXCEPT
+  uint64x2_t special = vcgeq_u64 (
+      vsubq_u64 (vreinterpretq_u64_f64 (ax), d->tiny_bound), d->thresh);
+#else
+  uint64x2_t special = vcageq_f64 (x, d->large_bound);
+#endif
+
+  /* Fall back to scalar variant for all lanes if any of them are special.  */
+  if (unlikely (v_any_u64 (special)))
+    return special_case (x);
+
+  /* Up to the point that expm1 overflows, we can use it to calculate sinh
+     using a slight rearrangement of the definition of sinh. This allows us to
+     retain acceptable accuracy for very small inputs.  */
+  float64x2_t t = expm1_inline (ax, &d->d);
+  t = vaddq_f64 (t, vdivq_f64 (t, vaddq_f64 (t, v_f64 (1.0))));
+  return vmulq_f64 (t, halfsign);
+}
+
+TEST_SIG (V, D, 1, sinh, -10.0, 10.0)
+TEST_ULP (V_NAME_D1 (sinh), 2.02)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (sinh), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_D1 (sinh), 0, 0x1p-26, 1000)
+TEST_SYM_INTERVAL (V_NAME_D1 (sinh), 0x1p-26, 0x1p9, 500000)
+TEST_SYM_INTERVAL (V_NAME_D1 (sinh), 0x1p9, inf, 1000)
diff --git a/pl/math/v_sinhf_2u3.c b/math/aarch64/advsimd/sinhf.c
similarity index 59%
rename from pl/math/v_sinhf_2u3.c
rename to math/aarch64/advsimd/sinhf.c
index cd8c0f08f78444..12dbe26b425b73 100644
--- a/pl/math/v_sinhf_2u3.c
+++ b/math/aarch64/advsimd/sinhf.c
@@ -1,28 +1,25 @@
 /*
  * Single-precision vector sinh(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
+#include "test_sig.h"
+#include "test_defs.h"
 #include "v_expm1f_inline.h"
 
 static const struct data
 {
   struct v_expm1f_data expm1f_consts;
-  uint32x4_t halff;
 #if WANT_SIMD_EXCEPT
   uint32x4_t tiny_bound, thresh;
 #else
-  uint32x4_t oflow_bound;
+  float32x4_t oflow_bound;
 #endif
 } data = {
   .expm1f_consts = V_EXPM1F_DATA,
-  .halff = V4 (0x3f000000),
 #if WANT_SIMD_EXCEPT
   /* 0x1.6a09e8p-32, below which expm1f underflows.  */
   .tiny_bound = V4 (0x2fb504f4),
@@ -30,14 +27,15 @@ static const struct data
   .thresh = V4 (0x12fbbbb3),
 #else
   /* 0x1.61814ep+6, above which expm1f helper overflows.  */
-  .oflow_bound = V4 (0x42b0c0a7),
+  .oflow_bound = V4 (0x1.61814ep+6),
 #endif
 };
 
 static float32x4_t NOINLINE VPCS_ATTR
-special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+special_case (float32x4_t x, float32x4_t t, float32x4_t halfsign,
+	      uint32x4_t special)
 {
-  return v_call_f32 (sinhf, x, y, special);
+  return v_call_f32 (sinhf, x, vmulq_f32 (t, halfsign), special);
 }
 
 /* Approximation for vector single-precision sinh(x) using expm1.
@@ -45,21 +43,21 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
    The maximum error is 2.26 ULP:
    _ZGVnN4v_sinhf (0x1.e34a9ep-4) got 0x1.e469ep-4
 				 want 0x1.e469e4p-4.  */
-float32x4_t VPCS_ATTR V_NAME_F1 (sinh) (float32x4_t x)
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sinh) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
 
   uint32x4_t ix = vreinterpretq_u32_f32 (x);
   float32x4_t ax = vabsq_f32 (x);
-  uint32x4_t iax = vreinterpretq_u32_f32 (ax);
-  uint32x4_t sign = veorq_u32 (ix, iax);
-  float32x4_t halfsign = vreinterpretq_f32_u32 (vorrq_u32 (sign, d->halff));
+  float32x4_t halfsign = vreinterpretq_f32_u32 (
+      vbslq_u32 (v_u32 (0x80000000), ix, vreinterpretq_u32_f32 (v_f32 (0.5))));
 
 #if WANT_SIMD_EXCEPT
-  uint32x4_t special = vcgeq_u32 (vsubq_u32 (iax, d->tiny_bound), d->thresh);
+  uint32x4_t special = vcgeq_u32 (
+      vsubq_u32 (vreinterpretq_u32_f32 (ax), d->tiny_bound), d->thresh);
   ax = v_zerofy_f32 (ax, special);
 #else
-  uint32x4_t special = vcgeq_u32 (iax, d->oflow_bound);
+  uint32x4_t special = vcageq_f32 (x, d->oflow_bound);
 #endif
 
   /* Up to the point that expm1f overflows, we can use it to calculate sinhf
@@ -71,14 +69,16 @@ float32x4_t VPCS_ATTR V_NAME_F1 (sinh) (float32x4_t x)
   /* Fall back to the scalar variant for any lanes that should trigger an
      exception.  */
   if (unlikely (v_any_u32 (special)))
-    return special_case (x, vmulq_f32 (t, halfsign), special);
+    return special_case (x, t, halfsign, special);
 
   return vmulq_f32 (t, halfsign);
 }
 
-PL_SIG (V, F, 1, sinh, -10.0, 10.0)
-PL_TEST_ULP (V_NAME_F1 (sinh), 1.76)
-PL_TEST_EXPECT_FENV (V_NAME_F1 (sinh), WANT_SIMD_EXCEPT)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (sinh), 0, 0x2fb504f4, 1000)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (sinh), 0x2fb504f4, 0x42b0c0a7, 100000)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (sinh), 0x42b0c0a7, inf, 1000)
+HALF_WIDTH_ALIAS_F1 (sinh)
+
+TEST_SIG (V, F, 1, sinh, -10.0, 10.0)
+TEST_ULP (V_NAME_F1 (sinh), 1.76)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (sinh), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_F1 (sinh), 0, 0x1.6a09e8p-32, 1000)
+TEST_SYM_INTERVAL (V_NAME_F1 (sinh), 0x1.6a09e8p-32, 0x1.61814ep+6, 100000)
+TEST_SYM_INTERVAL (V_NAME_F1 (sinh), 0x1.61814ep+6, inf, 1000)
diff --git a/pl/math/v_sinpi_3u1.c b/math/aarch64/advsimd/sinpi.c
similarity index 81%
rename from pl/math/v_sinpi_3u1.c
rename to math/aarch64/advsimd/sinpi.c
index 8d2917ff8ecd70..f86d167a2ac3d3 100644
--- a/pl/math/v_sinpi_3u1.c
+++ b/math/aarch64/advsimd/sinpi.c
@@ -1,15 +1,15 @@
 /*
  * Double-precision vector sinpi function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "mathlib.h"
 #include "v_math.h"
-#include "poly_advsimd_f64.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "v_poly_f64.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -34,7 +34,7 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t odd, uint64x2_t cmp)
 {
   /* Fall back to scalar code.  */
   y = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd));
-  return v_call_f64 (sinpi, x, y, cmp);
+  return v_call_f64 (arm_math_sinpi, x, y, cmp);
 }
 #endif
 
@@ -77,10 +77,11 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sinpi) (float64x2_t x)
   return vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd));
 }
 
-PL_SIG (V, D, 1, sinpi, -0.9, 0.9)
-PL_TEST_ULP (V_NAME_D1 (sinpi), 3.06)
-PL_TEST_EXPECT_FENV (V_NAME_D1 (sinpi), WANT_SIMD_EXCEPT)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (sinpi), 0, 0x1p-63, 5000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (sinpi), 0x1p-63, 0.5, 10000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (sinpi), 0.5, 0x1p51, 10000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (sinpi), 0x1p51, inf, 10000)
+#if WANT_TRIGPI_TESTS
+TEST_ULP (V_NAME_D1 (sinpi), 2.56)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (sinpi), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_D1 (sinpi), 0, 0x1p-63, 5000)
+TEST_SYM_INTERVAL (V_NAME_D1 (sinpi), 0x1p-63, 0.5, 10000)
+TEST_SYM_INTERVAL (V_NAME_D1 (sinpi), 0.5, 0x1p51, 10000)
+TEST_SYM_INTERVAL (V_NAME_D1 (sinpi), 0x1p51, inf, 10000)
+#endif
diff --git a/pl/math/v_sinpif_3u.c b/math/aarch64/advsimd/sinpif.c
similarity index 76%
rename from pl/math/v_sinpif_3u.c
rename to math/aarch64/advsimd/sinpif.c
index 3d6eeff333f7e8..98ba9d84d2fb07 100644
--- a/pl/math/v_sinpif_3u.c
+++ b/math/aarch64/advsimd/sinpif.c
@@ -1,15 +1,15 @@
 /*
  * Single-precision vector sinpi function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "mathlib.h"
 #include "v_math.h"
-#include "poly_advsimd_f32.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "v_poly_f32.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -29,7 +29,7 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t odd, uint32x4_t cmp)
 {
   /* Fall back to scalar code.  */
   y = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));
-  return v_call_f32 (sinpif, x, y, cmp);
+  return v_call_f32 (arm_math_sinpif, x, y, cmp);
 }
 #endif
 
@@ -37,7 +37,7 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t odd, uint32x4_t cmp)
     Maximum Error 3.03 ULP:
     _ZGVnN4v_sinpif(0x1.c597ccp-2) got 0x1.f7cd56p-1
 				  want 0x1.f7cd5p-1.  */
-float32x4_t VPCS_ATTR V_NAME_F1 (sinpi) (float32x4_t x)
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sinpi) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
 
@@ -72,10 +72,13 @@ float32x4_t VPCS_ATTR V_NAME_F1 (sinpi) (float32x4_t x)
   return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));
 }
 
-PL_SIG (V, F, 1, sinpi, -0.9, 0.9)
-PL_TEST_ULP (V_NAME_F1 (sinpi), 2.54)
-PL_TEST_EXPECT_FENV (V_NAME_F1 (sinpi), WANT_SIMD_EXCEPT)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (sinpi), 0, 0x1p-31, 5000)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (sinpi), 0x1p-31, 0.5, 10000)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (sinpi), 0.5, 0x1p31f, 10000)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (sinpi), 0x1p31f, inf, 10000)
+HALF_WIDTH_ALIAS_F1 (sinpi)
+
+#if WANT_TRIGPI_TESTS
+TEST_ULP (V_NAME_F1 (sinpi), 2.54)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (sinpi), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_F1 (sinpi), 0, 0x1p-31, 5000)
+TEST_SYM_INTERVAL (V_NAME_F1 (sinpi), 0x1p-31, 0.5, 10000)
+TEST_SYM_INTERVAL (V_NAME_F1 (sinpi), 0.5, 0x1p31f, 10000)
+TEST_SYM_INTERVAL (V_NAME_F1 (sinpi), 0x1p31f, inf, 10000)
+#endif
diff --git a/pl/math/v_tan_3u5.c b/math/aarch64/advsimd/tan.c
similarity index 86%
rename from pl/math/v_tan_3u5.c
rename to math/aarch64/advsimd/tan.c
index c431c8c4889ef8..957f9aba3a1e63 100644
--- a/pl/math/v_tan_3u5.c
+++ b/math/aarch64/advsimd/tan.c
@@ -1,19 +1,20 @@
 /*
  * Double-precision vector tan(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "poly_advsimd_f64.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "v_poly_f64.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
   float64x2_t poly[9];
-  float64x2_t half_pi, two_over_pi, shift;
+  double half_pi[2];
+  float64x2_t two_over_pi, shift;
 #if !WANT_SIMD_EXCEPT
   float64x2_t range_val;
 #endif
@@ -71,8 +72,9 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x)
   /* Use q to reduce x to r in [-pi/4, pi/4], by:
      r = x - q * pi/2, in extended precision.  */
   float64x2_t r = x;
-  r = vfmsq_laneq_f64 (r, q, dat->half_pi, 0);
-  r = vfmsq_laneq_f64 (r, q, dat->half_pi, 1);
+  float64x2_t half_pi = vld1q_f64 (dat->half_pi);
+  r = vfmsq_laneq_f64 (r, q, half_pi, 0);
+  r = vfmsq_laneq_f64 (r, q, half_pi, 1);
   /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle
      formula.  */
   r = vmulq_n_f64 (r, 0.5);
@@ -112,9 +114,9 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x)
 		    vbslq_f64 (no_recip, d, n));
 }
 
-PL_SIG (V, D, 1, tan, -3.1, 3.1)
-PL_TEST_ULP (V_NAME_D1 (tan), 2.99)
-PL_TEST_EXPECT_FENV (V_NAME_D1 (tan), WANT_SIMD_EXCEPT)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (tan), 0, TinyBound, 5000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (tan), TinyBound, RangeVal, 100000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (tan), RangeVal, inf, 5000)
+TEST_SIG (V, D, 1, tan, -3.1, 3.1)
+TEST_ULP (V_NAME_D1 (tan), 2.99)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (tan), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_D1 (tan), 0, TinyBound, 5000)
+TEST_SYM_INTERVAL (V_NAME_D1 (tan), TinyBound, RangeVal, 100000)
+TEST_SYM_INTERVAL (V_NAME_D1 (tan), RangeVal, inf, 5000)
diff --git a/pl/math/v_tanf_3u5.c b/math/aarch64/advsimd/tanf.c
similarity index 83%
rename from pl/math/v_tanf_3u5.c
rename to math/aarch64/advsimd/tanf.c
index 98948b0a9ecfb8..ed5448649f6cd7 100644
--- a/pl/math/v_tanf_3u5.c
+++ b/math/aarch64/advsimd/tanf.c
@@ -1,19 +1,19 @@
 /*
  * Single-precision vector tan(x) function.
  *
- * Copyright (c) 2021-2023, Arm Limited.
+ * Copyright (c) 2021-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "poly_advsimd_f32.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "v_poly_f32.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
   float32x4_t poly[6];
-  float32x4_t pi_consts;
+  float pi_consts[4];
   float32x4_t shift;
 #if !WANT_SIMD_EXCEPT
   float32x4_t range_val;
@@ -64,7 +64,7 @@ eval_poly (float32x4_t z, const struct data *d)
    Maximum error is 3.45 ULP:
    __v_tanf(-0x1.e5f0cap+13) got 0x1.ff9856p-1
 			    want 0x1.ff9850p-1.  */
-float32x4_t VPCS_ATTR V_NAME_F1 (tan) (float32x4_t x)
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tan) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
   float32x4_t special_arg = x;
@@ -85,16 +85,17 @@ float32x4_t VPCS_ATTR V_NAME_F1 (tan) (float32x4_t x)
 #endif
 
   /* n = rint(x/(pi/2)).  */
-  float32x4_t q = vfmaq_laneq_f32 (d->shift, x, d->pi_consts, 3);
+  float32x4_t pi_consts = vld1q_f32 (d->pi_consts);
+  float32x4_t q = vfmaq_laneq_f32 (d->shift, x, pi_consts, 3);
   float32x4_t n = vsubq_f32 (q, d->shift);
   /* Determine if x lives in an interval, where |tan(x)| grows to infinity.  */
   uint32x4_t pred_alt = vtstq_u32 (vreinterpretq_u32_f32 (q), v_u32 (1));
 
   /* r = x - n * (pi/2)  (range reduction into -pi./4 .. pi/4).  */
   float32x4_t r;
-  r = vfmaq_laneq_f32 (x, n, d->pi_consts, 0);
-  r = vfmaq_laneq_f32 (r, n, d->pi_consts, 1);
-  r = vfmaq_laneq_f32 (r, n, d->pi_consts, 2);
+  r = vfmaq_laneq_f32 (x, n, pi_consts, 0);
+  r = vfmaq_laneq_f32 (r, n, pi_consts, 1);
+  r = vfmaq_laneq_f32 (r, n, pi_consts, 2);
 
   /* If x lives in an interval, where |tan(x)|
      - is finite, then use a polynomial approximation of the form
@@ -119,9 +120,11 @@ float32x4_t VPCS_ATTR V_NAME_F1 (tan) (float32x4_t x)
   return vbslq_f32 (pred_alt, inv_y, y);
 }
 
-PL_SIG (V, F, 1, tan, -3.1, 3.1)
-PL_TEST_ULP (V_NAME_F1 (tan), 2.96)
-PL_TEST_EXPECT_FENV (V_NAME_F1 (tan), WANT_SIMD_EXCEPT)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (tan), 0, 0x1p-31, 5000)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (tan), 0x1p-31, 0x1p15, 500000)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (tan), 0x1p15, inf, 5000)
+HALF_WIDTH_ALIAS_F1 (tan)
+
+TEST_SIG (V, F, 1, tan, -3.1, 3.1)
+TEST_ULP (V_NAME_F1 (tan), 2.96)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (tan), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_F1 (tan), 0, 0x1p-31, 5000)
+TEST_SYM_INTERVAL (V_NAME_F1 (tan), 0x1p-31, 0x1p15, 500000)
+TEST_SYM_INTERVAL (V_NAME_F1 (tan), 0x1p15, inf, 5000)
diff --git a/math/aarch64/advsimd/tanh.c b/math/aarch64/advsimd/tanh.c
new file mode 100644
index 00000000000000..3dc6e5527ffce2
--- /dev/null
+++ b/math/aarch64/advsimd/tanh.c
@@ -0,0 +1,67 @@
+/*
+ * Double-precision vector tanh(x) function.
+ * Copyright (c) 2023-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "v_expm1_inline.h"
+
+static const struct data
+{
+  struct v_expm1_data d;
+  uint64x2_t thresh, tiny_bound;
+} data = {
+  .d = V_EXPM1_DATA,
+  .tiny_bound = V2 (0x3e40000000000000), /* asuint64 (0x1p-27).  */
+  /* asuint64(0x1.241bf835f9d5fp+4) - asuint64(tiny_bound).  */
+  .thresh = V2 (0x01f241bf835f9d5f),
+};
+
+static float64x2_t NOINLINE VPCS_ATTR
+special_case (float64x2_t x, float64x2_t q, float64x2_t qp2,
+	      uint64x2_t special)
+{
+  return v_call_f64 (tanh, x, vdivq_f64 (q, qp2), special);
+}
+
+/* Vector approximation for double-precision tanh(x), using a simplified
+   version of expm1. The greatest observed error is 2.70 ULP:
+   _ZGVnN2v_tanh(-0x1.c59aa220cb177p-3) got -0x1.be5452a6459fep-3
+				       want -0x1.be5452a6459fbp-3.  */
+float64x2_t VPCS_ATTR V_NAME_D1 (tanh) (float64x2_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x));
+
+  float64x2_t u = x;
+
+  /* Trigger special-cases for tiny, boring and infinity/NaN.  */
+  uint64x2_t special = vcgtq_u64 (vsubq_u64 (ia, d->tiny_bound), d->thresh);
+#if WANT_SIMD_EXCEPT
+  /* To trigger fp exceptions correctly, set special lanes to a neutral value.
+     They will be fixed up later by the special-case handler.  */
+  if (unlikely (v_any_u64 (special)))
+    u = v_zerofy_f64 (u, special);
+#endif
+
+  u = vaddq_f64 (u, u);
+
+  /* tanh(x) = (e^2x - 1) / (e^2x + 1).  */
+  float64x2_t q = expm1_inline (u, &d->d);
+  float64x2_t qp2 = vaddq_f64 (q, v_f64 (2.0));
+
+  if (unlikely (v_any_u64 (special)))
+    return special_case (x, q, qp2, special);
+  return vdivq_f64 (q, qp2);
+}
+
+TEST_SIG (V, D, 1, tanh, -10.0, 10.0)
+TEST_ULP (V_NAME_D1 (tanh), 2.21)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (tanh), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_D1 (tanh), 0, 0x1p-27, 5000)
+TEST_SYM_INTERVAL (V_NAME_D1 (tanh), 0x1p-27, 0x1.241bf835f9d5fp+4, 50000)
+TEST_SYM_INTERVAL (V_NAME_D1 (tanh), 0x1.241bf835f9d5fp+4, inf, 1000)
diff --git a/pl/math/v_tanhf_2u6.c b/math/aarch64/advsimd/tanhf.c
similarity index 62%
rename from pl/math/v_tanhf_2u6.c
rename to math/aarch64/advsimd/tanhf.c
index d1cb9fb6eeb3af..18fe93c7e7ba74 100644
--- a/pl/math/v_tanhf_2u6.c
+++ b/math/aarch64/advsimd/tanhf.c
@@ -1,14 +1,13 @@
 /*
  * Single-precision vector tanh(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
+#include "test_sig.h"
+#include "test_defs.h"
 #include "v_expm1f_inline.h"
 
 static const struct data
@@ -20,20 +19,23 @@ static const struct data
   /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for  negative).  */
   .boring_bound = V4 (0x41102cb3),
   .large_bound = V4 (0x7f800000),
-  .onef = V4 (0x3f800000),
 };
 
 static float32x4_t NOINLINE VPCS_ATTR
-special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+special_case (float32x4_t x, uint32x4_t is_boring, float32x4_t boring,
+	      float32x4_t q, uint32x4_t special)
 {
-  return v_call_f32 (tanhf, x, y, special);
+  return v_call_f32 (
+      tanhf, x,
+      vbslq_f32 (is_boring, boring, vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0)))),
+      special);
 }
 
 /* Approximation for single-precision vector tanh(x), using a simplified
    version of expm1f. The maximum error is 2.58 ULP:
    _ZGVnN4v_tanhf (0x1.fa5eep-5) got 0x1.f9ba02p-5
 				want 0x1.f9ba08p-5.  */
-float32x4_t VPCS_ATTR V_NAME_F1 (tanh) (float32x4_t x)
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tanh) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
 
@@ -42,7 +44,9 @@ float32x4_t VPCS_ATTR V_NAME_F1 (tanh) (float32x4_t x)
   uint32x4_t iax = vreinterpretq_u32_f32 (ax);
   uint32x4_t sign = veorq_u32 (ix, iax);
   uint32x4_t is_boring = vcgtq_u32 (iax, d->boring_bound);
-  float32x4_t boring = vreinterpretq_f32_u32 (vorrq_u32 (sign, d->onef));
+  /* expm1 exponent bias is 1.0f reinterpreted to int.  */
+  float32x4_t boring = vreinterpretq_f32_u32 (vorrq_u32 (
+      sign, vreinterpretq_u32_s32 (d->expm1f_consts.exponent_bias)));
 
 #if WANT_SIMD_EXCEPT
   /* If fp exceptions are to be triggered properly, set all special and boring
@@ -58,16 +62,20 @@ float32x4_t VPCS_ATTR V_NAME_F1 (tanh) (float32x4_t x)
 
   /* tanh(x) = (e^2x - 1) / (e^2x + 1).  */
   float32x4_t q = expm1f_inline (vmulq_n_f32 (x, 2), &d->expm1f_consts);
-  float32x4_t y = vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0)));
+
   if (unlikely (v_any_u32 (special)))
-    return special_case (vreinterpretq_f32_u32 (ix),
-			 vbslq_f32 (is_boring, boring, y), special);
+    return special_case (vreinterpretq_f32_u32 (ix), is_boring, boring, q,
+			 special);
+
+  float32x4_t y = vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0)));
   return vbslq_f32 (is_boring, boring, y);
 }
 
-PL_SIG (V, F, 1, tanh, -10.0, 10.0)
-PL_TEST_ULP (V_NAME_F1 (tanh), 2.09)
-PL_TEST_EXPECT_FENV (V_NAME_F1 (tanh), WANT_SIMD_EXCEPT)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (tanh), 0, 0x1p-23, 1000)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (tanh), 0x1p-23, 0x1.205966p+3, 100000)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (tanh), 0x1.205966p+3, inf, 100)
+HALF_WIDTH_ALIAS_F1 (tanh)
+
+TEST_SIG (V, F, 1, tanh, -10.0, 10.0)
+TEST_ULP (V_NAME_F1 (tanh), 2.09)
+TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (tanh), WANT_SIMD_EXCEPT)
+TEST_SYM_INTERVAL (V_NAME_F1 (tanh), 0, 0x1p-23, 1000)
+TEST_SYM_INTERVAL (V_NAME_F1 (tanh), 0x1p-23, 0x1.205966p+3, 100000)
+TEST_SYM_INTERVAL (V_NAME_F1 (tanh), 0x1.205966p+3, inf, 100)
diff --git a/math/aarch64/advsimd/tanpi.c b/math/aarch64/advsimd/tanpi.c
new file mode 100644
index 00000000000000..16de00ad555666
--- /dev/null
+++ b/math/aarch64/advsimd/tanpi.c
@@ -0,0 +1,88 @@
+/*
+ * Double-precision vector tanpi(x) function.
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+
+static const struct v_tanpi_data
+{
+  float64x2_t c0, c2, c4, c6, c8, c10, c12;
+  double c1, c3, c5, c7, c9, c11, c13, c14; /* Adjacent: loaded in pairs.  */
+} tanpi_data = {
+  /* Coefficients for tan(pi * x) computed with fpminimax
+     on [ 0x1p-1022 0x1p-2 ]
+     approx rel error: 0x1.7eap-55
+     approx abs error: 0x1.7eap-55.  */
+  .c0 = V2 (0x1.921fb54442d18p1), /* pi.  */
+  .c1 = 0x1.4abbce625be52p3,	  .c2 = V2 (0x1.466bc6775b0f9p5),
+  .c3 = 0x1.45fff9b426f5ep7,	  .c4 = V2 (0x1.45f4730dbca5cp9),
+  .c5 = 0x1.45f3265994f85p11,	  .c6 = V2 (0x1.45f4234b330cap13),
+  .c7 = 0x1.45dca11be79ebp15,	  .c8 = V2 (0x1.47283fc5eea69p17),
+  .c9 = 0x1.3a6d958cdefaep19,	  .c10 = V2 (0x1.927896baee627p21),
+  .c11 = -0x1.89333f6acd922p19,	  .c12 = V2 (0x1.5d4e912bb8456p27),
+  .c13 = -0x1.a854d53ab6874p29,	  .c14 = 0x1.1b76de7681424p32,
+};
+
+/* Approximation for double-precision vector tanpi(x)
+   The maximum error is 3.06 ULP:
+   _ZGVnN2v_tanpi(0x1.0a4a07dfcca3ep-1) got -0x1.fa30112702c98p+3
+				       want -0x1.fa30112702c95p+3.  */
+float64x2_t VPCS_ATTR V_NAME_D1 (tanpi) (float64x2_t x)
+{
+  const struct v_tanpi_data *d = ptr_barrier (&tanpi_data);
+
+  float64x2_t n = vrndnq_f64 (x);
+
+  /* inf produces nan that propagates.  */
+  float64x2_t xr = vsubq_f64 (x, n);
+  float64x2_t ar = vabdq_f64 (x, n);
+  uint64x2_t flip = vcgtq_f64 (ar, v_f64 (0.25));
+  float64x2_t r = vbslq_f64 (flip, vsubq_f64 (v_f64 (0.5), ar), ar);
+
+  /* Order-14 pairwise Horner.  */
+  float64x2_t r2 = vmulq_f64 (r, r);
+  float64x2_t r4 = vmulq_f64 (r2, r2);
+
+  float64x2_t c_1_3 = vld1q_f64 (&d->c1);
+  float64x2_t c_5_7 = vld1q_f64 (&d->c5);
+  float64x2_t c_9_11 = vld1q_f64 (&d->c9);
+  float64x2_t c_13_14 = vld1q_f64 (&d->c13);
+  float64x2_t p01 = vfmaq_laneq_f64 (d->c0, r2, c_1_3, 0);
+  float64x2_t p23 = vfmaq_laneq_f64 (d->c2, r2, c_1_3, 1);
+  float64x2_t p45 = vfmaq_laneq_f64 (d->c4, r2, c_5_7, 0);
+  float64x2_t p67 = vfmaq_laneq_f64 (d->c6, r2, c_5_7, 1);
+  float64x2_t p89 = vfmaq_laneq_f64 (d->c8, r2, c_9_11, 0);
+  float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, r2, c_9_11, 1);
+  float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, r2, c_13_14, 0);
+
+  float64x2_t p = vfmaq_laneq_f64 (p1213, r4, c_13_14, 1);
+  p = vfmaq_f64 (p1011, r4, p);
+  p = vfmaq_f64 (p89, r4, p);
+  p = vfmaq_f64 (p67, r4, p);
+  p = vfmaq_f64 (p45, r4, p);
+  p = vfmaq_f64 (p23, r4, p);
+  p = vfmaq_f64 (p01, r4, p);
+  p = vmulq_f64 (r, p);
+
+  float64x2_t p_recip = vdivq_f64 (v_f64 (1.0), p);
+  float64x2_t y = vbslq_f64 (flip, p_recip, p);
+
+  uint64x2_t sign
+      = veorq_u64 (vreinterpretq_u64_f64 (xr), vreinterpretq_u64_f64 (ar));
+  return vreinterpretq_f64_u64 (vorrq_u64 (vreinterpretq_u64_f64 (y), sign));
+}
+
+#if WANT_TRIGPI_TESTS
+TEST_DISABLE_FENV (V_NAME_D1 (tanpi))
+TEST_ULP (V_NAME_D1 (tanpi), 2.57)
+TEST_SYM_INTERVAL (V_NAME_D1 (tanpi), 0, 0x1p-31, 50000)
+TEST_SYM_INTERVAL (V_NAME_D1 (tanpi), 0x1p-31, 0.5, 50000)
+TEST_SYM_INTERVAL (V_NAME_D1 (tanpi), 0.5, 1.0, 200000)
+TEST_SYM_INTERVAL (V_NAME_D1 (tanpi), 1.0, 0x1p23, 50000)
+TEST_SYM_INTERVAL (V_NAME_D1 (tanpi), 0x1p23, inf, 50000)
+#endif
diff --git a/math/aarch64/advsimd/tanpif.c b/math/aarch64/advsimd/tanpif.c
new file mode 100644
index 00000000000000..7bd6d206819f82
--- /dev/null
+++ b/math/aarch64/advsimd/tanpif.c
@@ -0,0 +1,70 @@
+/*
+ * Single-precision vector tanpi(x) function.
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+
+const static struct v_tanpif_data
+{
+  float32x4_t c0, c2, c4, c6;
+  float c1, c3, c5, c7; /* Contiguous: read as one vector from &c1.  */
+} tanpif_data = {
+  /* Coefficients for tan(pi * x).  */
+  .c0 = V4 (0x1.921fb4p1f),  .c1 = 0x1.4abbcep3f,      .c2 = V4 (0x1.466b8p5f),
+  .c3 = 0x1.461c72p7f,	     .c4 = V4 (0x1.42e9d4p9f), .c5 = 0x1.69e2c4p11f,
+  .c6 = V4 (0x1.e85558p11f), .c7 = 0x1.a52e08p16f,
+};
+
+/* Approximation for single-precision vector tanpi(x)
+   The maximum error is 3.34 ULP:
+   _ZGVnN4v_tanpif(0x1.d6c09ap-2) got 0x1.f70aacp+2
+				 want 0x1.f70aa6p+2.  */
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tanpi) (float32x4_t x)
+{
+  const struct v_tanpif_data *d = ptr_barrier (&tanpif_data);
+
+  float32x4_t n = vrndnq_f32 (x);
+
+  /* xr = x - n.  For x = inf this is inf - inf = nan, which propagates.  */
+  float32x4_t xr = vsubq_f32 (x, n);
+  float32x4_t ar = vabdq_f32 (x, n);
+  uint32x4_t flip = vcgtq_f32 (ar, v_f32 (0.25f)); /* |xr| > 1/4.  */
+  float32x4_t r = vbslq_f32 (flip, vsubq_f32 (v_f32 (0.5f), ar), ar);
+
+  /* Order-7 pairwise Horner in r^2; pairs are combined with r^4.  */
+  float32x4_t r2 = vmulq_f32 (r, r);
+  float32x4_t r4 = vmulq_f32 (r2, r2);
+
+  float32x4_t odd_coeffs = vld1q_f32 (&d->c1);
+  float32x4_t p01 = vfmaq_laneq_f32 (d->c0, r2, odd_coeffs, 0);
+  float32x4_t p23 = vfmaq_laneq_f32 (d->c2, r2, odd_coeffs, 1);
+  float32x4_t p45 = vfmaq_laneq_f32 (d->c4, r2, odd_coeffs, 2);
+  float32x4_t p67 = vfmaq_laneq_f32 (d->c6, r2, odd_coeffs, 3);
+  float32x4_t p = vfmaq_f32 (p45, r4, p67);
+  p = vfmaq_f32 (p23, r4, p);
+  p = vfmaq_f32 (p01, r4, p);
+  /* Lanes in flip used r = 1/2 - |xr| and take the reciprocal below.  */
+  p = vmulq_f32 (r, p);
+  float32x4_t p_recip = vdivq_f32 (v_f32 (1.0f), p);
+  float32x4_t y = vbslq_f32 (flip, p_recip, p);
+  /* ar is |x - n|, so the XOR leaves only the sign bit of xr.  */
+  uint32x4_t sign
+      = veorq_u32 (vreinterpretq_u32_f32 (xr), vreinterpretq_u32_f32 (ar));
+  return vreinterpretq_f32_u32 (vorrq_u32 (vreinterpretq_u32_f32 (y), sign));
+}
+
+HALF_WIDTH_ALIAS_F1 (tanpi)
+
+#if WANT_TRIGPI_TESTS
+TEST_DISABLE_FENV (V_NAME_F1 (tanpi))
+TEST_ULP (V_NAME_F1 (tanpi), 2.84)
+TEST_SYM_INTERVAL (V_NAME_F1 (tanpi), 0, 0x1p-31, 50000)
+TEST_SYM_INTERVAL (V_NAME_F1 (tanpi), 0x1p-31, 0.5, 100000)
+TEST_SYM_INTERVAL (V_NAME_F1 (tanpi), 0.5, 0x1p23f, 100000)
+TEST_SYM_INTERVAL (V_NAME_F1 (tanpi), 0x1p23f, inf, 100000)
+#endif
diff --git a/math/aarch64/advsimd/v_expf_inline.h b/math/aarch64/advsimd/v_expf_inline.h
new file mode 100644
index 00000000000000..797d217820c3bb
--- /dev/null
+++ b/math/aarch64/advsimd/v_expf_inline.h
@@ -0,0 +1,58 @@
+/*
+ * Helper for single-precision routines which calculate exp(ax) and do not
+ * need special-case handling
+ *
+ * Copyright (c) 2019-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef MATH_V_EXPF_INLINE_H
+#define MATH_V_EXPF_INLINE_H
+
+#include "v_math.h"
+
+struct v_expf_data
+{
+  float ln2_hi, ln2_lo, c0, c2; /* Contiguous: read as one vector.  */
+  float32x4_t inv_ln2, c1, c3, c4;
+  /* asuint(1.0f), added to n << 23 to form the scale 2^n.  */
+  uint32x4_t exponent_bias;
+};
+
+/* maxerr: 1.45358 +0.5 ulp.  */
+#define V_EXPF_DATA                                                           \
+  {                                                                           \
+    .c0 = 0x1.0e4020p-7f, .c1 = V4 (0x1.573e2ep-5f), .c2 = 0x1.555e66p-3f,    \
+    .c3 = V4 (0x1.fffdb6p-2f), .c4 = V4 (0x1.ffffecp-1f),                     \
+    .ln2_hi = 0x1.62e4p-1f, .ln2_lo = 0x1.7f7d1cp-20f,                        \
+    .inv_ln2 = V4 (0x1.715476p+0f), .exponent_bias = V4 (0x3f800000),         \
+  }
+
+static inline float32x4_t
+v_expf_inline (float32x4_t x, const struct v_expf_data *d)
+{
+  /* Helper routine for calculating exp(ax), where ax = |x|.
+     Copied from v_expf.c, with all special-case handling removed - the
+     calling routine should handle special values if required.  */
+
+  /* exp(ax) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
+     ax = ln2*n + r, with r in [-ln2/2, ln2/2].  */
+  float32x4_t ax = vabsq_f32 (x);
+  float32x4_t ln2_c02 = vld1q_f32 (&d->ln2_hi); /* ln2_hi, ln2_lo, c0, c2.  */
+  float32x4_t n = vrndaq_f32 (vmulq_f32 (ax, d->inv_ln2));
+  float32x4_t r = vfmsq_laneq_f32 (ax, n, ln2_c02, 0);
+  r = vfmsq_laneq_f32 (r, n, ln2_c02, 1);
+  uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 23);
+  float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias));
+
+  /* Custom order-4 Estrin avoids building high order monomial.  */
+  float32x4_t r2 = vmulq_f32 (r, r);
+  float32x4_t p = vfmaq_laneq_f32 (d->c1, r, ln2_c02, 2); /* c1 + c0 r.  */
+  float32x4_t q = vfmaq_laneq_f32 (d->c3, r, ln2_c02, 3); /* c3 + c2 r.  */
+  q = vfmaq_f32 (q, p, r2);
+  p = vmulq_f32 (d->c4, r);
+  float32x4_t poly = vfmaq_f32 (p, q, r2);
+  return vfmaq_f32 (scale, poly, scale); /* scale * (1 + poly).  */
+}
+
+#endif // MATH_V_EXPF_INLINE_H
diff --git a/math/aarch64/advsimd/v_expm1_inline.h b/math/aarch64/advsimd/v_expm1_inline.h
new file mode 100644
index 00000000000000..82d2e9415d93b8
--- /dev/null
+++ b/math/aarch64/advsimd/v_expm1_inline.h
@@ -0,0 +1,86 @@
+/*
+ * Helper for double-precision routines which calculate exp(x) - 1 and do not
+ * need special-case handling
+ *
+ * Copyright (c) 2022-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef MATH_V_EXPM1_INLINE_H
+#define MATH_V_EXPM1_INLINE_H
+
+#include "v_math.h"
+
+struct v_expm1_data
+{
+  float64x2_t c2, c4, c6, c8;
+  float64x2_t invln2;
+  int64x2_t exponent_bias;
+  double c1, c3, c5, c7, c9, c10; /* Contiguous: read in adjacent pairs.  */
+  double ln2[2]; /* Two-part ln2, subtracted in turn.  */
+};
+
+/* Generated using fpminimax, with degree=12 in [-log(2)/2, log(2)/2].  */
+#define V_EXPM1_DATA                                                          \
+  {                                                                           \
+    .c1 = 0x1.5555555555559p-3, .c2 = V2 (0x1.555555555554bp-5),              \
+    .c3 = 0x1.111111110f663p-7, .c4 = V2 (0x1.6c16c16c1b5f3p-10),             \
+    .c5 = 0x1.a01a01affa35dp-13, .c6 = V2 (0x1.a01a018b4ecbbp-16),            \
+    .c7 = 0x1.71ddf82db5bb4p-19, .c8 = V2 (0x1.27e517fc0d54bp-22),            \
+    .c9 = 0x1.af5eedae67435p-26, .c10 = 0x1.1f143d060a28ap-29,                \
+    .ln2 = { 0x1.62e42fefa39efp-1, 0x1.abc9e3b39803fp-56 },                   \
+    .invln2 = V2 (0x1.71547652b82fep0),                                       \
+    .exponent_bias = V2 (0x3ff0000000000000),                                 \
+  }
+
+static inline float64x2_t
+expm1_inline (float64x2_t x, const struct v_expm1_data *d)
+{
+  /* Helper routine for calculating exp(x) - 1 (no special-case handling).  */
+
+  float64x2_t ln2 = vld1q_f64 (&d->ln2[0]);
+
+  /* Reduce argument to smaller range:
+     Let i = round(x / ln2)
+     and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
+     exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
+     where 2^i is exact because i is an integer.  */
+  float64x2_t n = vrndaq_f64 (vmulq_f64 (x, d->invln2));
+  int64x2_t i = vcvtq_s64_f64 (n);
+  float64x2_t f = vfmsq_laneq_f64 (x, n, ln2, 0);
+  f = vfmsq_laneq_f64 (f, n, ln2, 1);
+
+  /* Approximate expm1(f) using polynomial.
+     Taylor expansion for expm1(x) has the form:
+	 x + ax^2 + bx^3 + cx^4 ....
+     So we calculate the polynomial P(f) = a + bf + cf^2 + ...
+     and assemble the approximation expm1(f) ~= f + f^2 * P(f).  */
+  float64x2_t f2 = vmulq_f64 (f, f);
+  float64x2_t f4 = vmulq_f64 (f2, f2);
+  float64x2_t lane_consts_13 = vld1q_f64 (&d->c1);
+  float64x2_t lane_consts_57 = vld1q_f64 (&d->c5);
+  float64x2_t lane_consts_910 = vld1q_f64 (&d->c9);
+  float64x2_t p01 = vfmaq_laneq_f64 (v_f64 (0.5), f, lane_consts_13, 0);
+  float64x2_t p23 = vfmaq_laneq_f64 (d->c2, f, lane_consts_13, 1);
+  float64x2_t p45 = vfmaq_laneq_f64 (d->c4, f, lane_consts_57, 0);
+  float64x2_t p67 = vfmaq_laneq_f64 (d->c6, f, lane_consts_57, 1);
+  float64x2_t p03 = vfmaq_f64 (p01, f2, p23);
+  float64x2_t p47 = vfmaq_f64 (p45, f2, p67);
+  float64x2_t p89 = vfmaq_laneq_f64 (d->c8, f, lane_consts_910, 0);
+  float64x2_t p = vfmaq_laneq_f64 (p89, f2, lane_consts_910, 1);
+  p = vfmaq_f64 (p47, f4, p);
+  p = vfmaq_f64 (p03, f4, p);
+
+  p = vfmaq_f64 (f, f2, p); /* p is now f + f^2 * P(f).  */
+
+  /* Assemble the result.
+     expm1(x) ~= 2^i * (p + 1) - 1
+     Let t = 2^i.  */
+  int64x2_t u = vaddq_s64 (vshlq_n_s64 (i, 52), d->exponent_bias);
+  float64x2_t t = vreinterpretq_f64_s64 (u);
+
+  /* expm1(x) ~= p * t + (t - 1).  */
+  return vfmaq_f64 (vsubq_f64 (t, v_f64 (1.0)), p, t);
+}
+
+#endif // MATH_V_EXPM1_INLINE_H
diff --git a/math/aarch64/advsimd/v_expm1f_inline.h b/math/aarch64/advsimd/v_expm1f_inline.h
new file mode 100644
index 00000000000000..463b07aa7705d6
--- /dev/null
+++ b/math/aarch64/advsimd/v_expm1f_inline.h
@@ -0,0 +1,62 @@
+/*
+ * Helper for single-precision routines which calculate exp(x) - 1 and do not
+ * need special-case handling
+ *
+ * Copyright (c) 2022-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef MATH_V_EXPM1F_INLINE_H
+#define MATH_V_EXPM1F_INLINE_H
+
+#include "v_math.h"
+
+struct v_expm1f_data
+{
+  float32x4_t c0, c2;
+  int32x4_t exponent_bias;
+  float c1, c3, inv_ln2, c4; /* Contiguous: read as one 4-lane vector.  */
+  float ln2_hi, ln2_lo;	     /* Contiguous: read as one 2-lane vector.  */
+};
+
+/* Coefficients generated using fpminimax with degree=5 in [-log(2)/2,
+   log(2)/2]. Exponent bias is asuint(1.0f).  */
+#define V_EXPM1F_DATA                                                         \
+  {                                                                           \
+    .c0 = V4 (0x1.fffffep-2), .c1 = 0x1.5554aep-3, .c2 = V4 (0x1.555736p-5),  \
+    .c3 = 0x1.12287cp-7, .c4 = 0x1.6b55a2p-10,                                \
+    .exponent_bias = V4 (0x3f800000), .inv_ln2 = 0x1.715476p+0f,              \
+    .ln2_hi = 0x1.62e4p-1f, .ln2_lo = 0x1.7f7d1cp-20f,                        \
+  }
+
+static inline float32x4_t
+expm1f_inline (float32x4_t x, const struct v_expm1f_data *d)
+{
+  /* Helper routine for calculating exp(x) - 1 (no special-case handling).  */
+
+  float32x2_t ln2 = vld1_f32 (&d->ln2_hi);
+  float32x4_t lane_consts = vld1q_f32 (&d->c1);
+  /* lane_consts = { c1, c3, inv_ln2, c4 }; ln2 = { ln2_hi, ln2_lo }.  */
+  /* Reduce argument: f in [-ln2/2, ln2/2], i is exact.  */
+  float32x4_t j = vrndaq_f32 (vmulq_laneq_f32 (x, lane_consts, 2));
+  int32x4_t i = vcvtq_s32_f32 (j);
+  float32x4_t f = vfmsq_lane_f32 (x, j, ln2, 0);
+  f = vfmsq_lane_f32 (f, j, ln2, 1);
+
+  /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f).  */
+  float32x4_t f2 = vmulq_f32 (f, f);
+  float32x4_t f4 = vmulq_f32 (f2, f2);
+  float32x4_t p01 = vfmaq_laneq_f32 (d->c0, f, lane_consts, 0);
+  float32x4_t p23 = vfmaq_laneq_f32 (d->c2, f, lane_consts, 1);
+  float32x4_t p = vfmaq_f32 (p01, f2, p23);
+  p = vfmaq_laneq_f32 (p, f4, lane_consts, 3); /* + c4 f^4.  */
+  p = vfmaq_f32 (f, f2, p);
+
+  /* t = 2^i.  */
+  int32x4_t u = vaddq_s32 (vshlq_n_s32 (i, 23), d->exponent_bias);
+  float32x4_t t = vreinterpretq_f32_s32 (u);
+  /* expm1(x) ~= p * t + (t - 1).  */
+  return vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t);
+}
+
+#endif // MATH_V_EXPM1F_INLINE_H
diff --git a/math/aarch64/advsimd/v_log1p_inline.h b/math/aarch64/advsimd/v_log1p_inline.h
new file mode 100644
index 00000000000000..ef906ae4b6033c
--- /dev/null
+++ b/math/aarch64/advsimd/v_log1p_inline.h
@@ -0,0 +1,119 @@
+/*
+ * Helper for vector double-precision routines which calculate log(1 + x) and
+ * do not need special-case handling
+ *
+ * Copyright (c) 2022-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#ifndef MATH_V_LOG1P_INLINE_H
+#define MATH_V_LOG1P_INLINE_H
+
+#include "v_math.h"
+
+struct v_log1p_data
+{
+  float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16;
+  uint64x2_t hf_rt2_top, one_m_hf_rt2_top, umask;
+  int64x2_t one_top;
+  double c1, c3, c5, c7, c9, c11, c13, c15, c17, c18; /* Read in pairs.  */
+  double ln2[2]; /* Two-part ln2: lane 0 scales f, lane 1 the correction.  */
+};
+
+/* Coefficients generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1].  */
+#define V_LOG1P_CONSTANTS_TABLE                                               \
+  {                                                                           \
+    .c0 = V2 (-0x1.ffffffffffffbp-2), .c1 = 0x1.55555555551a9p-2,             \
+    .c2 = V2 (-0x1.00000000008e3p-2), .c3 = 0x1.9999999a32797p-3,             \
+    .c4 = V2 (-0x1.555555552fecfp-3), .c5 = 0x1.249248e071e5ap-3,             \
+    .c6 = V2 (-0x1.ffffff8bf8482p-4), .c7 = 0x1.c71c8f07da57ap-4,             \
+    .c8 = V2 (-0x1.9999ca4ccb617p-4), .c9 = 0x1.7459ad2e1dfa3p-4,             \
+    .c10 = V2 (-0x1.554d2680a3ff2p-4), .c11 = 0x1.3b4c54d487455p-4,           \
+    .c12 = V2 (-0x1.2548a9ffe80e6p-4), .c13 = 0x1.0f389a24b2e07p-4,           \
+    .c14 = V2 (-0x1.eee4db15db335p-5), .c15 = 0x1.e95b494d4a5ddp-5,           \
+    .c16 = V2 (-0x1.15fdf07cb7c73p-4), .c17 = 0x1.0310b70800fcfp-4,           \
+    .c18 = -0x1.cfa7385bdb37ep-6,                                             \
+    .ln2 = { 0x1.62e42fefa3800p-1, 0x1.ef35793c76730p-45 },                   \
+    .hf_rt2_top = V2 (0x3fe6a09e00000000),                                    \
+    .one_m_hf_rt2_top = V2 (0x00095f6200000000),                              \
+    .umask = V2 (0x000fffff00000000), .one_top = V2 (0x3ff)                   \
+  }
+
+#define BottomMask v_u64 (0xffffffff)
+
+static inline float64x2_t
+eval_poly (float64x2_t m, float64x2_t m2, const struct v_log1p_data *d)
+{
+  /* Pairwise Horner for log(1+m) on [sqrt(2)/2-1, sqrt(2)-1].  */
+  float64x2_t c13 = vld1q_f64 (&d->c1);
+  float64x2_t c57 = vld1q_f64 (&d->c5);
+  float64x2_t c911 = vld1q_f64 (&d->c9);
+  float64x2_t c1315 = vld1q_f64 (&d->c13);
+  float64x2_t c1718 = vld1q_f64 (&d->c17);
+  float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, m, c1718, 0);
+  float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, m, c1315, 1);
+  float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, m, c1315, 0);
+  float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, m, c911, 1);
+  float64x2_t p89 = vfmaq_laneq_f64 (d->c8, m, c911, 0);
+  float64x2_t p67 = vfmaq_laneq_f64 (d->c6, m, c57, 1);
+  float64x2_t p45 = vfmaq_laneq_f64 (d->c4, m, c57, 0);
+  float64x2_t p23 = vfmaq_laneq_f64 (d->c2, m, c13, 1);
+  float64x2_t p01 = vfmaq_laneq_f64 (d->c0, m, c13, 0);
+  float64x2_t p = vfmaq_laneq_f64 (p1617, m2, c1718, 1);
+  p = vfmaq_f64 (p1415, m2, p);
+  p = vfmaq_f64 (p1213, m2, p);
+  p = vfmaq_f64 (p1011, m2, p);
+  p = vfmaq_f64 (p89, m2, p);
+  p = vfmaq_f64 (p67, m2, p);
+  p = vfmaq_f64 (p45, m2, p);
+  p = vfmaq_f64 (p23, m2, p);
+  return vfmaq_f64 (p01, m2, p); /* P(m); caller forms m + m^2 * P(m).  */
+}
+
+static inline float64x2_t
+log1p_inline (float64x2_t x, const struct v_log1p_data *d)
+{
+  /* Helper for calculating log(x + 1):
+     - No special-case handling - this should be dealt with by the caller.
+     - Optionally simulate the shortcut for k=0, used in the scalar routine,
+       using v_sel, for improved accuracy when the argument to log1p is close
+       to 0. This feature is enabled by defining WANT_V_LOG1P_K0_SHORTCUT as 1
+       in the source of the caller before including this file.  */
+  float64x2_t m = vaddq_f64 (x, v_f64 (1.0));
+  uint64x2_t mi = vreinterpretq_u64_f64 (m);
+  uint64x2_t u = vaddq_u64 (mi, d->one_m_hf_rt2_top);
+
+  int64x2_t ki
+      = vsubq_s64 (vreinterpretq_s64_u64 (vshrq_n_u64 (u, 52)), d->one_top);
+  float64x2_t k = vcvtq_f64_s64 (ki);
+
+  /* Reduce m to [sqrt(2)/2, sqrt(2)]; f is that reduced value minus 1.  */
+  uint64x2_t utop = vaddq_u64 (vandq_u64 (u, d->umask), d->hf_rt2_top);
+  uint64x2_t u_red = vorrq_u64 (utop, vandq_u64 (mi, BottomMask));
+  float64x2_t f = vsubq_f64 (vreinterpretq_f64_u64 (u_red), v_f64 (1.0));
+
+  /* Correction term c/m, where c = x - (m - 1).  */
+  float64x2_t cm = vdivq_f64 (vsubq_f64 (x, vsubq_f64 (m, v_f64 (1.0))), m);
+
+#ifndef WANT_V_LOG1P_K0_SHORTCUT
+# error                                                                       \
+      "Cannot use v_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0"
+#elif WANT_V_LOG1P_K0_SHORTCUT
+  /* Shortcut if k is 0 - set correction term to 0 and f to x. The result is
+     that the approximation is solely the polynomial.  */
+  uint64x2_t k0 = vceqzq_f64 (k);
+  cm = v_zerofy_f64 (cm, k0);
+  f = vbslq_f64 (k0, x, f);
+#endif
+
+  /* Approximate log1p(f) on the reduced input using a polynomial.  */
+  float64x2_t f2 = vmulq_f64 (f, f);
+  float64x2_t p = eval_poly (f, f2, d);
+
+  /* Assemble log1p(x) = k * ln2 + log1p(f) + c/m.  */
+  float64x2_t ln2 = vld1q_f64 (&d->ln2[0]);
+  float64x2_t ylo = vfmaq_laneq_f64 (cm, k, ln2, 1);
+  float64x2_t yhi = vfmaq_laneq_f64 (f, k, ln2, 0);
+  return vfmaq_f64 (vaddq_f64 (ylo, yhi), f2, p);
+}
+
+#endif // MATH_V_LOG1P_INLINE_H
diff --git a/math/aarch64/advsimd/v_log1pf_inline.h b/math/aarch64/advsimd/v_log1pf_inline.h
new file mode 100644
index 00000000000000..e81fa24486aeda
--- /dev/null
+++ b/math/aarch64/advsimd/v_log1pf_inline.h
@@ -0,0 +1,94 @@
+/*
+ * Helper for single-precision routines which calculate log(1 + x) and do not
+ * need special-case handling
+ *
+ * Copyright (c) 2022-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef MATH_V_LOG1PF_INLINE_H
+#define MATH_V_LOG1PF_INLINE_H
+
+#include "v_math.h"
+#include "v_poly_f32.h"
+
+struct v_log1pf_data
+{
+  uint32x4_t four;
+  int32x4_t three_quarters;
+  float c0, c3, c5, c7; /* Contiguous: read as one 4-lane vector.  */
+  float32x4_t c4, c6, c1, c2, ln2;
+};
+
+/* Polynomial generated using FPMinimax in [-0.25, 0.5]. First two coefficients
+   (1, -0.5) are not stored as they can be generated more efficiently.  */
+#define V_LOG1PF_CONSTANTS_TABLE                                              \
+  {                                                                           \
+    .c0 = 0x1.5555aap-2f, .c1 = V4 (-0x1.000038p-2f),                         \
+    .c2 = V4 (0x1.99675cp-3f), .c3 = -0x1.54ef78p-3f,                         \
+    .c4 = V4 (0x1.28a1f4p-3f), .c5 = -0x1.0da91p-3f,                          \
+    .c6 = V4 (0x1.abcb6p-4f), .c7 = -0x1.6f0d5ep-5f,                          \
+    .ln2 = V4 (0x1.62e43p-1f), .four = V4 (0x40800000),                       \
+    .three_quarters = V4 (0x3f400000)                                         \
+  }
+
+static inline float32x4_t
+eval_poly (float32x4_t m, const struct v_log1pf_data *d)
+{
+  /* Approximate log(1+m) on [-0.25, 0.5] using pairwise Horner.  */
+  float32x4_t c0357 = vld1q_f32 (&d->c0); /* { c0, c3, c5, c7 }.  */
+  float32x4_t q = vfmaq_laneq_f32 (v_f32 (-0.5), m, c0357, 0);
+  float32x4_t m2 = vmulq_f32 (m, m);
+  float32x4_t p67 = vfmaq_laneq_f32 (d->c6, m, c0357, 3);
+  float32x4_t p45 = vfmaq_laneq_f32 (d->c4, m, c0357, 2);
+  float32x4_t p23 = vfmaq_laneq_f32 (d->c2, m, c0357, 1);
+  float32x4_t p = vfmaq_f32 (p45, m2, p67);
+  p = vfmaq_f32 (p23, m2, p);
+  p = vfmaq_f32 (d->c1, m, p);
+  p = vmulq_f32 (m2, p);
+  p = vfmaq_f32 (m, m2, p); /* m + m^4 * (c1 + ...).  */
+  return vfmaq_f32 (p, m2, q); /* Adds m^2 * (-1/2 + c0 m).  */
+}
+
+static inline float32x4_t
+log1pf_inline (float32x4_t x, const struct v_log1pf_data *d)
+{
+  /* Helper for calculating log(x + 1).  */
+
+  /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
+     is in [-0.25, 0.5]):
+     log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
+
+     We approximate log1p(m) with a polynomial, then add k*log(2).
+     To form the reduced argument, x is scaled by 2^-k through exponent
+     arithmetic; the intermediate factor s = 4 * 2^-k is used so that the
+     scale is representable as a normalised fp32 number.  */
+  float32x4_t m = vaddq_f32 (x, v_f32 (1.0f));
+
+  /* Choose k to scale x to the range [-1/4, 1/2].  */
+  int32x4_t k
+      = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d->three_quarters),
+		   v_s32 (0xff800000));
+  uint32x4_t ku = vreinterpretq_u32_s32 (k);
+
+  /* Scale up to ensure that the scale factor is representable as normalised
+     fp32 number, and scale m down accordingly.  */
+  float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d->four, ku));
+
+  /* Scale x by exponent manipulation, then add (s / 4 - 1).  */
+  float32x4_t m_scale
+      = vreinterpretq_f32_u32 (vsubq_u32 (vreinterpretq_u32_f32 (x), ku));
+  m_scale = vaddq_f32 (m_scale, vfmaq_f32 (v_f32 (-1.0f), v_f32 (0.25f), s));
+
+  /* Evaluate polynomial on the reduced interval.  */
+  float32x4_t p = eval_poly (m_scale, d);
+
+  /* The scale factor to be applied back at the end - by multiplying float(k)
+     by 2^-23 we get the unbiased exponent of k.  */
+  float32x4_t scale_back = vmulq_f32 (vcvtq_f32_s32 (k), v_f32 (0x1.0p-23f));
+
+  /* Apply the scaling back: p + scale_back * ln2.  */
+  return vfmaq_f32 (p, scale_back, d->ln2);
+}
+
+#endif //  MATH_V_LOG1PF_INLINE_H
diff --git a/pl/math/v_log_inline.h b/math/aarch64/advsimd/v_log_inline.h
similarity index 94%
rename from pl/math/v_log_inline.h
rename to math/aarch64/advsimd/v_log_inline.h
index 2df00cf4ddf4c2..770f9e81c19532 100644
--- a/pl/math/v_log_inline.h
+++ b/math/aarch64/advsimd/v_log_inline.h
@@ -1,7 +1,7 @@
 /*
  * Double-precision vector log(x) function - inline version
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -57,8 +57,8 @@ log_lookup (uint64x2_t i)
 {
   /* Since N is a power of 2, n % N = n & (N - 1).  */
   struct entry e;
-  uint64_t i0 = (i[0] >> (52 - V_LOG_TABLE_BITS)) & IndexMask;
-  uint64_t i1 = (i[1] >> (52 - V_LOG_TABLE_BITS)) & IndexMask;
+  uint64_t i0 = (vgetq_lane_u64 (i, 0) >> (52 - V_LOG_TABLE_BITS)) & IndexMask;
+  uint64_t i1 = (vgetq_lane_u64 (i, 1) >> (52 - V_LOG_TABLE_BITS)) & IndexMask;
   float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc);
   float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc);
   e.invc = vuzp1q_f64 (e0, e1);
diff --git a/pl/math/v_math.h b/math/aarch64/advsimd/v_math.h
similarity index 58%
rename from pl/math/v_math.h
rename to math/aarch64/advsimd/v_math.h
index 1b10929facccfb..75cd71cc87a77c 100644
--- a/pl/math/v_math.h
+++ b/math/aarch64/advsimd/v_math.h
@@ -1,36 +1,63 @@
 /*
  * Vector math abstractions.
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #ifndef _V_MATH_H
 #define _V_MATH_H
 
-#ifndef WANT_VMATH
-/* Enable the build of vector math code.  */
-# define WANT_VMATH 1
+#if !__aarch64__
+# error "Cannot build without AArch64"
 #endif
 
-#if WANT_VMATH
-
-# if __aarch64__
-#  define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))
-# else
-#  error "Cannot build without AArch64"
-# endif
-
-# include <stdint.h>
-# include "math_config.h"
-# if __aarch64__
+#define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))
+
+#define V_NAME_F1(fun) _ZGVnN4v_##fun##f
+#define V_NAME_D1(fun) _ZGVnN2v_##fun
+#define V_NAME_F2(fun) _ZGVnN4vv_##fun##f
+#define V_NAME_D2(fun) _ZGVnN2vv_##fun
+#define V_NAME_F1_L1(fun) _ZGVnN4vl4_##fun##f
+#define V_NAME_D1_L1(fun) _ZGVnN2vl8_##fun
+
+#if USE_GLIBC_ABI
+
+# define HALF_WIDTH_ALIAS_F1(fun)                                             \
+    float32x2_t VPCS_ATTR _ZGVnN2v_##fun##f (float32x2_t x)                   \
+    {                                                                         \
+      return vget_low_f32 (_ZGVnN4v_##fun##f (vcombine_f32 (x, x)));          \
+    }
+
+# define HALF_WIDTH_ALIAS_F2(fun)                                             \
+    float32x2_t VPCS_ATTR _ZGVnN2vv_##fun##f (float32x2_t x, float32x2_t y)   \
+    {                                                                         \
+      return vget_low_f32 (                                                   \
+	  _ZGVnN4vv_##fun##f (vcombine_f32 (x, x), vcombine_f32 (y, y)));     \
+    }
+
+#else
+# define HALF_WIDTH_ALIAS_F1(fun)
+# define HALF_WIDTH_ALIAS_F2(fun)
+#endif
 
-#  include <arm_neon.h>
+#include <stdint.h>
+#include "math_config.h"
+#include <arm_neon.h>
 
 /* Shorthand helpers for declaring constants.  */
-#  define V2(X) { X, X }
-#  define V4(X) { X, X, X, X }
-#  define V8(X) { X, X, X, X, X, X, X, X }
+#define V2(X)                                                                 \
+  {                                                                           \
+    X, X                                                                      \
+  }
+#define V4(X)                                                                 \
+  {                                                                           \
+    X, X, X, X                                                                \
+  }
+#define V8(X)                                                                 \
+  {                                                                           \
+    X, X, X, X, X, X, X, X                                                    \
+  }
 
 static inline int
 v_any_u16h (uint16x4_t x)
@@ -38,6 +65,12 @@ v_any_u16h (uint16x4_t x)
   return vget_lane_u64 (vreinterpret_u64_u16 (x), 0) != 0;
 }
 
+static inline int
+v_lanes32 (void)
+{
+  return 4;
+}
+
 static inline float32x4_t
 v_f32 (float x)
 {
@@ -54,7 +87,7 @@ v_s32 (int32_t x)
   return (int32x4_t) V4 (x);
 }
 
-/* true if any elements of a vector compare result is non-zero.  */
+/* true if any element of a v_cond result is non-zero.  */
 static inline int
 v_any_u32 (uint32x4_t x)
 {
@@ -97,6 +130,11 @@ v_zerofy_f32 (float32x4_t x, uint32x4_t mask)
   return vreinterpretq_f32_u32 (vbicq_u32 (vreinterpretq_u32_f32 (x), mask));
 }
 
+static inline int
+v_lanes64 (void)
+{
+  return 2;
+}
 static inline float64x2_t
 v_f64 (double x)
 {
@@ -113,20 +151,13 @@ v_s64 (int64_t x)
   return (int64x2_t) V2 (x);
 }
 
-/* true if any elements of a vector compare result is non-zero.  */
+/* true if any element of a v_cond result is non-zero.  */
 static inline int
 v_any_u64 (uint64x2_t x)
 {
   /* assume elements in x are either 0 or -1u.  */
   return vpaddd_u64 (x) != 0;
 }
-/* true if all elements of a vector compare result is 1.  */
-static inline int
-v_all_u64 (uint64x2_t x)
-{
-  /* assume elements in x are either 0 or -1u.  */
-  return vpaddd_s64 (vreinterpretq_s64_u64 (x)) == -2;
-}
 static inline float64x2_t
 v_lookup_f64 (const double *tab, uint64x2_t idx)
 {
@@ -137,7 +168,6 @@ v_lookup_u64 (const uint64_t *tab, uint64x2_t idx)
 {
   return (uint64x2_t){ tab[idx[0]], tab[idx[1]] };
 }
-
 static inline float64x2_t
 v_call_f64 (double (*f) (double), float64x2_t x, float64x2_t y, uint64x2_t p)
 {
@@ -169,7 +199,4 @@ v_zerofy_f64 (float64x2_t x, uint64x2_t mask)
   return vreinterpretq_f64_u64 (vbicq_u64 (vreinterpretq_u64_f64 (x), mask));
 }
 
-# endif
-#endif
-
 #endif
diff --git a/pl/math/poly_advsimd_f32.h b/math/aarch64/advsimd/v_poly_f32.h
similarity index 81%
rename from pl/math/poly_advsimd_f32.h
rename to math/aarch64/advsimd/v_poly_f32.h
index 438e153dff90c6..9a9c5c1ac15b34 100644
--- a/pl/math/poly_advsimd_f32.h
+++ b/math/aarch64/advsimd/v_poly_f32.h
@@ -2,12 +2,12 @@
  * Helpers for evaluating polynomials on single-precision AdvSIMD input, using
  * various schemes.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#ifndef PL_MATH_POLY_ADVSIMD_F32_H
-#define PL_MATH_POLY_ADVSIMD_F32_H
+#ifndef MATH_POLY_ADVSIMD_F32_H
+#define MATH_POLY_ADVSIMD_F32_H
 
 #include <arm_neon.h>
 
diff --git a/pl/math/poly_advsimd_f64.h b/math/aarch64/advsimd/v_poly_f64.h
similarity index 81%
rename from pl/math/poly_advsimd_f64.h
rename to math/aarch64/advsimd/v_poly_f64.h
index 7ea249a9122554..4331bfbd03b0c1 100644
--- a/pl/math/poly_advsimd_f64.h
+++ b/math/aarch64/advsimd/v_poly_f64.h
@@ -2,12 +2,12 @@
  * Helpers for evaluating polynomials on double-precision AdvSIMD input, using
  * various schemes.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#ifndef PL_MATH_POLY_ADVSIMD_F64_H
-#define PL_MATH_POLY_ADVSIMD_F64_H
+#ifndef MATH_POLY_ADVSIMD_F64_H
+#define MATH_POLY_ADVSIMD_F64_H
 
 #include <arm_neon.h>
 
diff --git a/pl/math/v_sincos_common.h b/math/aarch64/advsimd/v_sincos_common.h
similarity index 97%
rename from pl/math/v_sincos_common.h
rename to math/aarch64/advsimd/v_sincos_common.h
index ee7937e0785ae6..14227d9339a881 100644
--- a/pl/math/v_sincos_common.h
+++ b/math/aarch64/advsimd/v_sincos_common.h
@@ -1,12 +1,12 @@
 /*
  * Core approximation for double-precision vector sincos
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "v_math.h"
-#include "poly_advsimd_f64.h"
+#include "v_poly_f64.h"
 
 static const struct v_sincos_data
 {
diff --git a/pl/math/v_sincosf_common.h b/math/aarch64/advsimd/v_sincosf_common.h
similarity index 98%
rename from pl/math/v_sincosf_common.h
rename to math/aarch64/advsimd/v_sincosf_common.h
index 8239bd9f01763e..7c29eded14d68e 100644
--- a/pl/math/v_sincosf_common.h
+++ b/math/aarch64/advsimd/v_sincosf_common.h
@@ -1,7 +1,7 @@
 /*
  * Core approximation for single-precision vector sincos
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
diff --git a/math/aarch64/advsimd/v_sincospi_common.h b/math/aarch64/advsimd/v_sincospi_common.h
new file mode 100644
index 00000000000000..438b141b9174de
--- /dev/null
+++ b/math/aarch64/advsimd/v_sincospi_common.h
@@ -0,0 +1,64 @@
+/*
+ * Helper for Double-precision vector sincospi function.
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "v_math.h"
+#include "v_poly_f64.h"
+
+static const struct v_sincospi_data
+{
+  float64x2_t poly[10], range_val;
+} v_sincospi_data = {
+  /* Polynomial coefficients generated using Remez algorithm,
+     see sinpi.sollya for details.  */
+  .poly = { V2 (0x1.921fb54442d184p1), V2 (-0x1.4abbce625be53p2),
+	    V2 (0x1.466bc6775ab16p1), V2 (-0x1.32d2cce62dc33p-1),
+	    V2 (0x1.507834891188ep-4), V2 (-0x1.e30750a28c88ep-8),
+	    V2 (0x1.e8f48308acda4p-12), V2 (-0x1.6fc0032b3c29fp-16),
+	    V2 (0x1.af86ae521260bp-21), V2 (-0x1.012a9870eeb7dp-25) },
+  .range_val = V2 (0x1p63), /* x >= this: no odd-integer sign flip.  */
+};
+
+/* Double-precision vector function allowing calculation of both sin and cos in
+   one function call, using separate argument reduction and shared low-order
+   polynomials.
+   Approximation for vector double-precision sincospi(x).
+   Maximum Error 3.09 ULP:
+  _ZGVnN2v_sincospi_sin(0x1.7a41deb4b21e1p+14) got 0x1.fd54d0b327cf1p-1
+					      want 0x1.fd54d0b327cf4p-1
+   Maximum Error 3.16 ULP:
+  _ZGVnN2v_sincospi_cos(-0x1.11e3c7e284adep-5) got 0x1.fd2da484ff3ffp-1
+					      want 0x1.fd2da484ff402p-1.  */
+static inline float64x2x2_t
+v_sincospi_inline (float64x2_t x, const struct v_sincospi_data *d)
+{
+  /* If round(x) is odd, the sign of both results is flipped.  Lanes with
+     x >= range_val are left out of the flip (masked by cmp).  */
+  uint64x2_t cmp = vcgeq_f64 (x, d->range_val);
+  uint64x2_t odd = vshlq_n_u64 (
+      vbicq_u64 (vreinterpretq_u64_s64 (vcvtaq_s64_f64 (x)), cmp), 63);
+
+  /* sr = x - round(x), rounding ties away from zero.  */
+  float64x2_t sr = vsubq_f64 (x, vrndaq_f64 (x));
+  /* cospi(r) = sinpi(0.5 - abs(r)) for r in -1/2 .. 1/2.  */
+  float64x2_t cr = vsubq_f64 (v_f64 (0.5), vabsq_f64 (sr));
+
+  /* Pairwise Horner approximation for y = sin(r * pi).  */
+  float64x2_t sr2 = vmulq_f64 (sr, sr);
+  float64x2_t sr4 = vmulq_f64 (sr2, sr2);
+  float64x2_t cr2 = vmulq_f64 (cr, cr);
+  float64x2_t cr4 = vmulq_f64 (cr2, cr2);
+
+  float64x2_t ss = vmulq_f64 (v_pw_horner_9_f64 (sr2, sr4, d->poly), sr);
+  float64x2_t cc = vmulq_f64 (v_pw_horner_9_f64 (cr2, cr4, d->poly), cr);
+
+  float64x2_t sinpix
+      = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (ss), odd));
+
+  float64x2_t cospix
+      = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (cc), odd));
+  /* val[0] is the sinpi result, val[1] the cospi result.  */
+  return (float64x2x2_t){ sinpix, cospix };
+}
diff --git a/math/aarch64/advsimd/v_sincospif_common.h b/math/aarch64/advsimd/v_sincospif_common.h
new file mode 100644
index 00000000000000..8d4177dd871eb3
--- /dev/null
+++ b/math/aarch64/advsimd/v_sincospif_common.h
@@ -0,0 +1,57 @@
+/*
+ * Helper for Single-precision vector sincospi function.
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "mathlib.h"
+#include "v_math.h"
+#include "v_poly_f32.h"
+
+const static struct v_sincospif_data
+{
+  float32x4_t poly[6], range_val;
+} v_sincospif_data = {
+  /* Taylor series coefficients for sin(pi * x).  */
+  .poly = { V4 (0x1.921fb6p1f), V4 (-0x1.4abbcep2f), V4 (0x1.466bc6p1f),
+	    V4 (-0x1.32d2ccp-1f), V4 (0x1.50783p-4f), V4 (-0x1.e30750p-8f) },
+  .range_val = V4 (0x1p31f),
+};
+
+/* Single-precision vector function allowing calculation of both sinpi and
+   cospi in one function call, using shared argument reduction and polynomials.
+   Worst-case error for sin is 3.04 ULP:
+   _ZGVnN4v_sincospif_sin(0x1.1d341ap-1) got 0x1.f7cd56p-1 want 0x1.f7cd5p-1.
+   Worst-case error for cos is 3.18 ULP:
+   _ZGVnN4v_sincospif_cos(0x1.d341a8p-5) got 0x1.f7cd56p-1 want 0x1.f7cd5p-1.
+ */
+static inline float32x4x2_t
+v_sincospif_inline (float32x4_t x, const struct v_sincospif_data *d)
+{
+  /* If x rounds to an odd integer, flip the sign bit of both results
+     (lanes with x >= range_val are excluded by the cmp mask).  */
+  uint32x4_t cmp = vcgeq_f32 (x, d->range_val);
+  uint32x4_t odd = vshlq_n_u32 (
+      vbicq_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (x)), cmp), 31);
+
+  /* r = x - round(x): vrndaq rounds ties away from zero.  */
+  float32x4_t sr = vsubq_f32 (x, vrndaq_f32 (x));
+  /* cospi(x) = sinpi(0.5 - abs(x)) for values -1/2 .. 1/2.  */
+  float32x4_t cr = vsubq_f32 (v_f32 (0.5f), vabsq_f32 (sr));
+
+  /* Pairwise Horner approximation for y = sin(r * pi).  */
+  float32x4_t sr2 = vmulq_f32 (sr, sr);
+  float32x4_t sr4 = vmulq_f32 (sr2, sr2);
+  float32x4_t cr2 = vmulq_f32 (cr, cr);
+  float32x4_t cr4 = vmulq_f32 (cr2, cr2);
+
+  float32x4_t ss = vmulq_f32 (v_pw_horner_5_f32 (sr2, sr4, d->poly), sr);
+  float32x4_t cc = vmulq_f32 (v_pw_horner_5_f32 (cr2, cr4, d->poly), cr);
+
+  float32x4_t sinpix
+      = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (ss), odd));
+  float32x4_t cospix
+      = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (cc), odd));
+
+  return (float32x4x2_t){ sinpix, cospix };
+}
diff --git a/pl/math/cospi_3u1.c b/math/aarch64/cospi_3u5.c
similarity index 82%
rename from pl/math/cospi_3u1.c
rename to math/aarch64/cospi_3u5.c
index 4a688a07682970..4131f6c816a199 100644
--- a/pl/math/cospi_3u1.c
+++ b/math/aarch64/cospi_3u5.c
@@ -1,14 +1,14 @@
 /*
  * Double-precision scalar cospi function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "mathlib.h"
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 #include "poly_scalar_f64.h"
 
 /* Taylor series coefficents for sin(pi * x).
@@ -29,9 +29,9 @@ static const double poly[]
    cospi(0x1.160b129300112p-21) got 0x1.fffffffffd16bp-1
 			       want 0x1.fffffffffd16ep-1.  */
 double
-cospi (double x)
+arm_math_cospi (double x)
 {
-  if (isinf (x))
+  if (isinf (x) || isnan (x))
     return __math_invalid (x);
 
   double ax = asdouble (asuint64 (x) & ~0x8000000000000000);
@@ -81,9 +81,18 @@ cospi (double x)
   return asdouble (asuint64 (y) ^ sign);
 }
 
-PL_SIG (S, D, 1, cospi, -0.9, 0.9)
-PL_TEST_ULP (cospi, 2.63)
-PL_TEST_SYM_INTERVAL (cospi, 0, 0x1p-63, 5000)
-PL_TEST_SYM_INTERVAL (cospi, 0x1p-63, 0.5, 10000)
-PL_TEST_SYM_INTERVAL (cospi, 0.5, 0x1p51f, 10000)
-PL_TEST_SYM_INTERVAL (cospi, 0x1p51f, inf, 10000)
+#if WANT_EXPERIMENTAL_MATH
+double
+cospi (double x)
+{
+  return arm_math_cospi (x);
+}
+#endif
+
+#if WANT_TRIGPI_TESTS
+TEST_ULP (arm_math_cospi, 2.63)
+TEST_SYM_INTERVAL (arm_math_cospi, 0, 0x1p-63, 5000)
+TEST_SYM_INTERVAL (arm_math_cospi, 0x1p-63, 0.5, 10000)
+TEST_SYM_INTERVAL (arm_math_cospi, 0.5, 0x1p51f, 10000)
+TEST_SYM_INTERVAL (arm_math_cospi, 0x1p51f, inf, 10000)
+#endif
diff --git a/pl/math/cospif_2u6.c b/math/aarch64/cospif_2u6.c
similarity index 79%
rename from pl/math/cospif_2u6.c
rename to math/aarch64/cospif_2u6.c
index d78a98ed8b2d21..eb5b75402a63c8 100644
--- a/pl/math/cospif_2u6.c
+++ b/math/aarch64/cospif_2u6.c
@@ -1,14 +1,14 @@
 /*
  * Single-precision scalar cospi function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "mathlib.h"
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 /* Taylor series coefficents for sin(pi * x).  */
 #define C0 0x1.921fb6p1f
@@ -25,9 +25,9 @@
    cospif(0x1.37e844p-4) got 0x1.f16b3p-1
 			want 0x1.f16b2ap-1.  */
 float
-cospif (float x)
+arm_math_cospif (float x)
 {
-  if (isinf (x))
+  if (isinf (x) || isnan (x))
     return __math_invalidf (x);
 
   float ax = asfloat (asuint (x) & ~0x80000000);
@@ -76,9 +76,18 @@ cospif (float x)
   return asfloat (asuint (y * r) ^ sign);
 }
 
-PL_SIG (S, F, 1, cospi, -0.9, 0.9)
-PL_TEST_ULP (cospif, 2.15)
-PL_TEST_SYM_INTERVAL (cospif, 0, 0x1p-31, 5000)
-PL_TEST_SYM_INTERVAL (cospif, 0x1p-31, 0.5, 10000)
-PL_TEST_SYM_INTERVAL (cospif, 0.5, 0x1p22f, 10000)
-PL_TEST_SYM_INTERVAL (cospif, 0x1p22f, inf, 10000)
+#if WANT_EXPERIMENTAL_MATH
+float
+cospif (float x)
+{
+  return arm_math_cospif (x);
+}
+#endif
+
+#if WANT_TRIGPI_TESTS
+TEST_ULP (arm_math_cospif, 2.15)
+TEST_SYM_INTERVAL (arm_math_cospif, 0, 0x1p-31, 5000)
+TEST_SYM_INTERVAL (arm_math_cospif, 0x1p-31, 0.5, 10000)
+TEST_SYM_INTERVAL (arm_math_cospif, 0.5, 0x1p22f, 10000)
+TEST_SYM_INTERVAL (arm_math_cospif, 0x1p22f, inf, 10000)
+#endif
diff --git a/pl/README.contributors b/math/aarch64/experimental/README.contributors
similarity index 71%
rename from pl/README.contributors
rename to math/aarch64/experimental/README.contributors
index 3af9b1fc7741d9..abb749485ba3fe 100644
--- a/pl/README.contributors
+++ b/math/aarch64/experimental/README.contributors
@@ -5,7 +5,6 @@ glibc-specific conventions need not be followed.
 The requirements for portable code apply to non-portable code with the
 following differences:
 
-
 1. Worst-case ULP error should be encoded in filenames (e.g. sin_u35.c). There
    are no specific restrictions on acceptable ULP error, but if functions
    provide significantly less accuracy than portable equivalents then a clear
@@ -15,9 +14,3 @@ following differences:
 
 2. Functions are assumed to support round-to-nearest mode by default, unless
    stated; other rounding modes are not required to be provided.
-
-3. Handling of special cases may be relaxed for vector functions. Checking
-   whether each vector lane contains special values such as NaN, Inf or
-   denormal numbers can prove too costly for vector functions. This is often
-   not required since vector functions are typically used along with aggressive
-   compiler optimization flags.
diff --git a/pl/math/acos_2u.c b/math/aarch64/experimental/acos_2u.c
similarity index 76%
rename from pl/math/acos_2u.c
rename to math/aarch64/experimental/acos_2u.c
index 9ec6894f1d8100..062215c92248ff 100644
--- a/pl/math/acos_2u.c
+++ b/math/aarch64/experimental/acos_2u.c
@@ -1,23 +1,23 @@
 /*
  * Double-precision acos(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "math_config.h"
 #include "poly_scalar_f64.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
-#define AbsMask (0x7fffffffffffffff)
-#define Half (0x3fe0000000000000)
-#define One (0x3ff0000000000000)
-#define PiOver2 (0x1.921fb54442d18p+0)
-#define Pi (0x1.921fb54442d18p+1)
-#define Small (0x3c90000000000000) /* 2^-53.  */
-#define Small16 (0x3c90)
-#define QNaN (0x7ff8)
+#include "test_sig.h"
+#include "test_defs.h"
+
+#define AbsMask 0x7fffffffffffffff
+#define Half 0x3fe0000000000000
+#define One 0x3ff0000000000000
+#define PiOver2 0x1.921fb54442d18p+0
+#define Pi 0x1.921fb54442d18p+1
+#define Small 0x3c90000000000000 /* 2^-53.  */
+#define Small16 0x3c90
+#define QNaN 0x7ff8
 
 /* Fast implementation of double-precision acos(x) based on polynomial
    approximation of double-precision asin(x).
@@ -29,8 +29,8 @@
 
      acos(x) = pi/2 - asin(x)
 
-   and use an order 11 polynomial P such that the final approximation of asin is
-   an odd polynomial: asin(x) ~ x + x^3 * P(x^2).
+   and use an order 11 polynomial P such that the final approximation of asin
+   is an odd polynomial: asin(x) ~ x + x^3 * P(x^2).
 
    The largest observed error in this region is 1.18 ulps,
    acos(0x1.fbab0a7c460f6p-2) got 0x1.0d54d1985c068p+0
@@ -90,11 +90,11 @@ acos (double x)
   return (x <= -0.5) ? fma (-2.0, p, Pi) : 2.0 * p;
 }
 
-PL_SIG (S, D, 1, acos, -1.0, 1.0)
-PL_TEST_ULP (acos, 1.02)
-PL_TEST_INTERVAL (acos, 0, Small, 5000)
-PL_TEST_INTERVAL (acos, Small, 0.5, 50000)
-PL_TEST_INTERVAL (acos, 0.5, 1.0, 50000)
-PL_TEST_INTERVAL (acos, 1.0, 0x1p11, 50000)
-PL_TEST_INTERVAL (acos, 0x1p11, inf, 20000)
-PL_TEST_INTERVAL (acos, -0, -inf, 20000)
+TEST_SIG (S, D, 1, acos, -1.0, 1.0)
+TEST_ULP (acos, 1.02)
+TEST_INTERVAL (acos, 0, Small, 5000)
+TEST_INTERVAL (acos, Small, 0.5, 50000)
+TEST_INTERVAL (acos, 0.5, 1.0, 50000)
+TEST_INTERVAL (acos, 1.0, 0x1p11, 50000)
+TEST_INTERVAL (acos, 0x1p11, inf, 20000)
+TEST_INTERVAL (acos, -0, -inf, 20000)
diff --git a/pl/math/acosf_1u4.c b/math/aarch64/experimental/acosf_1u4.c
similarity index 79%
rename from pl/math/acosf_1u4.c
rename to math/aarch64/experimental/acosf_1u4.c
index 6dde422ef85a89..d207f5e89f26d4 100644
--- a/pl/math/acosf_1u4.c
+++ b/math/aarch64/experimental/acosf_1u4.c
@@ -1,23 +1,23 @@
 /*
  * Single-precision acos(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "poly_scalar_f32.h"
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
-#define AbsMask (0x7fffffff)
-#define Half (0x3f000000)
-#define One (0x3f800000)
-#define PiOver2f (0x1.921fb6p+0f)
-#define Pif (0x1.921fb6p+1f)
-#define Small (0x32800000) /* 2^-26.  */
-#define Small12 (0x328)
-#define QNaN (0x7fc)
+#include "test_sig.h"
+#include "test_defs.h"
+
+#define AbsMask 0x7fffffff
+#define Half 0x3f000000
+#define One 0x3f800000
+#define PiOver2f 0x1.921fb6p+0f
+#define Pif 0x1.921fb6p+1f
+#define Small 0x32800000 /* 2^-26.  */
+#define Small12 0x328
+#define QNaN 0x7fc
 
 /* Fast implementation of single-precision acos(x) based on polynomial
    approximation of single-precision asin(x).
@@ -89,11 +89,11 @@ acosf (float x)
   return (x <= -0.5) ? fmaf (-2.0f, p, Pif) : 2.0f * p;
 }
 
-PL_SIG (S, F, 1, acos, -1.0, 1.0)
-PL_TEST_ULP (acosf, 0.82)
-PL_TEST_INTERVAL (acosf, 0, Small, 5000)
-PL_TEST_INTERVAL (acosf, Small, 0.5, 50000)
-PL_TEST_INTERVAL (acosf, 0.5, 1.0, 50000)
-PL_TEST_INTERVAL (acosf, 1.0, 0x1p11, 50000)
-PL_TEST_INTERVAL (acosf, 0x1p11, inf, 20000)
-PL_TEST_INTERVAL (acosf, -0, -inf, 20000)
+TEST_SIG (S, F, 1, acos, -1.0, 1.0)
+TEST_ULP (acosf, 0.82)
+TEST_INTERVAL (acosf, 0, Small, 5000)
+TEST_INTERVAL (acosf, Small, 0.5, 50000)
+TEST_INTERVAL (acosf, 0.5, 1.0, 50000)
+TEST_INTERVAL (acosf, 1.0, 0x1p11, 50000)
+TEST_INTERVAL (acosf, 0x1p11, inf, 20000)
+TEST_INTERVAL (acosf, -0, -inf, 20000)
diff --git a/pl/math/acosh_3u.c b/math/aarch64/experimental/acosh_3u.c
similarity index 69%
rename from pl/math/acosh_3u.c
rename to math/aarch64/experimental/acosh_3u.c
index 4e2cb6737ba863..19da82f4f3e5d9 100644
--- a/pl/math/acosh_3u.c
+++ b/math/aarch64/experimental/acosh_3u.c
@@ -1,31 +1,26 @@
 /*
  * Double-precision acosh(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
+#include "mathlib.h"
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define Ln2 (0x1.62e42fefa39efp-1)
 #define MinusZero (0x8000000000000000)
 #define SquareLim (0x5fe0000000000000) /* asuint64(0x1.0p511).  */
 #define Two (0x4000000000000000)       /* asuint64(2.0).  */
 
-double
-optr_aor_log_f64 (double);
-
-double
-log1p (double);
-
 /* acosh approximation using a variety of approaches on different intervals:
 
    acosh(x) = ln(x + sqrt(x * x - 1)).
 
-   x >= 2^511: We cannot square x without overflow. For huge x, sqrt(x*x - 1) is
-   close enough to x that we can calculate the result by ln(2x) == ln(x) +
+   x >= 2^511: We cannot square x without overflow. For huge x, sqrt(x*x - 1)
+   is close enough to x that we can calculate the result by ln(2x) == ln(x) +
    ln(2). The greatest observed error in this region is 0.98 ULP:
    acosh(0x1.1b9bf42923d1dp+853) got 0x1.28066a11a7c7fp+9
 				want 0x1.28066a11a7c8p+9.
@@ -48,19 +43,19 @@ acosh (double x)
     return __math_invalid (x);
 
   if (unlikely (ix >= SquareLim))
-    return optr_aor_log_f64 (x) + Ln2;
+    return log (x) + Ln2;
 
   if (ix >= Two)
-    return optr_aor_log_f64 (x + sqrt (x * x - 1));
+    return log (x + sqrt (x * x - 1));
 
   double xm1 = x - 1;
   return log1p (xm1 + sqrt (2 * xm1 + xm1 * xm1));
 }
 
-PL_SIG (S, D, 1, acosh, 1.0, 10.0)
-PL_TEST_ULP (acosh, 2.19)
-PL_TEST_INTERVAL (acosh, 0, 1, 10000)
-PL_TEST_INTERVAL (acosh, 1, 2, 100000)
-PL_TEST_INTERVAL (acosh, 2, 0x1p511, 100000)
-PL_TEST_INTERVAL (acosh, 0x1p511, inf, 100000)
-PL_TEST_INTERVAL (acosh, -0, -inf, 10000)
+TEST_SIG (S, D, 1, acosh, 1.0, 10.0)
+TEST_ULP (acosh, 2.19)
+TEST_INTERVAL (acosh, 0, 1, 10000)
+TEST_INTERVAL (acosh, 1, 2, 100000)
+TEST_INTERVAL (acosh, 2, 0x1p511, 100000)
+TEST_INTERVAL (acosh, 0x1p511, inf, 100000)
+TEST_INTERVAL (acosh, -0, -inf, 10000)
diff --git a/pl/math/acoshf_2u8.c b/math/aarch64/experimental/acoshf_2u8.c
similarity index 68%
rename from pl/math/acoshf_2u8.c
rename to math/aarch64/experimental/acoshf_2u8.c
index c9cded7fd2ff7f..a46b310ee312ed 100644
--- a/pl/math/acoshf_2u8.c
+++ b/math/aarch64/experimental/acoshf_2u8.c
@@ -1,27 +1,19 @@
 /*
  * Single-precision acosh(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define Ln2 (0x1.62e4p-1f)
 #define MinusZero 0x80000000
 #define SquareLim 0x5f800000 /* asuint(0x1p64).  */
 #define Two 0x40000000
 
-/* Single-precision log from math/.  */
-float
-optr_aor_log_f32 (float);
-
-/* Single-precision log(1+x) from pl/math.  */
-float
-log1pf (float);
-
 /* acoshf approximation using a variety of approaches on different intervals:
 
    x >= 2^64: We cannot square x without overflow. For huge x, sqrt(x*x - 1) is
@@ -45,19 +37,19 @@ acoshf (float x)
     return __math_invalidf (x);
 
   if (unlikely (ix >= SquareLim))
-    return optr_aor_log_f32 (x) + Ln2;
+    return logf (x) + Ln2;
 
   if (ix > Two)
-    return optr_aor_log_f32 (x + sqrtf (x * x - 1));
+    return logf (x + sqrtf (x * x - 1));
 
   float xm1 = x - 1;
   return log1pf (xm1 + sqrtf (2 * xm1 + xm1 * xm1));
 }
 
-PL_SIG (S, F, 1, acosh, 1.0, 10.0)
-PL_TEST_ULP (acoshf, 2.30)
-PL_TEST_INTERVAL (acoshf, 0, 1, 100)
-PL_TEST_INTERVAL (acoshf, 1, 2, 10000)
-PL_TEST_INTERVAL (acoshf, 2, 0x1p64, 100000)
-PL_TEST_INTERVAL (acoshf, 0x1p64, inf, 100000)
-PL_TEST_INTERVAL (acoshf, -0, -inf, 10000)
+TEST_SIG (S, F, 1, acosh, 1.0, 10.0)
+TEST_ULP (acoshf, 2.30)
+TEST_INTERVAL (acoshf, 0, 1, 100)
+TEST_INTERVAL (acoshf, 1, 2, 10000)
+TEST_INTERVAL (acoshf, 2, 0x1p64, 100000)
+TEST_INTERVAL (acoshf, 0x1p64, inf, 100000)
+TEST_INTERVAL (acoshf, -0, -inf, 10000)
diff --git a/pl/math/v_erfinv_25u.c b/math/aarch64/experimental/advsimd/erfinv_25u.c
similarity index 88%
rename from pl/math/v_erfinv_25u.c
rename to math/aarch64/experimental/advsimd/erfinv_25u.c
index 654a7336e85bc8..2fa2f0beb8b79b 100644
--- a/pl/math/v_erfinv_25u.c
+++ b/math/aarch64/experimental/advsimd/erfinv_25u.c
@@ -1,15 +1,15 @@
 /*
  * Double-precision inverse error function (AdvSIMD variant).
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 #include "v_math.h"
-#include "pl_test.h"
+#include "test_defs.h"
 #include "mathlib.h"
 #include "math_config.h"
-#include "pl_sig.h"
-#include "poly_advsimd_f64.h"
+#include "test_sig.h"
+#include "v_poly_f64.h"
 #define V_LOG_INLINE_POLY_ORDER 4
 #include "v_log_inline.h"
 
@@ -22,7 +22,7 @@ const static struct data
       can be taken.  */
   double P[8][2], Q[7][2];
   float64x2_t tailshift;
-  uint8x16_t idx;
+  uint8_t idx[16];
   struct v_log_inline_data log_tbl;
   float64x2_t P_57[9], Q_57[10], P_17[7], Q_17[6];
 } data = { .P = { { 0x1.007ce8f01b2e8p+4, -0x1.f3596123109edp-7 },
@@ -58,7 +58,7 @@ const static struct data
 		     V2 (0x1.a450d8e7f4cbbp+7), V2 (-0x1.bc3480485857p+7),
 		     V2 (0x1.ae6b0c504ee02p+6), V2 (-0x1.499dfec1a7f5fp+4) },
 	   .tailshift = V2 (-0.87890625),
-	   .idx = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+	   .idx = { 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 },
 	   .log_tbl = V_LOG_CONSTANTS };
 
 static inline float64x2_t
@@ -128,7 +128,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (erfinv) (float64x2_t x)
   uint64x2_t extreme_tail = vcagtq_f64 (x, v_f64 (0.9375));
 
   uint8x16_t off = vandq_u8 (vreinterpretq_u8_u64 (is_tail), vdupq_n_u8 (8));
-  uint8x16_t idx = vaddq_u8 (d->idx, off);
+  uint8x16_t idx = vaddq_u8 (vld1q_u8 (d->idx), off);
 
   float64x2_t t = vbslq_f64 (is_tail, d->tailshift, v_f64 (-0.5625));
   t = vfmaq_f64 (t, x, x);
@@ -150,12 +150,17 @@ float64x2_t VPCS_ATTR V_NAME_D1 (erfinv) (float64x2_t x)
   return vdivq_f64 (p, q);
 }
 
-PL_SIG (V, D, 1, erfinv, -0.99, 0.99)
-PL_TEST_ULP (V_NAME_D1 (erfinv), 24.8)
+#if USE_MPFR
+# warning Not generating tests for _ZGVnN2v_erfinv, as MPFR has no suitable reference
+#else
+TEST_SIG (V, D, 1, erfinv, -0.99, 0.99)
+TEST_ULP (V_NAME_D1 (erfinv), 24.8)
+TEST_DISABLE_FENV (V_NAME_D1 (erfinv))
+TEST_SYM_INTERVAL (V_NAME_D1 (erfinv), 0, 0x1.fffffffffffffp-1, 100000)
+TEST_SYM_INTERVAL (V_NAME_D1 (erfinv), 0, 0x1.fffffffffffffp-1, 100000)
+TEST_SYM_INTERVAL (V_NAME_D1 (erfinv), 0, 0x1.fffffffffffffp-1, 100000)
 /* Test with control lane in each interval.  */
-PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (erfinv), 0, 0x1.fffffffffffffp-1, 100000,
-			0.5)
-PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (erfinv), 0, 0x1.fffffffffffffp-1, 100000,
-			0.8)
-PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (erfinv), 0, 0x1.fffffffffffffp-1, 100000,
-			0.95)
+TEST_CONTROL_VALUE (V_NAME_D1 (erfinv), 0.5)
+TEST_CONTROL_VALUE (V_NAME_D1 (erfinv), 0.8)
+TEST_CONTROL_VALUE (V_NAME_D1 (erfinv), 0.95)
+#endif
diff --git a/pl/math/v_erfinvf_5u.c b/math/aarch64/experimental/advsimd/erfinvf_5u.c
similarity index 83%
rename from pl/math/v_erfinvf_5u.c
rename to math/aarch64/experimental/advsimd/erfinvf_5u.c
index 5a6800b86ae9fd..254d50feb2895f 100644
--- a/pl/math/v_erfinvf_5u.c
+++ b/math/aarch64/experimental/advsimd/erfinvf_5u.c
@@ -1,13 +1,13 @@
 /*
  * Single-precision inverse error function (AdvSIMD variant).
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 #include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_advsimd_f32.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "v_poly_f32.h"
 #include "v_logf_inline.h"
 
 const static struct data
@@ -24,14 +24,15 @@ const static struct data
 
       P_10 and Q_10 are also stored in homogenous vectors to allow better
       memory access when no lanes are in a tail region.  */
-  float32x4_t Plo, PQ, Qhi, P29_3, tailshift;
+  float Plo[4], PQ[4], Qhi[4];
+  float32x4_t P29_3, tailshift;
   float32x4_t P_50[6], Q_50[2];
   float32x4_t P_10[3], Q_10[3];
-  uint8x16_t idxhi, idxlo;
+  uint8_t idxhi[16], idxlo[16];
   struct v_logf_data logf_tbl;
 } data = {
-  .idxlo = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
-  .idxhi = { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 },
+  .idxlo = { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 },
+  .idxhi = { 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11 },
   .P29_3 = V4 (0x1.b13626p-2),
   .tailshift = V4 (-0.87890625),
   .Plo = { -0x1.a31268p+3, -0x1.fc0252p-4, 0x1.ac9048p+4, 0x1.119d44p+0 },
@@ -86,7 +87,7 @@ lookup (float32x4_t tbl, uint8x16_t idx)
    tail region:
    _ZGVnN4v_erfinvf(0x1.f7dbeep-1) got 0x1.b4793p+0
 				  want 0x1.b4793ap+0 .  */
-float32x4_t VPCS_ATTR V_NAME_F1 (erfinv) (float32x4_t x)
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (erfinv) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
 
@@ -124,18 +125,18 @@ float32x4_t VPCS_ATTR V_NAME_F1 (erfinv) (float32x4_t x)
      Add 4 * i to a group of 4 lanes to copy 32-bit lane i. Each vector stores
      two pairs of coeffs, so we need two idx vectors - one for each pair.  */
   uint8x16_t off = vandq_u8 (vreinterpretq_u8_u32 (is_tail), vdupq_n_u8 (4));
-  uint8x16_t idx_lo = vaddq_u8 (d->idxlo, off);
-  uint8x16_t idx_hi = vaddq_u8 (d->idxhi, off);
+  uint8x16_t idx_lo = vaddq_u8 (vld1q_u8 (d->idxlo), off);
+  uint8x16_t idx_hi = vaddq_u8 (vld1q_u8 (d->idxhi), off);
 
   /* Load the tables.  */
-  float32x4_t p_lo = d->Plo;
-  float32x4_t pq = d->PQ;
-  float32x4_t qhi = d->Qhi;
+  float32x4_t plo = vld1q_f32 (d->Plo);
+  float32x4_t pq = vld1q_f32 (d->PQ);
+  float32x4_t qhi = vld1q_f32 (d->Qhi);
 
   /* Do the lookup (and calculate p3 by masking non-tail lanes).  */
   float32x4_t p3 = vreinterpretq_f32_u32 (
       vandq_u32 (is_tail, vreinterpretq_u32_f32 (d->P29_3)));
-  float32x4_t p0 = lookup (p_lo, idx_lo), p1 = lookup (p_lo, idx_hi),
+  float32x4_t p0 = lookup (plo, idx_lo), p1 = lookup (plo, idx_hi),
 	      p2 = lookup (pq, idx_lo), q0 = lookup (pq, idx_hi),
 	      q1 = lookup (qhi, idx_lo), q2 = lookup (qhi, idx_hi);
 
@@ -155,9 +156,17 @@ float32x4_t VPCS_ATTR V_NAME_F1 (erfinv) (float32x4_t x)
   return vdivq_f32 (p, q);
 }
 
-PL_SIG (V, F, 1, erfinv, -0.99, 0.99)
-PL_TEST_ULP (V_NAME_F1 (erfinv), 4.49)
+HALF_WIDTH_ALIAS_F1 (erfinv)
+
+#if USE_MPFR
+# warning Not generating tests for _ZGVnN4v_erfinvf, as MPFR has no suitable reference
+#else
+TEST_SIG (V, F, 1, erfinv, -0.99, 0.99)
+TEST_DISABLE_FENV (V_NAME_F1 (erfinv))
+TEST_ULP (V_NAME_F1 (erfinv), 4.49)
+TEST_SYM_INTERVAL (V_NAME_F1 (erfinv), 0, 0x1.fffffep-1, 40000)
 /* Test with control lane in each interval.  */
-PL_TEST_SYM_INTERVAL_C (V_NAME_F1 (erfinv), 0, 0x1.fffffep-1, 40000, 0.5)
-PL_TEST_SYM_INTERVAL_C (V_NAME_F1 (erfinv), 0, 0x1.fffffep-1, 40000, 0.8)
-PL_TEST_SYM_INTERVAL_C (V_NAME_F1 (erfinv), 0, 0x1.fffffep-1, 40000, 0.95)
+TEST_CONTROL_VALUE (V_NAME_F1 (erfinv), 0.5)
+TEST_CONTROL_VALUE (V_NAME_F1 (erfinv), 0.8)
+TEST_CONTROL_VALUE (V_NAME_F1 (erfinv), 0.95)
+#endif
diff --git a/pl/math/v_logf_inline.h b/math/aarch64/experimental/advsimd/v_logf_inline.h
similarity index 97%
rename from pl/math/v_logf_inline.h
rename to math/aarch64/experimental/advsimd/v_logf_inline.h
index c00fe0909afc86..3f45341732892a 100644
--- a/pl/math/v_logf_inline.h
+++ b/math/aarch64/experimental/advsimd/v_logf_inline.h
@@ -1,7 +1,7 @@
 /*
  * Single-precision vector log function - inline version
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
diff --git a/pl/math/asin_3u.c b/math/aarch64/experimental/asin_3u.c
similarity index 78%
rename from pl/math/asin_3u.c
rename to math/aarch64/experimental/asin_3u.c
index 0b50995449cef3..56e63e451ba18b 100644
--- a/pl/math/asin_3u.c
+++ b/math/aarch64/experimental/asin_3u.c
@@ -1,22 +1,22 @@
 /*
  * Double-precision asin(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "poly_scalar_f64.h"
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
-#define AbsMask (0x7fffffffffffffff)
-#define Half (0x3fe0000000000000)
-#define One (0x3ff0000000000000)
-#define PiOver2 (0x1.921fb54442d18p+0)
-#define Small (0x3e50000000000000) /* 2^-26.  */
-#define Small16 (0x3e50)
-#define QNaN (0x7ff8)
+#define AbsMask 0x7fffffffffffffff
+#define Half 0x3fe0000000000000
+#define One 0x3ff0000000000000
+#define PiOver2 0x1.921fb54442d18p+0
+#define Small 0x3e50000000000000 /* 2^-26.  */
+#define Small16 0x3e50
+#define QNaN 0x7ff8
 
 /* Fast implementation of double-precision asin(x) based on polynomial
    approximation.
@@ -54,8 +54,8 @@
      asin(x) ~ pi/2 - acos(x) ~ pi/2 - 2 * sqrt(z) (1 + z * P(z)).
 
    The largest observed error in this region is 2.69 ulps,
-   asin(0x1.044ac9819f573p-1) got 0x1.110d7e85fdd5p-1
-			     want 0x1.110d7e85fdd53p-1.  */
+   asin(0x1.044e8cefee301p-1) got 0x1.1111dd54ddf96p-1
+			     want 0x1.1111dd54ddf99p-1.  */
 double
 asin (double x)
 {
@@ -96,11 +96,11 @@ asin (double x)
   return asdouble (asuint64 (y) | sign);
 }
 
-PL_SIG (S, D, 1, asin, -1.0, 1.0)
-PL_TEST_ULP (asin, 2.19)
-PL_TEST_INTERVAL (asin, 0, Small, 5000)
-PL_TEST_INTERVAL (asin, Small, 0.5, 50000)
-PL_TEST_INTERVAL (asin, 0.5, 1.0, 50000)
-PL_TEST_INTERVAL (asin, 1.0, 0x1p11, 50000)
-PL_TEST_INTERVAL (asin, 0x1p11, inf, 20000)
-PL_TEST_INTERVAL (asin, -0, -inf, 20000)
+TEST_SIG (S, D, 1, asin, -1.0, 1.0)
+TEST_ULP (asin, 2.20)
+TEST_INTERVAL (asin, 0, Small, 5000)
+TEST_INTERVAL (asin, Small, 0.5, 50000)
+TEST_INTERVAL (asin, 0.5, 1.0, 50000)
+TEST_INTERVAL (asin, 1.0, 0x1p11, 50000)
+TEST_INTERVAL (asin, 0x1p11, inf, 20000)
+TEST_INTERVAL (asin, -0, -inf, 20000)
diff --git a/pl/math/asin_data.c b/math/aarch64/experimental/asin_data.c
similarity index 94%
rename from pl/math/asin_data.c
rename to math/aarch64/experimental/asin_data.c
index b5517731c7f4b3..60ab476e7ec950 100644
--- a/pl/math/asin_data.c
+++ b/math/aarch64/experimental/asin_data.c
@@ -1,7 +1,7 @@
 /*
  * Coefficients for single-precision asin(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
diff --git a/pl/math/asinf_2u5.c b/math/aarch64/experimental/asinf_2u5.c
similarity index 80%
rename from pl/math/asinf_2u5.c
rename to math/aarch64/experimental/asinf_2u5.c
index ec608146ff666d..1136da01550ecb 100644
--- a/pl/math/asinf_2u5.c
+++ b/math/aarch64/experimental/asinf_2u5.c
@@ -1,22 +1,22 @@
 /*
  * Single-precision asin(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "poly_scalar_f32.h"
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
-#define AbsMask (0x7fffffff)
-#define Half (0x3f000000)
-#define One (0x3f800000)
-#define PiOver2f (0x1.921fb6p+0f)
-#define Small (0x39800000) /* 2^-12.  */
-#define Small12 (0x398)
-#define QNaN (0x7fc)
+#define AbsMask 0x7fffffff
+#define Half 0x3f000000
+#define One 0x3f800000
+#define PiOver2f 0x1.921fb6p+0f
+#define Small 0x39800000 /* 2^-12.  */
+#define Small12 0x398
+#define QNaN 0x7fc
 
 /* Fast implementation of single-precision asin(x) based on polynomial
    approximation.
@@ -90,11 +90,11 @@ asinf (float x)
   return asfloat (asuint (y) | sign);
 }
 
-PL_SIG (S, F, 1, asin, -1.0, 1.0)
-PL_TEST_ULP (asinf, 1.91)
-PL_TEST_INTERVAL (asinf, 0, Small, 5000)
-PL_TEST_INTERVAL (asinf, Small, 0.5, 50000)
-PL_TEST_INTERVAL (asinf, 0.5, 1.0, 50000)
-PL_TEST_INTERVAL (asinf, 1.0, 0x1p11, 50000)
-PL_TEST_INTERVAL (asinf, 0x1p11, inf, 20000)
-PL_TEST_INTERVAL (asinf, -0, -inf, 20000)
+TEST_SIG (S, F, 1, asin, -1.0, 1.0)
+TEST_ULP (asinf, 1.91)
+TEST_INTERVAL (asinf, 0, Small, 5000)
+TEST_INTERVAL (asinf, Small, 0.5, 50000)
+TEST_INTERVAL (asinf, 0.5, 1.0, 50000)
+TEST_INTERVAL (asinf, 1.0, 0x1p11, 50000)
+TEST_INTERVAL (asinf, 0x1p11, inf, 20000)
+TEST_INTERVAL (asinf, -0, -inf, 20000)
diff --git a/pl/math/asinf_data.c b/math/aarch64/experimental/asinf_data.c
similarity index 92%
rename from pl/math/asinf_data.c
rename to math/aarch64/experimental/asinf_data.c
index 1652025e2920cd..15f331dde5a73c 100644
--- a/pl/math/asinf_data.c
+++ b/math/aarch64/experimental/asinf_data.c
@@ -1,7 +1,7 @@
 /*
  * Coefficients for single-precision asin(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
diff --git a/pl/math/asinh_2u5.c b/math/aarch64/experimental/asinh_2u5.c
similarity index 75%
rename from pl/math/asinh_2u5.c
rename to math/aarch64/experimental/asinh_2u5.c
index b7fc81a2b94f24..9d2d160a1453af 100644
--- a/pl/math/asinh_2u5.c
+++ b/math/aarch64/experimental/asinh_2u5.c
@@ -1,13 +1,14 @@
 /*
  * Double-precision asinh(x) function
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
+#include "mathlib.h"
 #include "poly_scalar_f64.h"
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define AbsMask 0x7fffffffffffffff
 #define ExpM26 0x3e50000000000000 /* asuint64(0x1.0p-26).  */
@@ -15,9 +16,6 @@
 #define Exp511 0x5fe0000000000000 /* asuint64(0x1.0p511).  */
 #define Ln2 0x1.62e42fefa39efp-1
 
-double
-optr_aor_log_f64 (double);
-
 /* Scalar double-precision asinh implementation. This routine uses different
    approaches on different intervals:
 
@@ -67,19 +65,18 @@ asinh (double x)
 
   if (unlikely (ia >= Exp511))
     {
-      return asdouble (asuint64 (optr_aor_log_f64 (ax) + Ln2) | sign);
+      return asdouble (asuint64 (log (ax) + Ln2) | sign);
     }
 
-  return asdouble (asuint64 (optr_aor_log_f64 (ax + sqrt (ax * ax + 1)))
-		   | sign);
+  return asdouble (asuint64 (log (ax + sqrt (ax * ax + 1))) | sign);
 }
 
-PL_SIG (S, D, 1, asinh, -10.0, 10.0)
-PL_TEST_ULP (asinh, 1.54)
-PL_TEST_INTERVAL (asinh, -0x1p-26, 0x1p-26, 50000)
-PL_TEST_INTERVAL (asinh, 0x1p-26, 1.0, 40000)
-PL_TEST_INTERVAL (asinh, -0x1p-26, -1.0, 10000)
-PL_TEST_INTERVAL (asinh, 1.0, 100.0, 40000)
-PL_TEST_INTERVAL (asinh, -1.0, -100.0, 10000)
-PL_TEST_INTERVAL (asinh, 100.0, inf, 50000)
-PL_TEST_INTERVAL (asinh, -100.0, -inf, 10000)
+TEST_SIG (S, D, 1, asinh, -10.0, 10.0)
+TEST_ULP (asinh, 1.54)
+TEST_INTERVAL (asinh, -0x1p-26, 0x1p-26, 50000)
+TEST_INTERVAL (asinh, 0x1p-26, 1.0, 40000)
+TEST_INTERVAL (asinh, -0x1p-26, -1.0, 10000)
+TEST_INTERVAL (asinh, 1.0, 100.0, 40000)
+TEST_INTERVAL (asinh, -1.0, -100.0, 10000)
+TEST_INTERVAL (asinh, 100.0, inf, 50000)
+TEST_INTERVAL (asinh, -100.0, -inf, 10000)
diff --git a/pl/math/asinh_data.c b/math/aarch64/experimental/asinh_data.c
similarity index 51%
rename from pl/math/asinh_data.c
rename to math/aarch64/experimental/asinh_data.c
index 073b19799bda7f..7afaf69601309f 100644
--- a/pl/math/asinh_data.c
+++ b/math/aarch64/experimental/asinh_data.c
@@ -1,7 +1,7 @@
 /*
  * Double-precision polynomial coefficients for scalar asinh(x)
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -13,10 +13,11 @@
    Note P is evaluated on even powers of x only. See tools/asinh.sollya for the
    algorithm used to generate these coefficients.  */
 const struct asinh_data __asinh_data
-  = {.poly
-     = {-0x1.55555555554a7p-3, 0x1.3333333326c7p-4, -0x1.6db6db68332e6p-5,
-	0x1.f1c71b26fb40dp-6, -0x1.6e8b8b654a621p-6, 0x1.1c4daa9e67871p-6,
-	-0x1.c9871d10885afp-7, 0x1.7a16e8d9d2ecfp-7, -0x1.3ddca533e9f54p-7,
-	0x1.0becef748dafcp-7, -0x1.b90c7099dd397p-8, 0x1.541f2bb1ffe51p-8,
-	-0x1.d217026a669ecp-9, 0x1.0b5c7977aaf7p-9, -0x1.e0f37daef9127p-11,
-	0x1.388b5fe542a6p-12, -0x1.021a48685e287p-14, 0x1.93d4ba83d34dap-18}};
+    = { .poly
+	= { -0x1.55555555554a7p-3, 0x1.3333333326c7p-4, -0x1.6db6db68332e6p-5,
+	    0x1.f1c71b26fb40dp-6, -0x1.6e8b8b654a621p-6, 0x1.1c4daa9e67871p-6,
+	    -0x1.c9871d10885afp-7, 0x1.7a16e8d9d2ecfp-7, -0x1.3ddca533e9f54p-7,
+	    0x1.0becef748dafcp-7, -0x1.b90c7099dd397p-8, 0x1.541f2bb1ffe51p-8,
+	    -0x1.d217026a669ecp-9, 0x1.0b5c7977aaf7p-9, -0x1.e0f37daef9127p-11,
+	    0x1.388b5fe542a6p-12, -0x1.021a48685e287p-14,
+	    0x1.93d4ba83d34dap-18 } };
diff --git a/pl/math/asinhf_3u5.c b/math/aarch64/experimental/asinhf_3u5.c
similarity index 77%
rename from pl/math/asinhf_3u5.c
rename to math/aarch64/experimental/asinhf_3u5.c
index ec26b80ec2ec4c..92c6dfd9b43d64 100644
--- a/pl/math/asinhf_3u5.c
+++ b/math/aarch64/experimental/asinhf_3u5.c
@@ -1,14 +1,14 @@
 /*
  * Single-precision asinh(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "poly_scalar_f32.h"
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define AbsMask (0x7fffffff)
 #define SqrtFltMax (0x1.749e96p+10f)
@@ -16,9 +16,6 @@
 #define One (0x3f8)
 #define ExpM12 (0x398)
 
-float
-optr_aor_log_f32 (float);
-
 /* asinhf approximation using a variety of approaches on different intervals:
 
    |x| < 2^-12: Return x. Function is exactly rounded in this region.
@@ -62,15 +59,15 @@ asinhf (float x)
 
   if (unlikely (ax > SqrtFltMax))
     {
-      return asfloat (asuint (optr_aor_log_f32 (ax) + Ln2) | sign);
+      return asfloat (asuint (logf (ax) + Ln2) | sign);
     }
 
-  return asfloat (asuint (optr_aor_log_f32 (ax + sqrtf (ax * ax + 1))) | sign);
+  return asfloat (asuint (logf (ax + sqrtf (ax * ax + 1))) | sign);
 }
 
-PL_SIG (S, F, 1, asinh, -10.0, 10.0)
-PL_TEST_ULP (asinhf, 2.9)
-PL_TEST_INTERVAL (asinhf, 0, 0x1p-12, 5000)
-PL_TEST_INTERVAL (asinhf, 0x1p-12, 1.0, 50000)
-PL_TEST_INTERVAL (asinhf, 1.0, 0x1p11, 50000)
-PL_TEST_INTERVAL (asinhf, 0x1p11, 0x1p127, 20000)
+TEST_SIG (S, F, 1, asinh, -10.0, 10.0)
+TEST_ULP (asinhf, 2.9)
+TEST_INTERVAL (asinhf, 0, 0x1p-12, 5000)
+TEST_INTERVAL (asinhf, 0x1p-12, 1.0, 50000)
+TEST_INTERVAL (asinhf, 1.0, 0x1p11, 50000)
+TEST_INTERVAL (asinhf, 0x1p11, 0x1p127, 20000)
diff --git a/math/aarch64/experimental/asinhf_data.c b/math/aarch64/experimental/asinhf_data.c
new file mode 100644
index 00000000000000..5ed261ba835b5e
--- /dev/null
+++ b/math/aarch64/experimental/asinhf_data.c
@@ -0,0 +1,15 @@
+/*
+ * Coefficients for single-precision asinh(x) function.
+ *
+ * Copyright (c) 2022-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Approximate asinhf(x) directly in [2^-12, 1]. See tools/asinhf.sollya
+   for how these coeffs were generated.  */
+const struct asinhf_data __asinhf_data
+    = { .coeffs = { -0x1.9b16fap-19f, -0x1.552baap-3f, -0x1.4e572ap-11f,
+		    0x1.3a81dcp-4f, 0x1.65bbaap-10f, -0x1.057f1p-4f,
+		    0x1.6c1d46p-5f, -0x1.4cafe8p-7f } };
diff --git a/pl/math/atan2_2u5.c b/math/aarch64/experimental/atan2_2u5.c
similarity index 91%
rename from pl/math/atan2_2u5.c
rename to math/aarch64/experimental/atan2_2u5.c
index c909ac99fa22e9..518e34589e5bb4 100644
--- a/pl/math/atan2_2u5.c
+++ b/math/aarch64/experimental/atan2_2u5.c
@@ -1,7 +1,7 @@
 /*
  * Double-precision scalar atan2(x) function.
  *
- * Copyright (c) 2021-2023, Arm Limited.
+ * Copyright (c) 2021-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -9,8 +9,8 @@
 
 #include "atan_common.h"
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define Pi (0x1.921fb54442d18p+1)
 #define PiOver2 (0x1.921fb54442d18p+0)
@@ -79,8 +79,8 @@ atan2 (double y, double x)
   if (unlikely (iax == 0 || exp_diff <= -POW8_EXP_UFLOW_BOUND))
     return sign_y ? -PiOver2 : PiOver2;
 
-  /* Special case for either x is INF or (x, y) is very close to x axis and x is
-     negative.  */
+  /* Special case for either x is INF or (x, y) is very close to x axis and x
+     is negative.  */
   if (unlikely (iax == 0x7ff0000000000000
 		|| (exp_diff >= POW8_EXP_UFLOW_BOUND && m >= 2)))
     {
@@ -150,10 +150,10 @@ atan2 (double y, double x)
 }
 
 /* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h.  */
-PL_SIG (S, D, 2, atan2)
-PL_TEST_ULP (atan2, 1.78)
-PL_TEST_INTERVAL (atan2, -10.0, 10.0, 50000)
-PL_TEST_INTERVAL (atan2, -1.0, 1.0, 40000)
-PL_TEST_INTERVAL (atan2, 0.0, 1.0, 40000)
-PL_TEST_INTERVAL (atan2, 1.0, 100.0, 40000)
-PL_TEST_INTERVAL (atan2, 1e6, 1e32, 40000)
+TEST_SIG (S, D, 2, atan2)
+TEST_ULP (atan2, 1.78)
+TEST_INTERVAL (atan2, -10.0, 10.0, 50000)
+TEST_INTERVAL (atan2, -1.0, 1.0, 40000)
+TEST_INTERVAL (atan2, 0.0, 1.0, 40000)
+TEST_INTERVAL (atan2, 1.0, 100.0, 40000)
+TEST_INTERVAL (atan2, 1e6, 1e32, 40000)
diff --git a/pl/math/atan2f_3u.c b/math/aarch64/experimental/atan2f_3u.c
similarity index 90%
rename from pl/math/atan2f_3u.c
rename to math/aarch64/experimental/atan2f_3u.c
index 38e1df59c102e2..245ba551566c54 100644
--- a/pl/math/atan2f_3u.c
+++ b/math/aarch64/experimental/atan2f_3u.c
@@ -1,7 +1,7 @@
 /*
  * Single-precision scalar atan2(x) function.
  *
- * Copyright (c) 2021-2023, Arm Limited.
+ * Copyright (c) 2021-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -9,8 +9,8 @@
 
 #include "atanf_common.h"
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define Pi (0x1.921fb6p+1f)
 #define PiOver2 (0x1.921fb6p+0f)
@@ -19,8 +19,8 @@
 
 /* We calculate atan2f by P(n/d), where n and d are similar to the input
    arguments, and P is a polynomial. The polynomial may underflow.
-   POLY_UFLOW_BOUND is the lower bound of the difference in exponents of n and d
-   for which P underflows, and is used to special-case such inputs.  */
+   POLY_UFLOW_BOUND is the lower bound of the difference in exponents of n and
+   d for which P underflows, and is used to special-case such inputs.  */
 #define POLY_UFLOW_BOUND 24
 
 static inline int32_t
@@ -158,10 +158,10 @@ atan2f (float y, float x)
 }
 
 /* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h.  */
-PL_SIG (S, F, 2, atan2)
-PL_TEST_ULP (atan2f, 2.4)
-PL_TEST_INTERVAL (atan2f, -10.0, 10.0, 50000)
-PL_TEST_INTERVAL (atan2f, -1.0, 1.0, 40000)
-PL_TEST_INTERVAL (atan2f, 0.0, 1.0, 40000)
-PL_TEST_INTERVAL (atan2f, 1.0, 100.0, 40000)
-PL_TEST_INTERVAL (atan2f, 1e6, 1e32, 40000)
+TEST_SIG (S, F, 2, atan2)
+TEST_ULP (atan2f, 2.4)
+TEST_INTERVAL (atan2f, -10.0, 10.0, 50000)
+TEST_INTERVAL (atan2f, -1.0, 1.0, 40000)
+TEST_INTERVAL (atan2f, 0.0, 1.0, 40000)
+TEST_INTERVAL (atan2f, 1.0, 100.0, 40000)
+TEST_INTERVAL (atan2f, 1e6, 1e32, 40000)
diff --git a/pl/math/atan_2u5.c b/math/aarch64/experimental/atan_2u5.c
similarity index 79%
rename from pl/math/atan_2u5.c
rename to math/aarch64/experimental/atan_2u5.c
index ee477010175899..9c9c77d98cd3cd 100644
--- a/pl/math/atan_2u5.c
+++ b/math/aarch64/experimental/atan_2u5.c
@@ -1,12 +1,12 @@
 /*
  * Double-precision atan(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 #include "atan_common.h"
 
 #define AbsMask 0x7fffffffffffffff
@@ -63,11 +63,11 @@ atan (double x)
   return asdouble (asuint64 (y) ^ sign);
 }
 
-PL_SIG (S, D, 1, atan, -10.0, 10.0)
-PL_TEST_ULP (atan, 1.78)
-PL_TEST_INTERVAL (atan, 0, 0x1p-30, 10000)
-PL_TEST_INTERVAL (atan, -0, -0x1p-30, 1000)
-PL_TEST_INTERVAL (atan, 0x1p-30, 0x1p53, 900000)
-PL_TEST_INTERVAL (atan, -0x1p-30, -0x1p53, 90000)
-PL_TEST_INTERVAL (atan, 0x1p53, inf, 10000)
-PL_TEST_INTERVAL (atan, -0x1p53, -inf, 1000)
+TEST_SIG (S, D, 1, atan, -10.0, 10.0)
+TEST_ULP (atan, 1.78)
+TEST_INTERVAL (atan, 0, 0x1p-30, 10000)
+TEST_INTERVAL (atan, -0, -0x1p-30, 1000)
+TEST_INTERVAL (atan, 0x1p-30, 0x1p53, 900000)
+TEST_INTERVAL (atan, -0x1p-30, -0x1p53, 90000)
+TEST_INTERVAL (atan, 0x1p53, inf, 10000)
+TEST_INTERVAL (atan, -0x1p53, -inf, 1000)
diff --git a/pl/math/atan_common.h b/math/aarch64/experimental/atan_common.h
similarity index 95%
rename from pl/math/atan_common.h
rename to math/aarch64/experimental/atan_common.h
index 798cc22cc40aa6..1fd83860219b9e 100644
--- a/pl/math/atan_common.h
+++ b/math/aarch64/experimental/atan_common.h
@@ -2,7 +2,7 @@
  * Double-precision polynomial evaluation function for scalar
  * atan(x) and atan2(y,x).
  *
- * Copyright (c) 2021-2023, Arm Limited.
+ * Copyright (c) 2021-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
diff --git a/math/aarch64/experimental/atan_data.c b/math/aarch64/experimental/atan_data.c
new file mode 100644
index 00000000000000..5d24fa912d02c3
--- /dev/null
+++ b/math/aarch64/experimental/atan_data.c
@@ -0,0 +1,23 @@
+/*
+ * Double-precision polynomial coefficients for vector atan(x) and atan2(y,x).
+ *
+ * Copyright (c) 2019-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+const struct atan_poly_data __atan_poly_data
+    = { .poly = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2)
+		     on [2**-1022, 1.0]. See atan.sollya for details of how
+		     these were generated.  */
+		  -0x1.5555555555555p-2,  0x1.99999999996c1p-3,
+		  -0x1.2492492478f88p-3,  0x1.c71c71bc3951cp-4,
+		  -0x1.745d160a7e368p-4,  0x1.3b139b6a88ba1p-4,
+		  -0x1.11100ee084227p-4,  0x1.e1d0f9696f63bp-5,
+		  -0x1.aebfe7b418581p-5,  0x1.842dbe9b0d916p-5,
+		  -0x1.5d30140ae5e99p-5,  0x1.338e31eb2fbbcp-5,
+		  -0x1.00e6eece7de8p-5,	  0x1.860897b29e5efp-6,
+		  -0x1.0051381722a59p-6,  0x1.14e9dc19a4a4ep-7,
+		  -0x1.d0062b42fe3bfp-9,  0x1.17739e210171ap-10,
+		  -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16 } };
diff --git a/pl/math/atanf_2u9.c b/math/aarch64/experimental/atanf_2u9.c
similarity index 82%
rename from pl/math/atanf_2u9.c
rename to math/aarch64/experimental/atanf_2u9.c
index ba6f68089de13f..518415ded6341f 100644
--- a/pl/math/atanf_2u9.c
+++ b/math/aarch64/experimental/atanf_2u9.c
@@ -1,13 +1,13 @@
 /*
  * Single-precision atan(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "atanf_common.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define PiOver2 0x1.921fb6p+0f
 #define AbsMask 0x7fffffff
@@ -64,9 +64,9 @@ atanf (float x)
   return asfloat (asuint (y) ^ sign);
 }
 
-PL_SIG (S, F, 1, atan, -10.0, 10.0)
-PL_TEST_ULP (atanf, 2.38)
-PL_TEST_SYM_INTERVAL (atanf, 0, 0x1p-30, 5000)
-PL_TEST_SYM_INTERVAL (atanf, 0x1p-30, 1, 40000)
-PL_TEST_SYM_INTERVAL (atanf, 1, 0x1p30, 40000)
-PL_TEST_SYM_INTERVAL (atanf, 0x1p30, inf, 1000)
+TEST_SIG (S, F, 1, atan, -10.0, 10.0)
+TEST_ULP (atanf, 2.38)
+TEST_SYM_INTERVAL (atanf, 0, 0x1p-30, 5000)
+TEST_SYM_INTERVAL (atanf, 0x1p-30, 1, 40000)
+TEST_SYM_INTERVAL (atanf, 1, 0x1p30, 40000)
+TEST_SYM_INTERVAL (atanf, 0x1p30, inf, 1000)
diff --git a/pl/math/atanf_common.h b/math/aarch64/experimental/atanf_common.h
similarity index 96%
rename from pl/math/atanf_common.h
rename to math/aarch64/experimental/atanf_common.h
index 8952e7e0078be8..3e654204730963 100644
--- a/pl/math/atanf_common.h
+++ b/math/aarch64/experimental/atanf_common.h
@@ -2,7 +2,7 @@
  * Single-precision polynomial evaluation function for scalar
  * atan(x) and atan2(y,x).
  *
- * Copyright (c) 2021-2023, Arm Limited.
+ * Copyright (c) 2021-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
diff --git a/math/aarch64/experimental/atanf_data.c b/math/aarch64/experimental/atanf_data.c
new file mode 100644
index 00000000000000..f4d607c2a12d04
--- /dev/null
+++ b/math/aarch64/experimental/atanf_data.c
@@ -0,0 +1,17 @@
+/*
+ * Single-precision polynomial coefficients for vector atan(x) and atan2(y,x).
+ *
+ * Copyright (c) 2019-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0].
+ */
+const struct atanf_poly_data __atanf_poly_data
+    = { .poly
+	= { /* See atanf.sollya for details of how these were generated.  */
+	    -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f,
+	    -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f,
+	    0x1.01fd88p-8f } };
diff --git a/pl/math/atanh_3u.c b/math/aarch64/experimental/atanh_3u.c
similarity index 88%
rename from pl/math/atanh_3u.c
rename to math/aarch64/experimental/atanh_3u.c
index dcfbe8192a22a9..d01b8bacd46a6c 100644
--- a/pl/math/atanh_3u.c
+++ b/math/aarch64/experimental/atanh_3u.c
@@ -1,21 +1,21 @@
 /*
  * Double-precision atanh(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "math_config.h"
 #include "poly_scalar_f64.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define AbsMask 0x7fffffffffffffff
 #define Half 0x3fe0000000000000
 #define One 0x3ff0000000000000
 #define Ln2Hi 0x1.62e42fefa3800p-1
 #define Ln2Lo 0x1.ef35793c76730p-45
-#define OneMHfRt2Top                                                           \
+#define OneMHfRt2Top                                                          \
   0x00095f62 /* top32(asuint64(1)) - top32(asuint64(sqrt(2)/2)).  */
 #define OneTop12 0x3ff
 #define HfRt2Top 0x3fe6a09e /* top32(asuint64(sqrt(2)/2)).  */
@@ -76,8 +76,8 @@ atanh (double x)
   return halfsign * log1p_inline ((2 * ax) / (1 - ax));
 }
 
-PL_SIG (S, D, 1, atanh, -1.0, 1.0)
-PL_TEST_ULP (atanh, 3.00)
-PL_TEST_SYM_INTERVAL (atanh, 0, 0x1p-23, 10000)
-PL_TEST_SYM_INTERVAL (atanh, 0x1p-23, 1, 90000)
-PL_TEST_SYM_INTERVAL (atanh, 1, inf, 100)
+TEST_SIG (S, D, 1, atanh, -1.0, 1.0)
+TEST_ULP (atanh, 3.00)
+TEST_SYM_INTERVAL (atanh, 0, 0x1p-23, 10000)
+TEST_SYM_INTERVAL (atanh, 0x1p-23, 1, 90000)
+TEST_SYM_INTERVAL (atanh, 1, inf, 100)
diff --git a/pl/math/atanhf_3u1.c b/math/aarch64/experimental/atanhf_3u1.c
similarity index 87%
rename from pl/math/atanhf_3u1.c
rename to math/aarch64/experimental/atanhf_3u1.c
index e99d5a9900a9d6..c452bab91f979e 100644
--- a/pl/math/atanhf_3u1.c
+++ b/math/aarch64/experimental/atanhf_3u1.c
@@ -1,14 +1,14 @@
 /*
  * Single-precision atanh(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "math_config.h"
 #include "mathlib.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define AbsMask 0x7fffffff
 #define Half 0x3f000000
@@ -79,8 +79,8 @@ atanhf (float x)
   return halfsign * log1pf_inline ((2 * ax) / (1 - ax));
 }
 
-PL_SIG (S, F, 1, atanh, -1.0, 1.0)
-PL_TEST_ULP (atanhf, 2.59)
-PL_TEST_SYM_INTERVAL (atanhf, 0, 0x1p-12, 500)
-PL_TEST_SYM_INTERVAL (atanhf, 0x1p-12, 1, 200000)
-PL_TEST_SYM_INTERVAL (atanhf, 1, inf, 1000)
+TEST_SIG (S, F, 1, atanh, -1.0, 1.0)
+TEST_ULP (atanhf, 2.59)
+TEST_SYM_INTERVAL (atanhf, 0, 0x1p-12, 500)
+TEST_SYM_INTERVAL (atanhf, 0x1p-12, 1, 200000)
+TEST_SYM_INTERVAL (atanhf, 1, inf, 1000)
diff --git a/pl/math/cbrt_2u.c b/math/aarch64/experimental/cbrt_2u.c
similarity index 89%
rename from pl/math/cbrt_2u.c
rename to math/aarch64/experimental/cbrt_2u.c
index 80be83c4470c32..cf31627e43dcc0 100644
--- a/pl/math/cbrt_2u.c
+++ b/math/aarch64/experimental/cbrt_2u.c
@@ -1,15 +1,15 @@
 /*
  * Double-precision cbrt(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
-PL_SIG (S, D, 1, cbrt, -10.0, 10.0)
+TEST_SIG (S, D, 1, cbrt, -10.0, 10.0)
 
 #define AbsMask 0x7fffffffffffffff
 #define TwoThirds 0x1.5555555555555p-1
@@ -39,8 +39,8 @@ cbrt (double x)
   int e;
   double m = frexp (asdouble (iax), &e);
 
-  /* Calculate rough approximation for cbrt(m) in [0.5, 1.0], starting point for
-     Newton iterations.  */
+  /* Calculate rough approximation for cbrt(m) in [0.5, 1.0], starting point
+     for Newton iterations.  */
   double p_01 = fma (C (1), m, C (0));
   double p_23 = fma (C (3), m, C (2));
   double p = fma (p_23, m * m, p_01);
@@ -65,5 +65,5 @@ cbrt (double x)
   return asdouble (asuint64 (ldexp (a * T (2 + e % 3), e / 3)) | sign);
 }
 
-PL_TEST_ULP (cbrt, 1.30)
-PL_TEST_SYM_INTERVAL (cbrt, 0, inf, 1000000)
+TEST_ULP (cbrt, 1.30)
+TEST_SYM_INTERVAL (cbrt, 0, inf, 1000000)
diff --git a/pl/math/cbrt_data.c b/math/aarch64/experimental/cbrt_data.c
similarity index 93%
rename from pl/math/cbrt_data.c
rename to math/aarch64/experimental/cbrt_data.c
index 3d484c2779e24d..dabcb6aff2d4cc 100644
--- a/pl/math/cbrt_data.c
+++ b/math/aarch64/experimental/cbrt_data.c
@@ -1,7 +1,7 @@
 /*
  * Coefficients and table entries for double-precision cbrt(x).
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
diff --git a/pl/math/cbrtf_1u5.c b/math/aarch64/experimental/cbrtf_1u5.c
similarity index 88%
rename from pl/math/cbrtf_1u5.c
rename to math/aarch64/experimental/cbrtf_1u5.c
index 88fcb7162ef6b1..5f0288e6d27ad8 100644
--- a/pl/math/cbrtf_1u5.c
+++ b/math/aarch64/experimental/cbrtf_1u5.c
@@ -1,14 +1,14 @@
 /*
  * Single-precision cbrt(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "poly_scalar_f32.h"
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define AbsMask 0x7fffffff
 #define SignMask 0x80000000
@@ -18,8 +18,8 @@
 
 /* Approximation for single-precision cbrt(x), using low-order polynomial and
    one Newton iteration on a reduced interval. Greatest error is 1.5 ULP. This
-   is observed for every value where the mantissa is 0x1.81410e and the exponent
-   is a multiple of 3, for example:
+   is observed for every value where the mantissa is 0x1.81410e and the
+   exponent is a multiple of 3, for example:
    cbrtf(0x1.81410ep+30) got 0x1.255d96p+10
 			want 0x1.255d92p+10.  */
 float
@@ -61,6 +61,6 @@ cbrtf (float x)
   return asfloat (asuint (ldexpf (a * T (2 + e % 3), e / 3)) | sign);
 }
 
-PL_SIG (S, F, 1, cbrt, -10.0, 10.0)
-PL_TEST_ULP (cbrtf, 1.03)
-PL_TEST_SYM_INTERVAL (cbrtf, 0, inf, 1000000)
+TEST_SIG (S, F, 1, cbrt, -10.0, 10.0)
+TEST_ULP (cbrtf, 1.03)
+TEST_SYM_INTERVAL (cbrtf, 0, inf, 1000000)
diff --git a/pl/math/cbrtf_data.c b/math/aarch64/experimental/cbrtf_data.c
similarity index 93%
rename from pl/math/cbrtf_data.c
rename to math/aarch64/experimental/cbrtf_data.c
index c6cdb4de0d65bf..7b5c53f4a6066d 100644
--- a/pl/math/cbrtf_data.c
+++ b/math/aarch64/experimental/cbrtf_data.c
@@ -1,7 +1,7 @@
 /*
  * Coefficients and table entries for single-precision cbrt(x).
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
diff --git a/pl/math/cosh_2u.c b/math/aarch64/experimental/cosh_2u.c
similarity index 70%
rename from pl/math/cosh_2u.c
rename to math/aarch64/experimental/cosh_2u.c
index 2240a9c56f1589..f5bc73b85df855 100644
--- a/pl/math/cosh_2u.c
+++ b/math/aarch64/experimental/cosh_2u.c
@@ -1,21 +1,19 @@
 /*
  * Double-precision cosh(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "exp_inline.h"
 
 #define AbsMask 0x7fffffffffffffff
-#define SpecialBound                                                           \
+#define SpecialBound                                                          \
   0x40861da04cbafe44 /* 0x1.61da04cbafe44p+9, above which exp overflows.  */
 
-double
-__exp_dd (double, double);
-
 static double
 specialcase (double x, uint64_t iax)
 {
@@ -23,9 +21,9 @@ specialcase (double x, uint64_t iax)
     return INFINITY;
   if (iax > 0x7ff0000000000000)
     return __math_invalid (x);
-  /* exp overflows above SpecialBound. At this magnitude cosh(x) is dominated by
-     exp(x), so we can approximate cosh(x) by (exp(|x|/2)) ^ 2 / 2.  */
-  double t = __exp_dd (asdouble (iax) / 2, 0);
+  /* exp overflows above SpecialBound. At this magnitude cosh(x) is dominated
+     by exp(x), so we can approximate cosh(x) by (exp(|x|/2)) ^ 2 / 2.  */
+  double t = exp_inline (asdouble (iax) / 2, 0);
   return (0.5 * t) * t;
 }
 
@@ -44,20 +42,20 @@ cosh (double x)
   uint64_t ix = asuint64 (x);
   uint64_t iax = ix & AbsMask;
 
-  /* exp overflows a little bit before cosh, so use special-case handler for the
-     gap, as well as special values.  */
+  /* exp overflows a little bit before cosh, so use special-case handler for
+     the gap, as well as special values.  */
   if (unlikely (iax >= SpecialBound))
     return specialcase (x, iax);
 
   double ax = asdouble (iax);
   /* Use double-precision exp helper to calculate exp(x), then:
      cosh(x) = exp(|x|) / 2 + 1 / (exp(|x| * 2).  */
-  double t = __exp_dd (ax, 0);
+  double t = exp_inline (ax, 0);
   return 0.5 * t + 0.5 / t;
 }
 
-PL_SIG (S, D, 1, cosh, -10.0, 10.0)
-PL_TEST_ULP (cosh, 1.43)
-PL_TEST_SYM_INTERVAL (cosh, 0, 0x1.61da04cbafe44p+9, 100000)
-PL_TEST_SYM_INTERVAL (cosh, 0x1.61da04cbafe44p+9, 0x1p10, 1000)
-PL_TEST_SYM_INTERVAL (cosh, 0x1p10, inf, 100)
+TEST_SIG (S, D, 1, cosh, -10.0, 10.0)
+TEST_ULP (cosh, 1.43)
+TEST_SYM_INTERVAL (cosh, 0, 0x1.61da04cbafe44p+9, 100000)
+TEST_SYM_INTERVAL (cosh, 0x1.61da04cbafe44p+9, 0x1p10, 1000)
+TEST_SYM_INTERVAL (cosh, 0x1p10, inf, 100)
diff --git a/pl/math/coshf_1u9.c b/math/aarch64/experimental/coshf_1u9.c
similarity index 71%
rename from pl/math/coshf_1u9.c
rename to math/aarch64/experimental/coshf_1u9.c
index cf737840e0d698..b7e7720a472ec8 100644
--- a/pl/math/coshf_1u9.c
+++ b/math/aarch64/experimental/coshf_1u9.c
@@ -1,22 +1,19 @@
 /*
  * Single-precision cosh(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
+#include "mathlib.h"
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define AbsMask 0x7fffffff
 #define TinyBound 0x20000000 /* 0x1p-63: Round to 1 below this.  */
-#define SpecialBound                                                           \
-  0x42ad496c /* 0x1.5a92d8p+6: expf overflows above this, so have to use       \
-		special case.  */
-
-float
-optr_aor_exp_f32 (float);
+/* 0x1.5a92d8p+6: expf overflows above this, so have to use special case.  */
+#define SpecialBound 0x42ad496c
 
 static NOINLINE float
 specialcase (float x, uint32_t iax)
@@ -32,7 +29,7 @@ specialcase (float x, uint32_t iax)
      without overflow, so use exp(|x|/2) instead. For large x cosh(x) is
      dominated by exp(x), so return:
      cosh(x) ~= (exp(|x|/2))^2 / 2.  */
-  float t = optr_aor_exp_f32 (asfloat (iax) / 2);
+  float t = expf (asfloat (iax) / 2);
   return (0.5 * t) * t;
 }
 
@@ -57,12 +54,12 @@ coshf (float x)
 
   /* Compute cosh using the definition:
      coshf(x) = exp(x) / 2 + exp(-x) / 2.  */
-  float t = optr_aor_exp_f32 (ax);
+  float t = expf (ax);
   return 0.5f * t + 0.5f / t;
 }
 
-PL_SIG (S, F, 1, cosh, -10.0, 10.0)
-PL_TEST_ULP (coshf, 1.89)
-PL_TEST_SYM_INTERVAL (coshf, 0, 0x1p-63, 100)
-PL_TEST_SYM_INTERVAL (coshf, 0, 0x1.5a92d8p+6, 80000)
-PL_TEST_SYM_INTERVAL (coshf, 0x1.5a92d8p+6, inf, 2000)
+TEST_SIG (S, F, 1, cosh, -10.0, 10.0)
+TEST_ULP (coshf, 1.89)
+TEST_SYM_INTERVAL (coshf, 0, 0x1p-63, 100)
+TEST_SYM_INTERVAL (coshf, 0, 0x1.5a92d8p+6, 80000)
+TEST_SYM_INTERVAL (coshf, 0x1.5a92d8p+6, inf, 2000)
diff --git a/pl/math/erf_2u5.c b/math/aarch64/experimental/erf_2u5.c
similarity index 87%
rename from pl/math/erf_2u5.c
rename to math/aarch64/experimental/erf_2u5.c
index 3ca2a1332c1f35..0bbe3e9548f88e 100644
--- a/pl/math/erf_2u5.c
+++ b/math/aarch64/experimental/erf_2u5.c
@@ -1,13 +1,13 @@
 /*
  * Double-precision erf(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define TwoOverSqrtPiMinusOne 0x1.06eba8214db69p-3
 #define Shift 0x1p45
@@ -42,7 +42,7 @@
    erf(-0x1.00003c924e5d1p-8) got -0x1.20dd59132ebadp-8
 			     want -0x1.20dd59132ebafp-8.  */
 double
-erf (double x)
+arm_math_erf (double x)
 {
   /* Get absolute value and sign.  */
   uint64_t ix = asuint64 (x);
@@ -62,8 +62,8 @@ erf (double x)
       double r = z - Shift;
       /* Lookup erf(r) and scale(r) in table.
 	 Set erf(r) to 0 and scale to 2/sqrt(pi) for |x| <= 0x1.cp-9.  */
-      double erfr = __erf_data.tab[i].erf;
-      double scale = __erf_data.tab[i].scale;
+      double erfr = __v_erf_data.tab[i].erf;
+      double scale = __v_erf_data.tab[i].scale;
 
       /* erf(x) ~ erf(r) + scale * d * poly (d, r).  */
       double d = a - r;
@@ -95,8 +95,7 @@ erf (double x)
   return asdouble (sign | asuint64 (1.0));
 }
 
-PL_SIG (S, D, 1, erf, -6.0, 6.0)
-PL_TEST_ULP (erf, 1.79)
-PL_TEST_SYM_INTERVAL (erf, 0, 5.9921875, 40000)
-PL_TEST_SYM_INTERVAL (erf, 5.9921875, inf, 40000)
-PL_TEST_SYM_INTERVAL (erf, 0, inf, 40000)
+TEST_ULP (arm_math_erf, 1.79)
+TEST_SYM_INTERVAL (arm_math_erf, 0, 5.9921875, 40000)
+TEST_SYM_INTERVAL (arm_math_erf, 5.9921875, inf, 40000)
+TEST_SYM_INTERVAL (arm_math_erf, 0, inf, 40000)
diff --git a/pl/math/erfc_1u8.c b/math/aarch64/experimental/erfc_1u8.c
similarity index 90%
rename from pl/math/erfc_1u8.c
rename to math/aarch64/experimental/erfc_1u8.c
index 7f2004e9335d7e..5357e932943379 100644
--- a/pl/math/erfc_1u8.c
+++ b/math/aarch64/experimental/erfc_1u8.c
@@ -1,13 +1,13 @@
 /*
  * Double-precision erfc(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define Shift 0x1p45
 #define P20 0x1.5555555555555p-2 /* 1/3.  */
@@ -86,11 +86,11 @@ erfc (double x)
       /* Lookup erfc(r) and scale(r) in tables, e.g. set erfc(r) to 1 and scale
 	 to 2/sqrt(pi), when x reduced to r = 0.  */
       double z = a + Shift;
-      uint64_t i = asuint64 (z);
+      uint64_t i = asuint64 (z) - asuint64 (Shift);
       double r = z - Shift;
       /* These values are scaled by 2^128.  */
-      double erfcr = __erfc_data.tab[i].erfc;
-      double scale = __erfc_data.tab[i].scale;
+      double erfcr = __v_erfc_data.tab[i].erfc;
+      double scale = __v_erfc_data.tab[i].scale;
 
       /* erfc(x) ~ erfc(r) - scale * d * poly (r, d).  */
       double d = a - r;
@@ -144,10 +144,10 @@ erfc (double x)
   return __math_uflow (0);
 }
 
-PL_SIG (S, D, 1, erfc, -6.0, 28.0)
-PL_TEST_ULP (erfc, 1.21)
-PL_TEST_SYM_INTERVAL (erfc, 0, 0x1p-26, 40000)
-PL_TEST_INTERVAL (erfc, 0x1p-26, 28.0, 100000)
-PL_TEST_INTERVAL (erfc, -0x1p-26, -6.0, 100000)
-PL_TEST_INTERVAL (erfc, 28.0, inf, 40000)
-PL_TEST_INTERVAL (erfc, -6.0, -inf, 40000)
+TEST_SIG (S, D, 1, erfc, -6.0, 28.0)
+TEST_ULP (erfc, 1.21)
+TEST_SYM_INTERVAL (erfc, 0, 0x1p-26, 40000)
+TEST_INTERVAL (erfc, 0x1p-26, 28.0, 100000)
+TEST_INTERVAL (erfc, -0x1p-26, -6.0, 100000)
+TEST_INTERVAL (erfc, 28.0, inf, 40000)
+TEST_INTERVAL (erfc, -6.0, -inf, 40000)
diff --git a/pl/math/erfcf_1u7.c b/math/aarch64/experimental/erfcf_1u7.c
similarity index 86%
rename from pl/math/erfcf_1u7.c
rename to math/aarch64/experimental/erfcf_1u7.c
index c8ce95cca058c1..e56193c8a1030d 100644
--- a/pl/math/erfcf_1u7.c
+++ b/math/aarch64/experimental/erfcf_1u7.c
@@ -1,13 +1,13 @@
 /*
  * Single-precision erfc(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define Shift 0x1p17f
 #define OneThird 0x1.555556p-2f
@@ -59,8 +59,8 @@ erfcf (float x)
       float r = z - Shift;
 
       /* These values are scaled by 2^-47.  */
-      float erfcr = __erfcf_data.tab[i].erfc;
-      float scale = __erfcf_data.tab[i].scale;
+      float erfcr = __v_erfcf_data.tab[i].erfc;
+      float scale = __v_erfcf_data.tab[i].scale;
 
       /* erfc(x) ~ erfc(r) - scale * d * poly (r, d).  */
       float d = a - r;
@@ -94,10 +94,10 @@ erfcf (float x)
   return sign ? 2.0f : __math_uflowf (0);
 }
 
-PL_SIG (S, F, 1, erfc, -4.0, 10.0)
-PL_TEST_ULP (erfcf, 1.14)
-PL_TEST_SYM_INTERVAL (erfcf, 0, 0x1p-26, 40000)
-PL_TEST_INTERVAL (erfcf, 0x1p-26, 10.0625, 40000)
-PL_TEST_INTERVAL (erfcf, -0x1p-26, -4.0, 40000)
-PL_TEST_INTERVAL (erfcf, 10.0625, inf, 40000)
-PL_TEST_INTERVAL (erfcf, -4.0, -inf, 40000)
+TEST_SIG (S, F, 1, erfc, -4.0, 10.0)
+TEST_ULP (erfcf, 1.14)
+TEST_SYM_INTERVAL (erfcf, 0, 0x1p-26, 40000)
+TEST_INTERVAL (erfcf, 0x1p-26, 10.0625, 40000)
+TEST_INTERVAL (erfcf, -0x1p-26, -4.0, 40000)
+TEST_INTERVAL (erfcf, 10.0625, inf, 40000)
+TEST_INTERVAL (erfcf, -4.0, -inf, 40000)
diff --git a/pl/math/erff_2u.c b/math/aarch64/experimental/erff_2u.c
similarity index 83%
rename from pl/math/erff_2u.c
rename to math/aarch64/experimental/erff_2u.c
index f43e647072f866..9487f60dd1e3c9 100644
--- a/pl/math/erff_2u.c
+++ b/math/aarch64/experimental/erff_2u.c
@@ -1,13 +1,13 @@
 /*
  * Single-precision erf(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define TwoOverSqrtPiMinusOne 0x1.06eba8p-3f
 #define Shift 0x1p16f
@@ -37,7 +37,7 @@
    erff(0x1.c373e6p-9) got 0x1.fd686cp-9
 		      want 0x1.fd6868p-9.  */
 float
-erff (float x)
+arm_math_erff (float x)
 {
   /* Get absolute value and sign.  */
   uint32_t ix = asuint (x);
@@ -56,8 +56,8 @@ erff (float x)
       float z = a + Shift;
       uint32_t i = asuint (z) - asuint (Shift);
       float r = z - Shift;
-      float erfr = __erff_data.tab[i].erf;
-      float scale = __erff_data.tab[i].scale;
+      float erfr = __v_erff_data.tab[i].erf;
+      float scale = __v_erff_data.tab[i].scale;
 
       /* erf(x) ~ erf(r) + scale * d * (1 - r * d - 1/3 * d^2).  */
       float d = a - r;
@@ -75,8 +75,7 @@ erff (float x)
   return asfloat (sign | asuint (1.0f));
 }
 
-PL_SIG (S, F, 1, erf, -4.0, 4.0)
-PL_TEST_ULP (erff, 1.43)
-PL_TEST_SYM_INTERVAL (erff, 0, 3.9375, 40000)
-PL_TEST_SYM_INTERVAL (erff, 3.9375, inf, 40000)
-PL_TEST_SYM_INTERVAL (erff, 0, inf, 40000)
+TEST_ULP (arm_math_erff, 1.43)
+TEST_SYM_INTERVAL (arm_math_erff, 0, 3.9375, 40000)
+TEST_SYM_INTERVAL (arm_math_erff, 3.9375, inf, 40000)
+TEST_SYM_INTERVAL (arm_math_erff, 0, inf, 40000)
diff --git a/pl/math/erfinv_24u5.c b/math/aarch64/experimental/erfinv_24u5.c
similarity index 88%
rename from pl/math/erfinv_24u5.c
rename to math/aarch64/experimental/erfinv_24u5.c
index 20e1e361befc2c..753f38a79f664d 100644
--- a/pl/math/erfinv_24u5.c
+++ b/math/aarch64/experimental/erfinv_24u5.c
@@ -1,14 +1,13 @@
 /*
  * Double-precision inverse error function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 #include "math_config.h"
 #include "poly_scalar_f64.h"
-#include "pl_sig.h"
-#define IGNORE_SCALAR_FENV
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 const static struct
 {
@@ -75,7 +74,12 @@ erfinv (double x)
 	 / (copysign (t, x) * horner_9_f64 (t, data.Q_57));
 }
 
-PL_SIG (S, D, 1, erfinv, -0.99, 0.99)
-PL_TEST_ULP (erfinv, 24.0)
-PL_TEST_INTERVAL (erfinv, 0, 1, 40000)
-PL_TEST_INTERVAL (erfinv, -0x1p-1022, -1, 40000)
+#if USE_MPFR
+# warning Not generating tests for erfinv, as MPFR has no suitable reference
+#else
+TEST_DISABLE_FENV (erfinv)
+TEST_SIG (S, D, 1, erfinv, -0.99, 0.99)
+TEST_ULP (erfinv, 24.0)
+TEST_INTERVAL (erfinv, 0, 1, 40000)
+TEST_INTERVAL (erfinv, -0x1p-1022, -1, 40000)
+#endif
diff --git a/pl/math/erfinvf_4u7.c b/math/aarch64/experimental/erfinvf_4u7.c
similarity index 88%
rename from pl/math/erfinvf_4u7.c
rename to math/aarch64/experimental/erfinvf_4u7.c
index 40736da08be846..152994f6336a7a 100644
--- a/pl/math/erfinvf_4u7.c
+++ b/math/aarch64/experimental/erfinvf_4u7.c
@@ -1,13 +1,13 @@
 /*
  * Single-precision inverse error function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 #include "poly_scalar_f32.h"
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 const static struct
 {
@@ -69,6 +69,10 @@ erfinvf (float x)
 	 / (copysignf (t, x) * horner_2_f32 (t, data.Q_50));
 }
 
-PL_SIG (S, F, 1, erfinv, -0.99, 0.99)
-PL_TEST_ULP (erfinvf, 4.09)
-PL_TEST_SYM_INTERVAL (erfinvf, 0, 1, 40000)
+#if USE_MPFR
+# warning Not generating tests for erfinvf, as MPFR has no suitable reference
+#else
+TEST_SIG (S, F, 1, erfinv, -0.99, 0.99)
+TEST_ULP (erfinvf, 4.09)
+TEST_SYM_INTERVAL (erfinvf, 0, 1, 40000)
+#endif
diff --git a/pl/math/erfinvl.c b/math/aarch64/experimental/erfinvl.c
similarity index 98%
rename from pl/math/erfinvl.c
rename to math/aarch64/experimental/erfinvl.c
index ea4aadfccd00bc..4d91410f1a5c27 100644
--- a/pl/math/erfinvl.c
+++ b/math/aarch64/experimental/erfinvl.c
@@ -1,7 +1,7 @@
 /*
  * Extended precision inverse error function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 #define _GNU_SOURCE
diff --git a/pl/math/exp.c b/math/aarch64/experimental/exp_inline.h
similarity index 93%
rename from pl/math/exp.c
rename to math/aarch64/experimental/exp_inline.h
index 90253b68875dc3..1a327c1e67d362 100644
--- a/pl/math/exp.c
+++ b/math/aarch64/experimental/exp_inline.h
@@ -1,10 +1,13 @@
 /*
  * Double-precision e^x function.
  *
- * Copyright (c) 2018-2023, Arm Limited.
+ * Copyright (c) 2018-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
+#ifndef PL_MATH_EXP_INLINE_H
+#define PL_MATH_EXP_INLINE_H
+
 #include <float.h>
 #include <math.h>
 #include <stdint.h>
@@ -30,7 +33,7 @@
    adjustment of scale, positive k here means the result may overflow and
    negative k means the result may underflow.  */
 static inline double
-specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
+exp_inline_special_case (double_t tmp, uint64_t sbits, uint64_t ki)
 {
   double_t scale, y;
 
@@ -77,7 +80,7 @@ top12 (double x)
 /* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
    If hastail is 0 then xtail is assumed to be 0 too.  */
 static inline double
-exp_inline (double x, double xtail, int hastail)
+exp_inline (double x, double xtail)
 {
   uint32_t abstop;
   uint64_t ki, idx, top, sbits;
@@ -125,7 +128,7 @@ exp_inline (double x, double xtail, int hastail)
 #endif
   r = x + kd * NegLn2hiN + kd * NegLn2loN;
   /* The code assumes 2^-200 < |xtail| < 2^-8/N.  */
-  if (hastail)
+  if (!__builtin_constant_p (xtail) || xtail != 0.0)
     r += xtail;
   /* 2^(k/N) ~= scale * (1 + tail).  */
   idx = 2 * (ki % N);
@@ -146,18 +149,11 @@ exp_inline (double x, double xtail, int hastail)
   tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
 #endif
   if (unlikely (abstop == 0))
-    return specialcase (tmp, sbits, ki);
+    return exp_inline_special_case (tmp, sbits, ki);
   scale = asdouble (sbits);
   /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
      is no spurious underflow here even without fma.  */
   return eval_as_double (scale + scale * tmp);
 }
 
-/* May be useful for implementing pow where more than double
-   precision input is needed.  */
-double
-__exp_dd (double x, double xtail)
-{
-  return exp_inline (x, xtail, 1);
-}
-
+#endif
diff --git a/pl/math/expf_data.c b/math/aarch64/experimental/expf_data.c
similarity index 93%
rename from pl/math/expf_data.c
rename to math/aarch64/experimental/expf_data.c
index 474ad57a29a06a..958f705cc67694 100644
--- a/pl/math/expf_data.c
+++ b/math/aarch64/experimental/expf_data.c
@@ -2,7 +2,7 @@
  * Coeffs and table entries for single-precision exp. Copied from
  * math/exp2f_data.c, with EXP2F_TABLE_BITS == 32.
  *
- * Copyright (c) 2017-2023, Arm Limited.
+ * Copyright (c) 2017-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -13,7 +13,7 @@
 const struct expf_data __expf_data = {
   /* tab[i] = uint(2^(i/N)) - (i << 52-BITS)
      used for computing 2^(k/N) for an int |k| < 150 N as
-     double(tab[k%N] + (k << 52-BITS)) */
+     double(tab[k%N] + (k << 52-BITS)).  */
   .tab = {
 0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
 0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
diff --git a/pl/math/expm1_2u5.c b/math/aarch64/experimental/expm1_2u5.c
similarity index 83%
rename from pl/math/expm1_2u5.c
rename to math/aarch64/experimental/expm1_2u5.c
index f7d43119861482..a4805e832af3ee 100644
--- a/pl/math/expm1_2u5.c
+++ b/math/aarch64/experimental/expm1_2u5.c
@@ -1,14 +1,14 @@
 /*
  * Double-precision e^x - 1 function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "poly_scalar_f64.h"
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define InvLn2 0x1.71547652b82fep0
 #define Ln2hi 0x1.62e42fefa39efp-1
@@ -76,10 +76,10 @@ expm1 (double x)
   return 2 * fma (p, t, t - 0.5);
 }
 
-PL_SIG (S, D, 1, expm1, -9.9, 9.9)
-PL_TEST_ULP (expm1, 1.68)
-PL_TEST_SYM_INTERVAL (expm1, 0, 0x1p-51, 1000)
-PL_TEST_INTERVAL (expm1, 0x1p-51, 0x1.63108c75a1937p+9, 100000)
-PL_TEST_INTERVAL (expm1, -0x1p-51, -0x1.740bf7c0d927dp+9, 100000)
-PL_TEST_INTERVAL (expm1, 0x1.63108c75a1937p+9, inf, 100)
-PL_TEST_INTERVAL (expm1, -0x1.740bf7c0d927dp+9, -inf, 100)
+TEST_SIG (S, D, 1, expm1, -9.9, 9.9)
+TEST_ULP (expm1, 1.68)
+TEST_SYM_INTERVAL (expm1, 0, 0x1p-51, 1000)
+TEST_INTERVAL (expm1, 0x1p-51, 0x1.63108c75a1937p+9, 100000)
+TEST_INTERVAL (expm1, -0x1p-51, -0x1.740bf7c0d927dp+9, 100000)
+TEST_INTERVAL (expm1, 0x1.63108c75a1937p+9, inf, 100)
+TEST_INTERVAL (expm1, -0x1.740bf7c0d927dp+9, -inf, 100)
diff --git a/math/aarch64/experimental/expm1_data.c b/math/aarch64/experimental/expm1_data.c
new file mode 100644
index 00000000000000..95589505692438
--- /dev/null
+++ b/math/aarch64/experimental/expm1_data.c
@@ -0,0 +1,21 @@
+/*
+ * Coefficients for double-precision e^x - 1 function.
+ *
+ * Copyright (c) 2022-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Generated using fpminimax, see tools/expm1.sollya for details.  */
+const double __expm1_poly[] = { 0x1p-1,
+				0x1.5555555555559p-3,
+				0x1.555555555554bp-5,
+				0x1.111111110f663p-7,
+				0x1.6c16c16c1b5f3p-10,
+				0x1.a01a01affa35dp-13,
+				0x1.a01a018b4ecbbp-16,
+				0x1.71ddf82db5bb4p-19,
+				0x1.27e517fc0d54bp-22,
+				0x1.af5eedae67435p-26,
+				0x1.1f143d060a28ap-29 };
diff --git a/pl/math/expm1f_1u6.c b/math/aarch64/experimental/expm1f_1u6.c
similarity index 82%
rename from pl/math/expm1f_1u6.c
rename to math/aarch64/experimental/expm1f_1u6.c
index e12c9ba9a8a2d9..03d1e9dc31ef96 100644
--- a/pl/math/expm1f_1u6.c
+++ b/math/aarch64/experimental/expm1f_1u6.c
@@ -1,23 +1,23 @@
 /*
  * Single-precision e^x - 1 function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "poly_scalar_f32.h"
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define Shift (0x1.8p23f)
 #define InvLn2 (0x1.715476p+0f)
 #define Ln2hi (0x1.62e4p-1f)
 #define Ln2lo (0x1.7f7d1cp-20f)
 #define AbsMask (0x7fffffff)
-#define InfLimit                                                               \
+#define InfLimit                                                              \
   (0x1.644716p6) /* Smallest value of x for which expm1(x) overflows.  */
-#define NegLimit                                                               \
+#define NegLimit                                                              \
   (-0x1.9bbabcp+6) /* Largest value of x for which expm1(x) rounds to 1.  */
 
 /* Approximation for exp(x) - 1 using polynomial on a reduced interval.
@@ -70,10 +70,10 @@ expm1f (float x)
   return 2 * fmaf (p, t, t - 0.5f);
 }
 
-PL_SIG (S, F, 1, expm1, -9.9, 9.9)
-PL_TEST_ULP (expm1f, 1.02)
-PL_TEST_SYM_INTERVAL (expm1f, 0, 0x1p-23, 1000)
-PL_TEST_INTERVAL (expm1f, 0x1p-23, 0x1.644716p6, 100000)
-PL_TEST_INTERVAL (expm1f, 0x1.644716p6, inf, 1000)
-PL_TEST_INTERVAL (expm1f, -0x1p-23, -0x1.9bbabcp+6, 100000)
-PL_TEST_INTERVAL (expm1f, -0x1.9bbabcp+6, -inf, 1000)
+TEST_SIG (S, F, 1, expm1, -9.9, 9.9)
+TEST_ULP (expm1f, 1.02)
+TEST_SYM_INTERVAL (expm1f, 0, 0x1p-23, 1000)
+TEST_INTERVAL (expm1f, 0x1p-23, 0x1.644716p6, 100000)
+TEST_INTERVAL (expm1f, 0x1.644716p6, inf, 1000)
+TEST_INTERVAL (expm1f, -0x1p-23, -0x1.9bbabcp+6, 100000)
+TEST_INTERVAL (expm1f, -0x1.9bbabcp+6, -inf, 1000)
diff --git a/pl/math/expm1f_data.c b/math/aarch64/experimental/expm1f_data.c
similarity index 59%
rename from pl/math/expm1f_data.c
rename to math/aarch64/experimental/expm1f_data.c
index 9d02dc448ebb1f..92d9189ff5033d 100644
--- a/pl/math/expm1f_data.c
+++ b/math/aarch64/experimental/expm1f_data.c
@@ -1,12 +1,12 @@
 /*
  * Coefficients for single-precision e^x - 1 function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "math_config.h"
 
 /* Generated using fpminimax, see tools/expm1f.sollya for details.  */
-const float __expm1f_poly[] = {0x1.fffffep-2, 0x1.5554aep-3, 0x1.555736p-5,
-			       0x1.12287cp-7, 0x1.6b55a2p-10};
+const float __expm1f_poly[] = { 0x1.fffffep-2, 0x1.5554aep-3, 0x1.555736p-5,
+				0x1.12287cp-7, 0x1.6b55a2p-10 };
diff --git a/pl/math/log10_2u.c b/math/aarch64/experimental/log10_2u.c
similarity index 84%
rename from pl/math/log10_2u.c
rename to math/aarch64/experimental/log10_2u.c
index 74828ea9ef3caa..84ee1544fe1af9 100644
--- a/pl/math/log10_2u.c
+++ b/math/aarch64/experimental/log10_2u.c
@@ -1,13 +1,13 @@
 /*
  * Double-precision log10(x) function.
  *
- * Copyright (c) 2020-2023, Arm Limited.
+ * Copyright (c) 2020-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 /* Polynomial coefficients and lookup tables.  */
 #define T __log10_data.tab
@@ -32,11 +32,11 @@ top16 (double x)
 /* Fast and low accuracy implementation of log10.
    The implementation is similar to that of math/log, except that:
    - Polynomials are computed for log10(1+r) with r on same intervals as log.
-   - Lookup parameters are scaled (at runtime) to switch from base e to base 10.
-   Many errors above 1.59 ulp are observed across the whole range of doubles.
-   The greatest observed error is 1.61 ulp, at around 0.965:
-   log10(0x1.dc8710333a29bp-1) got -0x1.fee26884905a6p-6
-			      want -0x1.fee26884905a8p-6.  */
+   - Lookup parameters are scaled at runtime to switch from base e to base 10.
+   Many errors above 1.59 ulp are observed across the whole range of doubles.
+   The greatest observed error is 1.61 ulp, at around 0.965:
+   log10(0x1.dc8710333a29bp-1) got -0x1.fee26884905a6p-6
+			      want -0x1.fee26884905a8p-6.  */
 double
 log10 (double x)
 {
@@ -61,8 +61,8 @@ log10 (double x)
       y = r3
 	  * (B[1] + r * B[2] + r2 * B[3]
 	     + r3
-		 * (B[4] + r * B[5] + r2 * B[6]
-		    + r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
+		   * (B[4] + r * B[5] + r2 * B[6]
+		      + r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
       /* Worst-case error is around 0.507 ULP.  */
       w = r * 0x1p27;
       double_t rhi = r + w - w;
@@ -123,7 +123,8 @@ log10 (double x)
   r2 = r * r; /* rounding error: 0x1p-54/N^2.  */
 
   /* Scale by 1/ln(10). Polynomial already contains scaling.  */
-  y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
+  y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4]))
+      + hi;
   y = y * InvLn10;
 
   return eval_as_double (y);
@@ -143,8 +144,8 @@ log10l (long double x)
 #endif
 // clang-format on
 
-PL_SIG (S, D, 1, log10, 0.01, 11.1)
-PL_TEST_ULP (log10, 1.11)
-PL_TEST_INTERVAL (log10, 0, 0xffff000000000000, 10000)
-PL_TEST_INTERVAL (log10, 0x1p-4, 0x1p4, 40000)
-PL_TEST_INTERVAL (log10, 0, inf, 40000)
+TEST_SIG (S, D, 1, log10, 0.01, 11.1)
+TEST_ULP (log10, 1.11)
+TEST_INTERVAL (log10, 0, 0xffff000000000000, 10000)
+TEST_INTERVAL (log10, 0x1p-4, 0x1p4, 40000)
+TEST_INTERVAL (log10, 0, inf, 40000)
diff --git a/pl/math/log10_data.c b/math/aarch64/experimental/log10_data.c
similarity index 99%
rename from pl/math/log10_data.c
rename to math/aarch64/experimental/log10_data.c
index 9976f19cd6df3f..20b5ef883ed846 100644
--- a/pl/math/log10_data.c
+++ b/math/aarch64/experimental/log10_data.c
@@ -1,7 +1,7 @@
 /*
  * Data for log10.
  *
- * Copyright (c) 2020-2023, Arm Limited.
+ * Copyright (c) 2020-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -333,5 +333,5 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when
 {0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
 #endif
 },
-#endif /* !HAVE_FAST_FMA */
+#endif /* !HAVE_FAST_FMA.  */
 };
diff --git a/pl/math/log1p_2u.c b/math/aarch64/experimental/log1p_2u.c
similarity index 91%
rename from pl/math/log1p_2u.c
rename to math/aarch64/experimental/log1p_2u.c
index f9491ce52b4449..a1ff309ecb5fcc 100644
--- a/pl/math/log1p_2u.c
+++ b/math/aarch64/experimental/log1p_2u.c
@@ -1,19 +1,19 @@
 /*
  * Double-precision log(1+x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "poly_scalar_f64.h"
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define Ln2Hi 0x1.62e42fefa3800p-1
 #define Ln2Lo 0x1.ef35793c76730p-45
 #define HfRt2Top 0x3fe6a09e /* top32(asuint64(sqrt(2)/2)).  */
-#define OneMHfRt2Top                                                           \
+#define OneMHfRt2Top                                                          \
   0x00095f62 /* top32(asuint64(1)) - top32(asuint64(sqrt(2)/2)).  */
 #define OneTop12 0x3ff
 #define BottomMask 0xffffffff
@@ -123,9 +123,9 @@ log1p (double x)
   return y + fma (Ln2Hi, kd, p);
 }
 
-PL_SIG (S, D, 1, log1p, -0.9, 10.0)
-PL_TEST_ULP (log1p, 1.26)
-PL_TEST_SYM_INTERVAL (log1p, 0.0, 0x1p-23, 50000)
-PL_TEST_SYM_INTERVAL (log1p, 0x1p-23, 0.001, 50000)
-PL_TEST_SYM_INTERVAL (log1p, 0.001, 1.0, 50000)
-PL_TEST_SYM_INTERVAL (log1p, 1.0, inf, 5000)
+TEST_SIG (S, D, 1, log1p, -0.9, 10.0)
+TEST_ULP (log1p, 1.26)
+TEST_SYM_INTERVAL (log1p, 0.0, 0x1p-23, 50000)
+TEST_SYM_INTERVAL (log1p, 0x1p-23, 0.001, 50000)
+TEST_SYM_INTERVAL (log1p, 0.001, 1.0, 50000)
+TEST_SYM_INTERVAL (log1p, 1.0, inf, 5000)
diff --git a/math/aarch64/experimental/log1p_data.c b/math/aarch64/experimental/log1p_data.c
new file mode 100644
index 00000000000000..91a7196d795f8a
--- /dev/null
+++ b/math/aarch64/experimental/log1p_data.c
@@ -0,0 +1,20 @@
+/*
+ * Data used in double-precision log(1+x) function.
+ *
+ * Copyright (c) 2022-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Polynomial coefficients generated using Remez algorithm, see
+   log1p.sollya for details.  */
+const struct log1p_data __log1p_data
+    = { .coeffs
+	= { -0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2,
+	    0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3,
+	    -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4,
+	    0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4,
+	    -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5,
+	    0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4,
+	    -0x1.cfa7385bdb37ep-6 } };
diff --git a/pl/math/log1pf_2u1.c b/math/aarch64/experimental/log1pf_2u1.c
similarity index 93%
rename from pl/math/log1pf_2u1.c
rename to math/aarch64/experimental/log1pf_2u1.c
index e991748537204d..fe4f9386522023 100644
--- a/pl/math/log1pf_2u1.c
+++ b/math/aarch64/experimental/log1pf_2u1.c
@@ -1,14 +1,14 @@
 /*
  * Single-precision log(1+x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "poly_scalar_f32.h"
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define Ln2 (0x1.62e43p-1f)
 #define SignMask (0x80000000)
@@ -153,9 +153,9 @@ log1pf (float x)
   return fmaf (scale_back, Ln2, p);
 }
 
-PL_SIG (S, F, 1, log1p, -0.9, 10.0)
-PL_TEST_ULP (log1pf, 1.52)
-PL_TEST_SYM_INTERVAL (log1pf, 0.0, 0x1p-23, 50000)
-PL_TEST_SYM_INTERVAL (log1pf, 0x1p-23, 0.001, 50000)
-PL_TEST_SYM_INTERVAL (log1pf, 0.001, 1.0, 50000)
-PL_TEST_SYM_INTERVAL (log1pf, 1.0, inf, 5000)
+TEST_SIG (S, F, 1, log1p, -0.9, 10.0)
+TEST_ULP (log1pf, 1.52)
+TEST_SYM_INTERVAL (log1pf, 0.0, 0x1p-23, 50000)
+TEST_SYM_INTERVAL (log1pf, 0x1p-23, 0.001, 50000)
+TEST_SYM_INTERVAL (log1pf, 0.001, 1.0, 50000)
+TEST_SYM_INTERVAL (log1pf, 1.0, inf, 5000)
diff --git a/pl/math/log1pf_data.c b/math/aarch64/experimental/log1pf_data.c
similarity index 59%
rename from pl/math/log1pf_data.c
rename to math/aarch64/experimental/log1pf_data.c
index 8c92d5738fe82a..e0ac269a10692c 100644
--- a/pl/math/log1pf_data.c
+++ b/math/aarch64/experimental/log1pf_data.c
@@ -1,7 +1,7 @@
 /*
  * Data used in single-precision log1p(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 #include "math_config.h"
@@ -9,6 +9,6 @@
 /* Polynomial coefficients generated using floating-point minimax
    algorithm, see tools/log1pf.sollya for details.  */
 const struct log1pf_data __log1pf_data
-  = {.coeffs = {-0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f,
-		-0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, 0x1.abcb6p-4f,
-		-0x1.6f0d5ep-5f}};
+    = { .coeffs = { -0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f,
+		    -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f,
+		    0x1.abcb6p-4f, -0x1.6f0d5ep-5f } };
diff --git a/pl/math/sinh_3u.c b/math/aarch64/experimental/sinh_3u.c
similarity index 72%
rename from pl/math/sinh_3u.c
rename to math/aarch64/experimental/sinh_3u.c
index 1d86629ee2a352..39030d2750a92e 100644
--- a/pl/math/sinh_3u.c
+++ b/math/aarch64/experimental/sinh_3u.c
@@ -1,22 +1,19 @@
 /*
  * Double-precision sinh(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "exp_inline.h"
 
 #define AbsMask 0x7fffffffffffffff
 #define Half 0x3fe0000000000000
-#define OFlowBound                                                             \
-  0x40862e42fefa39f0 /* 0x1.62e42fefa39fp+9, above which using expm1 results   \
-			in NaN.  */
-
-double
-__exp_dd (double, double);
+/* 0x1.62e42fefa39fp+9, above which using expm1 results in NaN.  */
+#define OFlowBound 0x40862e42fefa39f0
 
 /* Approximation for double-precision sinh(x) using expm1.
    sinh(x) = (exp(x) - exp(-x)) / 2.
@@ -44,7 +41,7 @@ sinh (double x)
 	 either. We use the identity: exp(a) = (exp(a / 2)) ^ 2
 	 to compute sinh(x) ~= (exp(|x| / 2)) ^ 2 / 2    for x > 0
 			    ~= (exp(|x| / 2)) ^ 2 / -2   for x < 0.  */
-      double e = __exp_dd (ax / 2, 0);
+      double e = exp_inline (ax / 2, 0);
       return (e * halfsign) * e;
     }
 
@@ -56,8 +53,8 @@ sinh (double x)
   return (t + t / (t + 1)) * halfsign;
 }
 
-PL_SIG (S, D, 1, sinh, -10.0, 10.0)
-PL_TEST_ULP (sinh, 2.08)
-PL_TEST_SYM_INTERVAL (sinh, 0, 0x1p-51, 100)
-PL_TEST_SYM_INTERVAL (sinh, 0x1p-51, 0x1.62e42fefa39fp+9, 100000)
-PL_TEST_SYM_INTERVAL (sinh, 0x1.62e42fefa39fp+9, inf, 1000)
+TEST_SIG (S, D, 1, sinh, -10.0, 10.0)
+TEST_ULP (sinh, 2.08)
+TEST_SYM_INTERVAL (sinh, 0, 0x1p-51, 100)
+TEST_SYM_INTERVAL (sinh, 0x1p-51, 0x1.62e42fefa39fp+9, 100000)
+TEST_SYM_INTERVAL (sinh, 0x1.62e42fefa39fp+9, inf, 1000)
diff --git a/pl/math/sinhf_2u3.c b/math/aarch64/experimental/sinhf_2u3.c
similarity index 69%
rename from pl/math/sinhf_2u3.c
rename to math/aarch64/experimental/sinhf_2u3.c
index aa7aadcf67c530..860ddc0fc83c26 100644
--- a/pl/math/sinhf_2u3.c
+++ b/math/aarch64/experimental/sinhf_2u3.c
@@ -1,25 +1,21 @@
 /*
  * Single-precision sinh(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
+#include "mathlib.h"
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define AbsMask 0x7fffffff
 #define Half 0x3f000000
-#define Expm1OFlowLimit                                                        \
-  0x42b17218 /* 0x1.62e43p+6, 2^7*ln2, minimum value for which expm1f          \
-		overflows.  */
-#define OFlowLimit                                                             \
-  0x42b2d4fd /* 0x1.65a9fap+6, minimum positive value for which sinhf should   \
-		overflow.  */
-
-float
-optr_aor_exp_f32 (float);
+/* 0x1.62e43p+6, 2^7*ln2, minimum value for which expm1f overflows.  */
+#define Expm1OFlowLimit 0x42b17218
+/* 0x1.65a9fap+6, minimum positive value for which sinhf should overflow.  */
+#define OFlowLimit 0x42b2d4fd
 
 /* Approximation for single-precision sinh(x) using expm1.
    sinh(x) = (exp(x) - exp(-x)) / 2.
@@ -54,7 +50,7 @@ sinhf (float x)
 			    ~= (exp(|x| / 2)) ^ 2 / -2   for x < 0.
 	 Greatest error in this region is 1.89 ULP:
 	 sinhf(0x1.65898cp+6) got 0x1.f00aep+127  want 0x1.f00adcp+127.  */
-      float e = optr_aor_exp_f32 (ax / 2);
+      float e = expf (ax / 2);
       return (e * halfsign) * e;
     }
 
@@ -66,8 +62,8 @@ sinhf (float x)
   return (t + t / (t + 1)) * halfsign;
 }
 
-PL_SIG (S, F, 1, sinh, -10.0, 10.0)
-PL_TEST_ULP (sinhf, 1.76)
-PL_TEST_SYM_INTERVAL (sinhf, 0, 0x1.62e43p+6, 100000)
-PL_TEST_SYM_INTERVAL (sinhf, 0x1.62e43p+6, 0x1.65a9fap+6, 100)
-PL_TEST_SYM_INTERVAL (sinhf, 0x1.65a9fap+6, inf, 100)
+TEST_SIG (S, F, 1, sinh, -10.0, 10.0)
+TEST_ULP (sinhf, 1.76)
+TEST_SYM_INTERVAL (sinhf, 0, 0x1.62e43p+6, 100000)
+TEST_SYM_INTERVAL (sinhf, 0x1.62e43p+6, 0x1.65a9fap+6, 100)
+TEST_SYM_INTERVAL (sinhf, 0x1.65a9fap+6, inf, 100)
diff --git a/math/aarch64/experimental/sve/erfinv_25u.c b/math/aarch64/experimental/sve/erfinv_25u.c
new file mode 100644
index 00000000000000..4de6d08ab80fc6
--- /dev/null
+++ b/math/aarch64/experimental/sve/erfinv_25u.c
@@ -0,0 +1,156 @@
+/*
+ * Double-precision inverse error function (SVE variant).
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "sv_math.h"
+#include "test_defs.h"
+#include "math_config.h"
+#include "test_sig.h"
+#include "sv_poly_f64.h"
+#define SV_LOG_INLINE_POLY_ORDER 4
+#include "sv_log_inline.h"
+
+const static struct data
+{
+  /*  We use P_N and Q_N to refer to arrays of coefficients, where P_N is the
+      coeffs of the numerator in table N of Blair et al, and Q_N is the coeffs
+      of the denominator. P is interleaved P_17 and P_37, similar for Q.  */
+  double P[7][2], Q[7][2];
+  double P_57[9], Q_57[9], tailshift, P37_0;
+  struct sv_log_inline_data log_tbl;
+} data = {
+  .P37_0 = -0x1.f3596123109edp-7,
+  .tailshift = -0.87890625,
+  .P = { { 0x1.007ce8f01b2e8p+4, 0x1.60b8fe375999ep-2 },
+	 { -0x1.6b23cc5c6c6d7p+6, -0x1.779bb9bef7c0fp+1 },
+	 { 0x1.74e5f6ceb3548p+7, 0x1.786ea384470a2p+3 },
+	 { -0x1.5200bb15cc6bbp+7, -0x1.6a7c1453c85d3p+4 },
+	 { 0x1.05d193233a849p+6, 0x1.31f0fc5613142p+4 },
+	 { -0x1.148c5474ee5e1p+3, -0x1.5ea6c007d4dbbp+2 },
+	 { 0x1.689181bbafd0cp-3, 0x1.e66f265ce9e5p-3 } },
+  .Q = { { 0x1.d8fb0f913bd7bp+3, -0x1.636b2dcf4edbep-7 },
+	 { -0x1.6d7f25a3f1c24p+6, 0x1.0b5411e2acf29p-2 },
+	 { 0x1.a450d8e7f4cbbp+7, -0x1.3413109467a0bp+1 },
+	 { -0x1.bc3480485857p+7, 0x1.563e8136c554ap+3 },
+	 { 0x1.ae6b0c504ee02p+6, -0x1.7b77aab1dcafbp+4 },
+	 { -0x1.499dfec1a7f5fp+4, 0x1.8a3e174e05ddcp+4 },
+	 { 0x1p+0, -0x1.4075c56404eecp+3 } },
+  .P_57 = { 0x1.b874f9516f7f1p-14, 0x1.5921f2916c1c4p-7, 0x1.145ae7d5b8fa4p-2,
+	    0x1.29d6dcc3b2fb7p+1, 0x1.cabe2209a7985p+2, 0x1.11859f0745c4p+3,
+	    0x1.b7ec7bc6a2ce5p+2, 0x1.d0419e0bb42aep+1, 0x1.c5aa03eef7258p-1 },
+  .Q_57 = { 0x1.b8747e12691f1p-14, 0x1.59240d8ed1e0ap-7, 0x1.14aef2b181e2p-2,
+	    0x1.2cd181bcea52p+1, 0x1.e6e63e0b7aa4cp+2, 0x1.65cf8da94aa3ap+3,
+	    0x1.7e5c787b10a36p+3, 0x1.0626d68b6cea3p+3, 0x1.065c5f193abf6p+2 },
+  .log_tbl = SV_LOG_CONSTANTS
+};
+
+static inline svfloat64_t
+special (svbool_t pg, svfloat64_t x, const struct data *d)
+{
+  /* Note erfinv(inf) should return NaN, and erfinv(1) should return Inf.
+     By using log here, instead of log1p, we return finite values for both
+     these inputs, and values outside [-1, 1]. This is non-compliant, but is an
+     acceptable optimisation at Ofast. To get correct behaviour for all finite
+     values use the log1p_inline helper on -abs(x) - note that erfinv(inf)
+     will still be finite.  */
+  svfloat64_t ax = svabs_x (pg, x);
+  svfloat64_t t
+      = svneg_x (pg, sv_log_inline (pg, svsubr_x (pg, ax, 1), &d->log_tbl));
+  t = svdivr_x (pg, svsqrt_x (pg, t), 1);
+  svuint64_t sign
+      = sveor_x (pg, svreinterpret_u64 (ax), svreinterpret_u64 (x));
+  svfloat64_t ts
+      = svreinterpret_f64 (svorr_x (pg, sign, svreinterpret_u64 (t)));
+
+  svfloat64_t q = svadd_x (pg, t, d->Q_57[8]);
+  for (int i = 7; i >= 0; i--)
+    q = svmad_x (pg, q, t, d->Q_57[i]);
+
+  return svdiv_x (pg, sv_horner_8_f64_x (pg, t, d->P_57), svmul_x (pg, ts, q));
+}
+
+static inline svfloat64_t
+lookup (const double *c, svuint64_t idx)
+{
+  svfloat64_t x = svld1rq_f64 (svptrue_b64 (), c);
+  return svtbl (x, idx);
+}
+
+static inline svfloat64_t
+notails (svbool_t pg, svfloat64_t x, const struct data *d)
+{
+  svfloat64_t t = svmad_x (pg, x, x, -0.5625);
+  svfloat64_t p = svmla_x (pg, sv_f64 (d->P[5][0]), t, d->P[6][0]);
+  svfloat64_t q = svadd_x (pg, t, d->Q[5][0]);
+  for (int i = 4; i >= 0; i--)
+    {
+      p = svmad_x (pg, t, p, d->P[i][0]);
+      q = svmad_x (pg, t, q, d->Q[i][0]);
+    }
+  p = svmul_x (pg, p, x);
+  return svdiv_x (pg, p, q);
+}
+
+/* Vector implementation of Blair et al's rational approximation to inverse
+   error function in double precision. Largest observed error is 24.75 ULP:
+   _ZGVsMxv_erfinv(0x1.fc861d81c2ba8p-1) got 0x1.ea05472686625p+0
+					want 0x1.ea0547268660cp+0.  */
+svfloat64_t SV_NAME_D1 (erfinv) (svfloat64_t x, svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+  /* Calculate inverse error using algorithm described in
+     J. M. Blair, C. A. Edwards, and J. H. Johnson,
+     "Rational Chebyshev approximations for the inverse of the error function",
+     Math. Comp. 30, pp. 827--830 (1976).
+     https://doi.org/10.1090/S0025-5718-1976-0421040-7.
+
+     Algorithm has 3 intervals:
+     - 'Normal' region [-0.75, 0.75]
+     - Tail region [0.75, 0.9375] U [-0.9375, -0.75]
+     - Extreme tail [-1, -0.9375] U [0.9375, 1]
+     Normal and tail are both rational approximations of similar order on
+     shifted input - these are performed in parallel, using a table lookup
+     (svtbl) to select the correct coefficients depending on interval.  */
+
+  svbool_t no_tail = svacle (pg, x, 0.75);
+  if (unlikely (!svptest_any (pg, svnot_z (pg, no_tail))))
+    return notails (pg, x, d);
+
+  svbool_t is_tail = svnot_z (pg, no_tail);
+  svbool_t extreme_tail = svacgt (pg, x, 0.9375);
+  svuint64_t idx = svdup_n_u64_z (is_tail, 1);
+
+  svfloat64_t t = svsel_f64 (is_tail, sv_f64 (d->tailshift), sv_f64 (-0.5625));
+  t = svmla_x (pg, t, x, x);
+
+  svfloat64_t p = lookup (&d->P[6][0], idx);
+  svfloat64_t q
+      = svmla_x (pg, lookup (&d->Q[6][0], idx), svdup_n_f64_z (is_tail, 1), t);
+  for (int i = 5; i >= 0; i--)
+    {
+      p = svmla_x (pg, lookup (&d->P[i][0], idx), p, t);
+      q = svmla_x (pg, lookup (&d->Q[i][0], idx), q, t);
+    }
+  p = svmad_m (is_tail, p, t, d->P37_0);
+  p = svmul_x (pg, p, x);
+
+  if (likely (svptest_any (pg, extreme_tail)))
+    return svsel (extreme_tail, special (pg, x, d), svdiv_x (pg, p, q));
+  return svdiv_x (pg, p, q);
+}
+
+#if USE_MPFR
+# warning Not generating tests for _ZGVsMxv_erfinv, as MPFR has no suitable reference
+#else
+TEST_SIG (SV, D, 1, erfinv, -0.99, 0.99)
+TEST_ULP (SV_NAME_D1 (erfinv), 24.5)
+TEST_DISABLE_FENV (SV_NAME_D1 (erfinv))
+/* Test the double-precision routine with a control lane in each interval.  */
+TEST_SYM_INTERVAL (SV_NAME_D1 (erfinv), 0, 1, 100000)
+TEST_CONTROL_VALUE (SV_NAME_D1 (erfinv), 0.5)
+TEST_CONTROL_VALUE (SV_NAME_D1 (erfinv), 0.8)
+TEST_CONTROL_VALUE (SV_NAME_D1 (erfinv), 0.95)
+#endif
+CLOSE_SVE_ATTR
diff --git a/math/aarch64/experimental/sve/erfinvf_5u.c b/math/aarch64/experimental/sve/erfinvf_5u.c
new file mode 100644
index 00000000000000..2c81c4e0b9a23d
--- /dev/null
+++ b/math/aarch64/experimental/sve/erfinvf_5u.c
@@ -0,0 +1,156 @@
+/*
+ * Single-precision inverse error function (SVE variant).
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "sv_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "sv_poly_f32.h"
+#include "sv_logf_inline.h"
+
+static const struct data
+{
+  /*  P_N and Q_N are the numerator and denominator coefficients from
+      table N of Blair et al.  Each row of four floats is loaded as one
+      quadword with the table 10 and table 29 values interleaved, so a
+      table lookup with index 0/1 (or 2/3) picks the right interval.  */
+  float P10_2, P29_3, Q10_2, Q29_2;
+  float P10_0, P29_1, P10_1, P29_2;
+  float Q10_0, Q29_0, Q10_1, Q29_1;
+  float P29_0, P_50[6], Q_50[2], tailshift;
+  struct sv_logf_data logf_tbl;
+} data = { .P10_0 = -0x1.a31268p+3,
+	   .P10_1 = 0x1.ac9048p+4,
+	   .P10_2 = -0x1.293ff6p+3,
+	   .P29_0 = -0x1.fc0252p-4,
+	   .P29_1 = 0x1.119d44p+0,
+	   .P29_2 = -0x1.f59ee2p+0,
+	   .P29_3 = 0x1.b13626p-2,
+	   .Q10_0 = -0x1.8265eep+3,
+	   .Q10_1 = 0x1.ef5eaep+4,
+	   .Q10_2 = -0x1.12665p+4,
+	   .Q29_0 = -0x1.69952p-4,
+	   .Q29_1 = 0x1.c7b7d2p-1,
+	   .Q29_2 = -0x1.167d7p+1,
+	   .P_50 = { 0x1.3d8948p-3, 0x1.61f9eap+0, 0x1.61c6bcp-1,
+		     -0x1.20c9f2p+0, 0x1.5c704cp-1, -0x1.50c6bep-3 },
+	   .Q_50 = { 0x1.3d7dacp-3, 0x1.629e5p+0 },
+	   .tailshift = -0.87890625,
+	   .logf_tbl = SV_LOGF_CONSTANTS };
+
+static inline svfloat32_t
+special (svbool_t pg, svfloat32_t x, const struct data *d)
+{
+  svfloat32_t ax = svabs_x (pg, x);
+  svfloat32_t t = svdivr_x (
+      pg,
+      svsqrt_x (pg, svneg_x (pg, sv_logf_inline (pg, svsubr_x (pg, ax, 1),
+						 &d->logf_tbl))),
+      1);
+  svuint32_t sign
+      = sveor_x (pg, svreinterpret_u32 (ax), svreinterpret_u32 (x));
+  svfloat32_t ts
+      = svreinterpret_f32 (svorr_x (pg, sign, svreinterpret_u32 (t)));
+  svfloat32_t q
+      = svmla_x (pg, sv_f32 (d->Q_50[0]), svadd_x (pg, t, d->Q_50[1]), t);
+  return svdiv_x (pg, sv_horner_5_f32_x (pg, t, d->P_50), svmul_x (pg, ts, q));
+}
+
+static inline svfloat32_t
+notails (svbool_t pg, svfloat32_t x, const struct data *d)
+{
+  /* Shortcut when no input is in a tail region - no need to gather shift or
+     coefficients.  */
+  svfloat32_t t = svmad_x (pg, x, x, -0.5625);
+  svfloat32_t q = svadd_x (pg, t, d->Q10_2);
+  q = svmad_x (pg, t, q, d->Q10_1);
+  q = svmad_x (pg, t, q, d->Q10_0);
+
+  svfloat32_t p = svmla_x (pg, sv_f32 (d->P10_1), t, d->P10_2);
+  p = svmad_x (pg, p, t, d->P10_0);
+
+  return svdiv_x (pg, svmul_x (pg, x, p), q);
+}
+
+/* Vector implementation of Blair et al's rational approximation to inverse
+   error function in single-precision. Worst-case error is 4.71 ULP, in the
+   tail region:
+   _ZGVsMxv_erfinvf(0x1.f84e9ap-1) got 0x1.b8326ap+0
+				  want 0x1.b83274p+0.  */
+svfloat32_t SV_NAME_F1 (erfinv) (svfloat32_t x, svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  /* Calculate inverse error using algorithm described in
+     J. M. Blair, C. A. Edwards, and J. H. Johnson,
+     "Rational Chebyshev approximations for the inverse of the error function",
+     Math. Comp. 30, pp. 827--830 (1976).
+     https://doi.org/10.1090/S0025-5718-1976-0421040-7.  */
+
+  /* Algorithm has 3 intervals:
+     - 'Normal' region [-0.75, 0.75]
+     - Tail region [0.75, 0.9375] U [-0.9375, -0.75]
+     - Extreme tail [-1, -0.9375] U [0.9375, 1]
+     Normal and tail are both rational approximation of similar order on
+     shifted input - these are typically performed in parallel using gather
+     loads to obtain correct coefficients depending on interval.  */
+  svbool_t is_tail = svacge (pg, x, 0.75);
+  svbool_t extreme_tail = svacge (pg, x, 0.9375);
+
+  if (likely (!svptest_any (pg, is_tail)))
+    return notails (pg, x, d);
+
+  /* Select requisite shift depending on interval: polynomial is evaluated on
+     x * x - shift.
+     Normal shift = 0.5625
+     Tail shift   = 0.87890625.  */
+  svfloat32_t t = svmla_x (
+      pg, svsel (is_tail, sv_f32 (d->tailshift), sv_f32 (-0.5625)), x, x);
+
+  svuint32_t idx = svdup_u32_z (is_tail, 1);
+  svuint32_t idxhi = svadd_x (pg, idx, 2);
+
+  /* Load coeffs in quadwords and select them according to interval.  */
+  svfloat32_t pqhi = svld1rq (svptrue_b32 (), &d->P10_2);
+  svfloat32_t plo = svld1rq (svptrue_b32 (), &d->P10_0);
+  svfloat32_t qlo = svld1rq (svptrue_b32 (), &d->Q10_0);
+
+  svfloat32_t p2 = svtbl (pqhi, idx);
+  svfloat32_t p1 = svtbl (plo, idxhi);
+  svfloat32_t p0 = svtbl (plo, idx);
+  svfloat32_t q0 = svtbl (qlo, idx);
+  svfloat32_t q1 = svtbl (qlo, idxhi);
+  svfloat32_t q2 = svtbl (pqhi, idxhi);
+
+  svfloat32_t p = svmla_x (pg, p1, p2, t);
+  p = svmla_x (pg, p0, p, t);
+  /* Tail polynomial has higher order - merge with normal lanes.  */
+  p = svmad_m (is_tail, p, t, d->P29_0);
+  svfloat32_t y = svmul_x (pg, x, p);
+
+  /* Least significant term of both Q polynomials is 1, so no need to generate
+     it.  */
+  svfloat32_t q = svadd_x (pg, t, q2);
+  q = svmla_x (pg, q1, q, t);
+  q = svmla_x (pg, q0, q, t);
+
+  if (unlikely (svptest_any (pg, extreme_tail)))
+    return svsel (extreme_tail, special (extreme_tail, x, d),
+		  svdiv_x (pg, y, q));
+  return svdiv_x (pg, y, q);
+}
+
+#if USE_MPFR
+# warning Not generating tests for _ZGVsMxv_erfinvf, as MPFR has no suitable reference
+#else
+TEST_SIG (SV, F, 1, erfinv, -0.99, 0.99)
+TEST_ULP (SV_NAME_F1 (erfinv), 4.09)
+TEST_DISABLE_FENV (SV_NAME_F1 (erfinv))
+TEST_SYM_INTERVAL (SV_NAME_F1 (erfinv), 0, 1, 40000)
+TEST_CONTROL_VALUE (SV_NAME_F1 (erfinv), 0.5)
+TEST_CONTROL_VALUE (SV_NAME_F1 (erfinv), 0.8)
+TEST_CONTROL_VALUE (SV_NAME_F1 (erfinv), 0.95)
+#endif
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_powi.c b/math/aarch64/experimental/sve/powi.c
similarity index 96%
rename from pl/math/sv_powi.c
rename to math/aarch64/experimental/sve/powi.c
index e53bf219553362..62dd1b11497073 100644
--- a/pl/math/sv_powi.c
+++ b/math/aarch64/experimental/sve/powi.c
@@ -1,7 +1,7 @@
 /*
  * Double-precision SVE powi(x, n) function.
  *
- * Copyright (c) 2020-2023, Arm Limited.
+ * Copyright (c) 2020-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -46,3 +46,4 @@ _ZGVsMxvv_powk (svfloat64_t as, svint64_t ns, svbool_t p)
 
   return acc;
 }
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_powif.c b/math/aarch64/experimental/sve/powif.c
similarity index 96%
rename from pl/math/sv_powif.c
rename to math/aarch64/experimental/sve/powif.c
index 7e032fd86a2047..fd74acf12df79f 100644
--- a/pl/math/sv_powif.c
+++ b/math/aarch64/experimental/sve/powif.c
@@ -1,7 +1,7 @@
 /*
  * Single-precision SVE powi(x, n) function.
  *
- * Copyright (c) 2020-2023, Arm Limited.
+ * Copyright (c) 2020-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -46,3 +46,4 @@ _ZGVsMxvv_powi (svfloat32_t as, svint32_t ns, svbool_t p)
 
   return acc;
 }
+CLOSE_SVE_ATTR
diff --git a/math/aarch64/experimental/sve/sv_logf_inline.h b/math/aarch64/experimental/sve/sv_logf_inline.h
new file mode 100644
index 00000000000000..c317a23f6fc36e
--- /dev/null
+++ b/math/aarch64/experimental/sve/sv_logf_inline.h
@@ -0,0 +1,51 @@
+/*
+ * Single-precision vector log function - inline version
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+
+struct sv_logf_data
+{
+  float p1, p3, p5, p6, p0, p2, p4;
+  float ln2;
+  uint32_t off, mantissa_mask;
+};
+
+#define SV_LOGF_CONSTANTS                                                     \
+  {                                                                           \
+    .p0 = -0x1.ffffc8p-2f, .p1 = 0x1.555d7cp-2f, .p2 = -0x1.00187cp-2f,       \
+    .p3 = 0x1.961348p-3f, .p4 = -0x1.4f9934p-3f, .p5 = 0x1.5a9aa2p-3f,        \
+    .p6 = -0x1.3e737cp-3f, .ln2 = 0x1.62e43p-1f, .off = 0x3f2aaaab,           \
+    .mantissa_mask = 0x007fffff                                               \
+  }
+
+static inline svfloat32_t
+sv_logf_inline (svbool_t pg, svfloat32_t x, const struct sv_logf_data *d)
+{
+  svuint32_t u = svreinterpret_u32 (x);
+
+  /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
+  u = svsub_x (pg, u, d->off);
+  svfloat32_t n = svcvt_f32_s32_x (
+      pg, svasr_x (pg, svreinterpret_s32_u32 (u), 23)); /* signextend.  */
+  u = svand_x (pg, u, d->mantissa_mask);
+  u = svadd_x (pg, u, d->off);
+  svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), 1.0f);
+
+  /* y = log(1+r) + n*ln2.  */
+  svfloat32_t r2 = svmul_x (pg, r, r);
+  /* n*ln2 + r + r2*(p0 + r*p1 + r2*(p2 + r*p3 + r2*(p4 + r*p5 + r2*p6))).  */
+  svfloat32_t p1356 = svld1rq_f32 (svptrue_b32 (), &d->p1);
+  svfloat32_t p = svmla_lane (sv_f32 (d->p4), r, p1356, 2);
+  svfloat32_t q = svmla_lane (sv_f32 (d->p2), r, p1356, 1);
+  svfloat32_t y = svmla_lane (sv_f32 (d->p0), r, p1356, 0);
+  p = svmla_lane (p, r2, p1356, 3);
+  q = svmla_x (pg, q, p, r2);
+  y = svmla_x (pg, y, q, r2);
+  p = svmla_x (pg, r, n, d->ln2);
+
+  return svmla_x (pg, p, y, r2);
+}
diff --git a/pl/math/tanf_3u3.c b/math/aarch64/experimental/tanf_3u3.c
similarity index 80%
rename from pl/math/tanf_3u3.c
rename to math/aarch64/experimental/tanf_3u3.c
index 30c86fa89730c3..c26e92db588fcb 100644
--- a/pl/math/tanf_3u3.c
+++ b/math/aarch64/experimental/tanf_3u3.c
@@ -1,12 +1,12 @@
 /*
  * Single-precision scalar tan(x) function.
  *
- * Copyright (c) 2021-2023, Arm Limited.
+ * Copyright (c) 2021-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 #include "poly_scalar_f32.h"
 
 /* Useful constants.  */
@@ -52,14 +52,6 @@ reduce (float x, int32_t *in)
   return r;
 }
 
-/* Table with 4/PI to 192 bit precision.  To avoid unaligned accesses
-   only 8 new bits are added per entry, making the table 4 times larger.  */
-static const uint32_t __inv_pio4[24]
-  = {0x000000a2, 0x0000a2f9, 0x00a2f983, 0xa2f9836e, 0xf9836e4e, 0x836e4e44,
-     0x6e4e4415, 0x4e441529, 0x441529fc, 0x1529fc27, 0x29fc2757, 0xfc2757d1,
-     0x2757d1f5, 0x57d1f534, 0xd1f534dd, 0xf534ddc0, 0x34ddc0db, 0xddc0db62,
-     0xc0db6295, 0xdb629599, 0x6295993c, 0x95993c43, 0x993c4390, 0x3c439041};
-
 /* Reduce the range of XI to a multiple of PI/2 using fast integer arithmetic.
    XI is a reinterpreted float and must be >= 2.0f (the sign bit is ignored).
    Return the modulo between -PI/4 and PI/4 and store the quadrant in NP.
@@ -130,11 +122,11 @@ tanf (float x)
       return fmaf (x2, x * y, x);
     }
   /* Similar to other trigonometric routines, fast inaccurate reduction is
-     performed for values of x from pi/4 up to RangeVal. In order to keep errors
-     below 3.5ulps, we set the value of RangeVal to 2^17. This might differ for
-     other trigonometric routines. Above this value more advanced but slower
-     reduction techniques need to be implemented to reach a similar accuracy.
-  */
+     performed for values of x from pi/4 up to RangeVal. In order to keep
+     errors below 3.5ulps, we set the value of RangeVal to 2^17. This might
+     differ for other trigonometric routines. Above this value more advanced
+     but slower reduction techniques need to be implemented to reach a similar
+     accuracy.  */
   else if (ia12 < top12 (RangeVal))
     {
       /* Fast inaccurate reduction.  */
@@ -182,12 +174,12 @@ tanf (float x)
   return fmaf (scale, p, offset);
 }
 
-PL_SIG (S, F, 1, tan, -3.1, 3.1)
-PL_TEST_ULP (tanf, 2.80)
-PL_TEST_INTERVAL (tanf, 0, 0xffff0000, 10000)
-PL_TEST_SYM_INTERVAL (tanf, 0x1p-127, 0x1p-14, 50000)
-PL_TEST_SYM_INTERVAL (tanf, 0x1p-14, 0.7, 50000)
-PL_TEST_SYM_INTERVAL (tanf, 0.7, 1.5, 50000)
-PL_TEST_SYM_INTERVAL (tanf, 1.5, 0x1p17, 50000)
-PL_TEST_SYM_INTERVAL (tanf, 0x1p17, 0x1p54, 50000)
-PL_TEST_SYM_INTERVAL (tanf, 0x1p54, inf, 50000)
+TEST_SIG (S, F, 1, tan, -3.1, 3.1)
+TEST_ULP (tanf, 2.80)
+TEST_INTERVAL (tanf, 0, 0xffff0000, 10000)
+TEST_SYM_INTERVAL (tanf, 0x1p-127, 0x1p-14, 50000)
+TEST_SYM_INTERVAL (tanf, 0x1p-14, 0.7, 50000)
+TEST_SYM_INTERVAL (tanf, 0.7, 1.5, 50000)
+TEST_SYM_INTERVAL (tanf, 1.5, 0x1p17, 50000)
+TEST_SYM_INTERVAL (tanf, 0x1p17, 0x1p54, 50000)
+TEST_SYM_INTERVAL (tanf, 0x1p54, inf, 50000)
diff --git a/pl/math/tanf_data.c b/math/aarch64/experimental/tanf_data.c
similarity index 96%
rename from pl/math/tanf_data.c
rename to math/aarch64/experimental/tanf_data.c
index a6b9d512eed2c1..f310cd77d4ecbc 100644
--- a/pl/math/tanf_data.c
+++ b/math/aarch64/experimental/tanf_data.c
@@ -1,7 +1,7 @@
 /*
  * Data used in single-precision tan(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
diff --git a/pl/math/tanh_3u.c b/math/aarch64/experimental/tanh_3u.c
similarity index 80%
rename from pl/math/tanh_3u.c
rename to math/aarch64/experimental/tanh_3u.c
index 86f2904afc32d0..838b6c4f12c133 100644
--- a/pl/math/tanh_3u.c
+++ b/math/aarch64/experimental/tanh_3u.c
@@ -1,13 +1,13 @@
 /*
  * Double-precision tanh(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 #include "math_config.h"
 #include "poly_scalar_f64.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define AbsMask 0x7fffffffffffffff
 #define InvLn2 0x1.71547652b82fep0
@@ -15,8 +15,10 @@
 #define Ln2lo 0x1.abc9e3b39803fp-56
 #define Shift 0x1.8p52
 
-#define BoringBound 0x403241bf835f9d5f /* asuint64 (0x1.241bf835f9d5fp+4).  */
-#define TinyBound 0x3e40000000000000   /* asuint64 (0x1p-27).  */
+/* asuint64 (0x1.241bf835f9d5fp+4).  */
+#define BoringBound 0x403241bf835f9d5f
+/* asuint64 (0x1p-27).  */
+#define TinyBound 0x3e40000000000000
 #define One 0x3ff0000000000000
 
 static inline double
@@ -71,8 +73,8 @@ tanh (double x)
   return q / (q + 2);
 }
 
-PL_SIG (S, D, 1, tanh, -10.0, 10.0)
-PL_TEST_ULP (tanh, 2.27)
-PL_TEST_SYM_INTERVAL (tanh, 0, TinyBound, 1000)
-PL_TEST_SYM_INTERVAL (tanh, TinyBound, BoringBound, 100000)
-PL_TEST_SYM_INTERVAL (tanh, BoringBound, inf, 1000)
+TEST_SIG (S, D, 1, tanh, -10.0, 10.0)
+TEST_ULP (tanh, 2.27)
+TEST_SYM_INTERVAL (tanh, 0, TinyBound, 1000)
+TEST_SYM_INTERVAL (tanh, TinyBound, BoringBound, 100000)
+TEST_SYM_INTERVAL (tanh, BoringBound, inf, 1000)
diff --git a/pl/math/tanhf_2u6.c b/math/aarch64/experimental/tanhf_2u6.c
similarity index 79%
rename from pl/math/tanhf_2u6.c
rename to math/aarch64/experimental/tanhf_2u6.c
index 93ea3cf5d865ae..d9adae5c3a76bb 100644
--- a/pl/math/tanhf_2u6.c
+++ b/math/aarch64/experimental/tanhf_2u6.c
@@ -1,16 +1,15 @@
 /*
  * Single-precision tanh(x) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
-#define BoringBound                                                            \
-  0x41102cb3 /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for        \
-		negative).  */
+/* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for negative).  */
+#define BoringBound 0x41102cb3
 #define AbsMask 0x7fffffff
 #define One 0x3f800000
 
@@ -26,8 +25,8 @@ expm1f_inline (float x)
 {
   /* Helper routine for calculating exp(x) - 1.
      Copied from expm1f_1u6.c, with several simplifications:
-     - No special-case handling for tiny or special values, instead return early
-       from the main routine.
+     - No special-case handling for tiny or special values, instead return
+       early from the main routine.
      - No special handling for large values:
        - No early return for infinity.
        - Simpler combination of p and t in final stage of algorithm.
@@ -81,8 +80,8 @@ tanhf (float x)
   return q / (q + 2);
 }
 
-PL_SIG (S, F, 1, tanh, -10.0, 10.0)
-PL_TEST_ULP (tanhf, 2.09)
-PL_TEST_SYM_INTERVAL (tanhf, 0, 0x1p-23, 1000)
-PL_TEST_SYM_INTERVAL (tanhf, 0x1p-23, 0x1.205966p+3, 100000)
-PL_TEST_SYM_INTERVAL (tanhf, 0x1.205966p+3, inf, 100)
+TEST_SIG (S, F, 1, tanh, -10.0, 10.0)
+TEST_ULP (tanhf, 2.09)
+TEST_SYM_INTERVAL (tanhf, 0, 0x1p-23, 1000)
+TEST_SYM_INTERVAL (tanhf, 0x1p-23, 0x1.205966p+3, 100000)
+TEST_SYM_INTERVAL (tanhf, 0x1.205966p+3, inf, 100)
diff --git a/math/aarch64/sincospi_4u.c b/math/aarch64/sincospi_4u.c
new file mode 100644
index 00000000000000..2a944bed23e15d
--- /dev/null
+++ b/math/aarch64/sincospi_4u.c
@@ -0,0 +1,158 @@
+/*
+ * Double-precision scalar sincospi function.
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "mathlib.h"
+#include "math_config.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "poly_scalar_f64.h"
+
+/* Taylor series coefficients for sin(pi * x).
+   C2 coefficient (originally ~=5.16771278) has been split into two parts:
+   C2_hi = 4, C2_lo = C2 - C2_hi (~=1.16771278)
+   This change in magnitude reduces floating point rounding errors.
+   C2_hi is then reintroduced after the polynomial approximation.  */
+static const struct sincospi_data
+{
+  double poly[10];
+} sincospi_data = {
+  /* Taylor series coefficients for sin(pi * x).  */
+  .poly = { 0x1.921fb54442d184p1, -0x1.2aef39896f94bp0, 0x1.466bc6775ab16p1,
+	    -0x1.32d2cce62dc33p-1, 0x1.507834891188ep-4, -0x1.e30750a28c88ep-8,
+	    0x1.e8f48308acda4p-12, -0x1.6fc0032b3c29fp-16,
+	    0x1.af86ae521260bp-21, -0x1.012a9870eeb7dp-25 },
+};
+
+/* Top 12 bits of a double with the sign bit cleared (biased exponent).  */
+static inline uint64_t
+abstop12 (double x)
+{
+  return (asuint64 (x) >> 52) & 0x7ff;
+}
+
+/* Triages special cases into 4 categories:
+     -1 or +1 if iy represents half an integer
+       -1 if the integer part of y is odd.
+       +1 if the integer part of y is even.
+     -2 or +2 if iy represents an integer.
+       -2 if iy is odd.
+       +2 if iy is even.
+   The argument is the bit representation of a positive finite value that is
+   either a half or an integer (the caller only passes |x| >= 0x1p51).  */
+static inline int
+checkint (uint64_t iy)
+{
+  int e = iy >> 52;
+  if (e > 0x3ff + 52)
+    return 2;
+  if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
+    {
+      if ((iy - 1) & 2)
+	return -1;
+      else
+	return 1;
+    }
+  if (iy & (1ULL << (0x3ff + 52 - e)))
+    return -2;
+  return 2;
+}
+
+/* Approximation for scalar double-precision sincospi(x).
+   Maximum error for sin: 3.46 ULP:
+      sincospi_sin(0x1.3d8a067cd8961p+14) got 0x1.ffe609a279008p-1 want
+   0x1.ffe609a27900cp-1.
+   Maximum error for cos: 3.66 ULP:
+      sincospi_cos(0x1.a0ec6997557eep-24) got 0x1.ffffffffffe59p-1 want
+   0x1.ffffffffffe5dp-1.  */
+void
+arm_math_sincospi (double x, double *out_sin, double *out_cos)
+{
+  const struct sincospi_data *d = ptr_barrier (&sincospi_data);
+  uint64_t sign = asuint64 (x) & 0x8000000000000000;
+
+  if (likely (abstop12 (x) < abstop12 (0x1p51)))
+    {
+      /* ar_s = x - n (range reduction into -1/2 .. 1/2).  */
+      double ar_s = x - rint (x);
+
+      /* We know that cospi(x) = sinpi(0.5 - x)
+	 range reduction and offset into sinpi range -1/2 .. 1/2
+	 ar_c = 0.5 - |x - rint(x)|.  */
+      double ar_c = 0.5 - fabs (ar_s);
+
+      /* Squares and fourth powers of the reduced arguments.  */
+      double ar2_s = ar_s * ar_s;
+      double ar2_c = ar_c * ar_c;
+      double ar4_s = ar2_s * ar2_s;
+      double ar4_c = ar2_c * ar2_c;
+
+      uint64_t cc_sign = ((uint64_t) llrint (x)) << 63;
+      uint64_t ss_sign = cc_sign;
+      if (ar_s == 0)
+	ss_sign = sign;
+
+      double ss = pw_horner_9_f64 (ar2_s, ar4_s, d->poly);
+      double cc = pw_horner_9_f64 (ar2_c, ar4_c, d->poly);
+
+      /* ar_c is never negative, so the sign of cos must be introduced
+	 based on whether x rounds to odd or even.  sin gets the same
+	 sign flip on top of the sign of ar_s, except that an exact
+	 integer x takes the sign of x.  */
+      *out_sin
+	  = asdouble (asuint64 (fma (-4 * ar2_s, ar_s, ss * ar_s)) ^ ss_sign);
+      *out_cos
+	  = asdouble (asuint64 (fma (-4 * ar2_c, ar_c, cc * ar_c)) ^ cc_sign);
+    }
+  else
+    {
+      /* When abs(x) >= 0x1p51, the x will be either
+	    - Half integer (relevant if abs(x) in [0x1p51, 0x1p52])
+	    - Odd integer  (relevant if abs(x) in [0x1p52, 0x1p53])
+	    - Even integer (relevant if abs(x) in [0x1p53, inf])
+	    - Inf or NaN.  */
+      if (abstop12 (x) >= 0x7ff)
+	{
+	  double inv_result = __math_invalid (x);
+	  *out_sin = inv_result;
+	  *out_cos = inv_result;
+	  return;
+	}
+      else
+	{
+	  uint64_t ax = asuint64 (x) & 0x7fffffffffffffff;
+	  int m = checkint (ax);
+	  /* The case where ax is half integer.  */
+	  if (m & 1)
+	    {
+	      *out_sin = sign ? -m : m;
+	      *out_cos = 0;
+	      return;
+	    }
+	  /* The case where ax is integer.  */
+	  else
+	    {
+	      *out_sin = asdouble (sign);
+	      *out_cos = m >> 1;
+	      return;
+	    }
+	}
+    }
+}
+
+#if WANT_TRIGPI_TESTS
+TEST_DISABLE_FENV (arm_math_sincospi_sin)
+TEST_DISABLE_FENV (arm_math_sincospi_cos)
+TEST_ULP (arm_math_sincospi_sin, 2.96)
+TEST_ULP (arm_math_sincospi_cos, 3.16)
+#  define SINCOS_INTERVAL(lo, hi, n)                                          \
+    TEST_SYM_INTERVAL (arm_math_sincospi_sin, lo, hi, n)                      \
+    TEST_SYM_INTERVAL (arm_math_sincospi_cos, lo, hi, n)
+SINCOS_INTERVAL (0, 0x1p-63, 10000)
+SINCOS_INTERVAL (0x1p-63, 0.5, 50000)
+SINCOS_INTERVAL (0.5, 0x1p51, 50000)
+SINCOS_INTERVAL (0x1p51, inf, 10000)
+#endif
diff --git a/math/aarch64/sincospif_3u2.c b/math/aarch64/sincospif_3u2.c
new file mode 100644
index 00000000000000..b79694d2ac656f
--- /dev/null
+++ b/math/aarch64/sincospif_3u2.c
@@ -0,0 +1,145 @@
+/*
+ * Single-precision scalar sincospi function.
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "poly_scalar_f32.h"
+
+/* Taylor series coefficients for sin(pi * x).  */
+static const struct sincospif_data
+{
+  float poly[6];
+} sincospif_data = {
+  /* Taylor series coefficients for sin(pi * x).  */
+  .poly = { 0x1.921fb6p1f, -0x1.4abbcep2f, 0x1.466bc6p1f, -0x1.32d2ccp-1f,
+	    0x1.50783p-4f, -0x1.e30750p-8f },
+};
+
+/* Top 12 bits of the float representation with the sign bit cleared.  */
+static inline uint32_t
+abstop12 (float x)
+{
+  return (asuint (x) >> 20) & 0x7ff;
+}
+
+/* Triages special cases into 4 categories:
+     -1 or +1 if iy represents half an integer
+       -1 if the integer part of y is odd.
+       +1 if the integer part of y is even.
+     -2 or +2 if iy represents an integer.
+       -2 if iy is odd.
+       +2 if iy is even.
+   The argument is the bit representation of a positive non-zero
+   finite floating-point value which is either a half or an integer.  */
+static inline int
+checkint (uint32_t iy)
+{
+  int e = iy >> 23;
+  if (e > 0x7f + 23)
+    return 2;
+  if (iy & ((1 << (0x7f + 23 - e)) - 1))
+    {
+      if ((iy - 1) & 2)
+	return -1;
+      else
+	return 1;
+    }
+  if (iy & (1 << (0x7f + 23 - e)))
+    return -2;
+  return 2;
+}
+
+/* Approximation for scalar single-precision sincospif(x).
+   Maximum error for sin: 3.04 ULP:
+      sincospif_sin(0x1.c597ccp-2) got 0x1.f7cd56p-1 want 0x1.f7cd5p-1.
+   Maximum error for cos: 3.18 ULP:
+      sincospif_cos(0x1.d341a8p-5) got 0x1.f7cd56p-1 want 0x1.f7cd5p-1.  */
+void
+arm_math_sincospif (float x, float *out_sin, float *out_cos)
+{
+
+  const struct sincospif_data *d = ptr_barrier (&sincospif_data);
+  uint32_t sign = asuint (x) & 0x80000000;
+
+  /* abs(x) in [0, 0x1p22].  */
+  if (likely (abstop12 (x) < abstop12 (0x1p22)))
+    {
+      /* ar_s = x - n (range reduction into -1/2 .. 1/2).  */
+      float ar_s = x - rintf (x);
+      /* We know that cospi(x) = sinpi(0.5 - x)
+      range reduction and offset into sinpi range -1/2 .. 1/2
+      ar_c = 0.5 - |x - n|.  */
+      float ar_c = 0.5f - fabsf (ar_s);
+
+      float ar2_s = ar_s * ar_s;
+      float ar2_c = ar_c * ar_c;
+      float ar4_s = ar2_s * ar2_s;
+      float ar4_c = ar2_c * ar2_c;
+
+      uint32_t cc_sign = ((uint32_t) lrintf (x)) << 31;
+      uint32_t ss_sign = cc_sign;
+      if (ar_s == 0)
+	ss_sign = sign;
+
+      /* ar_c is never negative, so the sign of cos is introduced from the
+      parity of lrintf (x), converted to unsigned before shifting so that
+      negative x is well defined.  sin gets the same sign flip on top of
+      the sign of ar_s, except that an exact integer x takes the sign of x.  */
+      *out_sin = pw_horner_5_f32 (ar2_s, ar4_s, d->poly)
+		 * asfloat (asuint (ar_s) ^ ss_sign);
+      *out_cos = pw_horner_5_f32 (ar2_c, ar4_c, d->poly)
+		 * asfloat (asuint (ar_c) ^ cc_sign);
+      return;
+    }
+  else
+    {
+      /* When abs(x) >= 0x1p22, the x will be either
+	    - Half integer (relevant if abs(x) in [0x1p22, 0x1p23])
+	    - Odd integer  (relevant if abs(x) in [0x1p22, 0x1p24])
+	    - Even integer (relevant if abs(x) in [0x1p22, inf])
+	    - Inf or NaN.  */
+      if (abstop12 (x) >= 0x7f8)
+	{
+	  float inv_result = __math_invalidf (x);
+	  *out_sin = inv_result;
+	  *out_cos = inv_result;
+	  return;
+	}
+      else
+	{
+	  uint32_t ax = asuint (x) & 0x7fffffff;
+	  int m = checkint (ax);
+	  if (m & 1)
+	    {
+	      *out_sin = sign ? -m : m;
+	      *out_cos = 0;
+	      return;
+	    }
+	  else
+	    {
+	      *out_sin = asfloat (sign);
+	      *out_cos = m >> 1;
+	      return;
+	    }
+	}
+    }
+}
+
+#if WANT_TRIGPI_TESTS
+TEST_DISABLE_FENV (arm_math_sincospif_sin)
+TEST_DISABLE_FENV (arm_math_sincospif_cos)
+TEST_ULP (arm_math_sincospif_sin, 2.54)
+TEST_ULP (arm_math_sincospif_cos, 2.68)
+#  define SINCOSPIF_INTERVAL(lo, hi, n)                                       \
+    TEST_SYM_INTERVAL (arm_math_sincospif_sin, lo, hi, n)                     \
+    TEST_SYM_INTERVAL (arm_math_sincospif_cos, lo, hi, n)
+SINCOSPIF_INTERVAL (0, 0x1p-31, 10000)
+SINCOSPIF_INTERVAL (0x1p-31, 1, 50000)
+SINCOSPIF_INTERVAL (1, 0x1p22f, 50000)
+SINCOSPIF_INTERVAL (0x1p22f, inf, 10000)
+#endif
diff --git a/pl/math/sinpi_3u.c b/math/aarch64/sinpi_3u5.c
similarity index 76%
rename from pl/math/sinpi_3u.c
rename to math/aarch64/sinpi_3u5.c
index a04a352a62e635..f96d9a312b53ca 100644
--- a/pl/math/sinpi_3u.c
+++ b/math/aarch64/sinpi_3u5.c
@@ -1,7 +1,7 @@
 /*
  * Double-precision scalar sinpi function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -9,8 +9,8 @@
 #include <math.h>
 #include "mathlib.h"
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 #include "poly_scalar_f64.h"
 
 /* Taylor series coefficents for sin(pi * x).
@@ -25,15 +25,17 @@ static const double poly[]
 	-0x1.012a9870eeb7dp-25 };
 
 #define Shift 0x1.8p+52
+/* TODO Store constant in structure for more efficient load.  */
+#define Pi 0x1.921fb54442d18p+1
 
 /* Approximation for scalar double-precision sinpi(x).
    Maximum error: 3.03 ULP:
    sinpi(0x1.a90da2818f8b5p+7) got 0x1.fe358f255a4b3p-1
 			      want 0x1.fe358f255a4b6p-1.  */
 double
-sinpi (double x)
+arm_math_sinpi (double x)
 {
-  if (isinf (x))
+  if (isinf (x) || isnan (x))
     return __math_invalid (x);
 
   double r = asdouble (asuint64 (x) & ~0x8000000000000000);
@@ -42,17 +44,17 @@ sinpi (double x)
   /* Edge cases for when sinpif should be exactly 0. (Integers)
      0x1p53 is the limit for single precision to store any decimal places.  */
   if (r >= 0x1p53)
-    return 0;
+    return asdouble (sign);
 
   /* If x is an integer, return 0.  */
   uint64_t m = (uint64_t) r;
   if (r == m)
-    return 0;
+    return asdouble (sign);
 
   /* For very small inputs, squaring r causes underflow.
      Values below this threshold can be approximated via sinpi(x) ≈ pi*x.  */
   if (r < 0x1p-63)
-    return M_PI * x;
+    return Pi * x;
 
   /* Any non-integer values >= 0x1x51 will be int + 0.5.
      These values should return exactly 1 or -1.  */
@@ -82,9 +84,18 @@ sinpi (double x)
   return asdouble (asuint64 (y) ^ sign);
 }
 
-PL_SIG (S, D, 1, sinpi, -0.9, 0.9)
-PL_TEST_ULP (sinpi, 2.53)
-PL_TEST_SYM_INTERVAL (sinpi, 0, 0x1p-63, 5000)
-PL_TEST_SYM_INTERVAL (sinpi, 0x1p-63, 0.5, 10000)
-PL_TEST_SYM_INTERVAL (sinpi, 0.5, 0x1p51, 10000)
-PL_TEST_SYM_INTERVAL (sinpi, 0x1p51, inf, 10000)
+#if WANT_EXPERIMENTAL_MATH
+double
+sinpi (double x)
+{
+  return arm_math_sinpi (x);
+}
+#endif
+
+#if WANT_TRIGPI_TESTS
+TEST_ULP (arm_math_sinpi, 2.53)
+TEST_SYM_INTERVAL (arm_math_sinpi, 0, 0x1p-63, 5000)
+TEST_SYM_INTERVAL (arm_math_sinpi, 0x1p-63, 0.5, 10000)
+TEST_SYM_INTERVAL (arm_math_sinpi, 0.5, 0x1p51, 10000)
+TEST_SYM_INTERVAL (arm_math_sinpi, 0x1p51, inf, 10000)
+#endif
diff --git a/pl/math/sinpif_2u5.c b/math/aarch64/sinpif_2u5.c
similarity index 75%
rename from pl/math/sinpif_2u5.c
rename to math/aarch64/sinpif_2u5.c
index af9ca0573b374f..b5d9cd9145771c 100644
--- a/pl/math/sinpif_2u5.c
+++ b/math/aarch64/sinpif_2u5.c
@@ -1,14 +1,14 @@
 /*
  * Single-precision scalar sinpi function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "mathlib.h"
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 /* Taylor series coefficents for sin(pi * x).  */
 #define C0 0x1.921fb6p1f
@@ -25,9 +25,9 @@
    sinpif(0x1.d062b6p-2) got 0x1.fa8c06p-1
 			want 0x1.fa8c02p-1.  */
 float
-sinpif (float x)
+arm_math_sinpif (float x)
 {
-  if (isinf (x))
+  if (isinf (x) || isnan (x))
     return __math_invalidf (x);
 
   float r = asfloat (asuint (x) & ~0x80000000);
@@ -36,11 +36,11 @@ sinpif (float x)
   /* Edge cases for when sinpif should be exactly 0. (Integers)
      0x1p23 is the limit for single precision to store any decimal places.  */
   if (r >= 0x1p23f)
-    return 0;
+    return asfloat (sign);
 
   int32_t m = roundf (r);
   if (m == r)
-    return 0;
+    return asfloat (sign);
 
   /* For very small inputs, squaring r causes underflow.
      Values below this threshold can be approximated via sinpi(x) ~= pi*x.  */
@@ -75,9 +75,18 @@ sinpif (float x)
   return asfloat (asuint (y * r) ^ sign);
 }
 
-PL_SIG (S, F, 1, sinpi, -0.9, 0.9)
-PL_TEST_ULP (sinpif, 1.99)
-PL_TEST_SYM_INTERVAL (sinpif, 0, 0x1p-31, 5000)
-PL_TEST_SYM_INTERVAL (sinpif, 0x1p-31, 0.5, 10000)
-PL_TEST_SYM_INTERVAL (sinpif, 0.5, 0x1p22f, 10000)
-PL_TEST_SYM_INTERVAL (sinpif, 0x1p22f, inf, 10000)
+#if WANT_EXPERIMENTAL_MATH
+float
+sinpif (float x)
+{
+  return arm_math_sinpif (x);
+}
+#endif
+
+#if WANT_TRIGPI_TESTS
+TEST_ULP (arm_math_sinpif, 1.99)
+TEST_SYM_INTERVAL (arm_math_sinpif, 0, 0x1p-31, 5000)
+TEST_SYM_INTERVAL (arm_math_sinpif, 0x1p-31, 0.5, 10000)
+TEST_SYM_INTERVAL (arm_math_sinpif, 0.5, 0x1p22f, 10000)
+TEST_SYM_INTERVAL (arm_math_sinpif, 0x1p22f, inf, 10000)
+#endif
diff --git a/pl/math/sv_acos_2u.c b/math/aarch64/sve/acos.c
similarity index 85%
rename from pl/math/sv_acos_2u.c
rename to math/aarch64/sve/acos.c
index e06db6cae6af9d..da633392aa3e20 100644
--- a/pl/math/sv_acos_2u.c
+++ b/math/aarch64/sve/acos.c
@@ -1,14 +1,14 @@
 /*
  * Double-precision SVE acos(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "sv_poly_f64.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -82,10 +82,12 @@ svfloat64_t SV_NAME_D1 (acos) (svfloat64_t x, const svbool_t pg)
   return svmla_x (pg, add, mul, y);
 }
 
-PL_SIG (SV, D, 1, acos, -1.0, 1.0)
-PL_TEST_ULP (SV_NAME_D1 (acos), 1.02)
-PL_TEST_INTERVAL (SV_NAME_D1 (acos), 0, 0.5, 50000)
-PL_TEST_INTERVAL (SV_NAME_D1 (acos), 0.5, 1.0, 50000)
-PL_TEST_INTERVAL (SV_NAME_D1 (acos), 1.0, 0x1p11, 50000)
-PL_TEST_INTERVAL (SV_NAME_D1 (acos), 0x1p11, inf, 20000)
-PL_TEST_INTERVAL (SV_NAME_D1 (acos), -0, -inf, 20000)
+TEST_SIG (SV, D, 1, acos, -1.0, 1.0)
+TEST_ULP (SV_NAME_D1 (acos), 1.02)
+TEST_DISABLE_FENV (SV_NAME_D1 (acos))
+TEST_INTERVAL (SV_NAME_D1 (acos), 0, 0.5, 50000)
+TEST_INTERVAL (SV_NAME_D1 (acos), 0.5, 1.0, 50000)
+TEST_INTERVAL (SV_NAME_D1 (acos), 1.0, 0x1p11, 50000)
+TEST_INTERVAL (SV_NAME_D1 (acos), 0x1p11, inf, 20000)
+TEST_INTERVAL (SV_NAME_D1 (acos), -0, -inf, 20000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_acosf_1u4.c b/math/aarch64/sve/acosf.c
similarity index 83%
rename from pl/math/sv_acosf_1u4.c
rename to math/aarch64/sve/acosf.c
index 7ac59ceedfbdb0..86b7822cefc3dc 100644
--- a/pl/math/sv_acosf_1u4.c
+++ b/math/aarch64/sve/acosf.c
@@ -1,14 +1,14 @@
 /*
  * Single-precision SVE acos(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "poly_sve_f32.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "sv_poly_f32.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -75,10 +75,12 @@ svfloat32_t SV_NAME_F1 (acos) (svfloat32_t x, const svbool_t pg)
   return svmla_x (pg, add, mul, y);
 }
 
-PL_SIG (SV, F, 1, acos, -1.0, 1.0)
-PL_TEST_ULP (SV_NAME_F1 (acos), 0.82)
-PL_TEST_INTERVAL (SV_NAME_F1 (acos), 0, 0.5, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (acos), 0.5, 1.0, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (acos), 1.0, 0x1p11, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (acos), 0x1p11, inf, 20000)
-PL_TEST_INTERVAL (SV_NAME_F1 (acos), -0, -inf, 20000)
+TEST_SIG (SV, F, 1, acos, -1.0, 1.0)
+TEST_ULP (SV_NAME_F1 (acos), 0.82)
+TEST_DISABLE_FENV (SV_NAME_F1 (acos))
+TEST_INTERVAL (SV_NAME_F1 (acos), 0, 0.5, 50000)
+TEST_INTERVAL (SV_NAME_F1 (acos), 0.5, 1.0, 50000)
+TEST_INTERVAL (SV_NAME_F1 (acos), 1.0, 0x1p11, 50000)
+TEST_INTERVAL (SV_NAME_F1 (acos), 0x1p11, inf, 20000)
+TEST_INTERVAL (SV_NAME_F1 (acos), -0, -inf, 20000)
+CLOSE_SVE_ATTR
diff --git a/math/aarch64/sve/acosh.c b/math/aarch64/sve/acosh.c
new file mode 100644
index 00000000000000..d54c21922e1b18
--- /dev/null
+++ b/math/aarch64/sve/acosh.c
@@ -0,0 +1,51 @@
+/*
+ * Double-precision SVE acosh(x) function.
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+
+#define WANT_SV_LOG1P_K0_SHORTCUT 1
+#include "sv_log1p_inline.h"
+
+#define One (0x3ff0000000000000)
+#define Thres (0x1ff0000000000000) /* asuint64 (0x1p511) - One.  */
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
+{
+  return sv_call_f64 (acosh, x, y, special);
+}
+
+/* SVE approximation for double-precision acosh, based on log1p.
+   The largest observed error is 3.19 ULP in the region where the
+   argument to log1p falls in the k=0 interval, i.e. x close to 1:
+   SV_NAME_D1 (acosh)(0x1.1e4388d4ca821p+0) got 0x1.ed23399f5137p-2
+					   want 0x1.ed23399f51373p-2.  */
+svfloat64_t SV_NAME_D1 (acosh) (svfloat64_t x, const svbool_t pg)
+{
+  /* (ix - One) >= Thres, with Thres = asuint64 (0x1p511) - One.  */
+  svuint64_t ix = svreinterpret_u64 (x);
+  svbool_t special = svcmpge (pg, svsub_x (pg, ix, One), Thres);
+
+  svfloat64_t xm1 = svsub_x (pg, x, 1.0);
+  svfloat64_t u = svmul_x (pg, xm1, svadd_x (pg, x, 1.0));
+  svfloat64_t y = svadd_x (pg, xm1, svsqrt_x (pg, u));
+
+  /* Fall back to scalar routine for special lanes.  */
+  if (unlikely (svptest_any (pg, special)))
+    return special_case (x, sv_log1p_inline (y, pg), special);
+  return sv_log1p_inline (y, pg);
+}
+
+TEST_SIG (SV, D, 1, acosh, 1.0, 10.0)
+TEST_ULP (SV_NAME_D1 (acosh), 2.69)
+TEST_DISABLE_FENV (SV_NAME_D1 (acosh))
+TEST_INTERVAL (SV_NAME_D1 (acosh), 1, 0x1p511, 90000)
+TEST_INTERVAL (SV_NAME_D1 (acosh), 0x1p511, inf, 10000)
+TEST_INTERVAL (SV_NAME_D1 (acosh), 0, 1, 1000)
+TEST_INTERVAL (SV_NAME_D1 (acosh), -0, -inf, 10000)
+CLOSE_SVE_ATTR
diff --git a/math/aarch64/sve/acoshf.c b/math/aarch64/sve/acoshf.c
new file mode 100644
index 00000000000000..f48ef724e8ebf0
--- /dev/null
+++ b/math/aarch64/sve/acoshf.c
@@ -0,0 +1,51 @@
+/*
+ * Single-precision SVE acosh(x) function.
+ * Copyright (c) 2023-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+
+#define One 0x3f800000
+#define Thres 0x20000000 /* asuint(0x1p64) - One.  */
+
+#include "sv_log1pf_inline.h"
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t xm1, svfloat32_t tmp, svbool_t special)
+{
+  svfloat32_t x = svadd_x (svptrue_b32 (), xm1, 1.0f);
+  svfloat32_t y = sv_log1pf_inline (tmp, svptrue_b32 ());
+  return sv_call_f32 (acoshf, x, y, special);
+}
+
+/* Single-precision SVE acosh(x) routine. Implements the same algorithm as
+   vector acoshf and log1p.
+
+   Maximum error is 2.47 ULPs:
+   SV_NAME_F1 (acosh) (0x1.01ca76p+0) got 0x1.e435a6p-4
+				     want 0x1.e435a2p-4.  */
+svfloat32_t SV_NAME_F1 (acosh) (svfloat32_t x, const svbool_t pg)
+{
+  svuint32_t ix = svreinterpret_u32 (x);
+  svbool_t special = svcmpge (pg, svsub_x (pg, ix, One), Thres);
+
+  svfloat32_t xm1 = svsub_x (pg, x, 1.0f);
+  svfloat32_t u = svmul_x (pg, xm1, svadd_x (pg, x, 1.0f));
+  svfloat32_t tmp = svadd_x (pg, xm1, svsqrt_x (pg, u));
+
+  if (unlikely (svptest_any (pg, special)))
+    return special_case (xm1, tmp, special);
+  return sv_log1pf_inline (tmp, pg);
+}
+
+TEST_SIG (SV, F, 1, acosh, 1.0, 10.0)
+TEST_ULP (SV_NAME_F1 (acosh), 1.97)
+TEST_DISABLE_FENV (SV_NAME_F1 (acosh))
+TEST_INTERVAL (SV_NAME_F1 (acosh), 0, 1, 500)
+TEST_INTERVAL (SV_NAME_F1 (acosh), 1, 0x1p64, 100000)
+TEST_INTERVAL (SV_NAME_F1 (acosh), 0x1p64, inf, 1000)
+TEST_INTERVAL (SV_NAME_F1 (acosh), -0, -inf, 1000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_asin_3u.c b/math/aarch64/sve/asin.c
similarity index 80%
rename from pl/math/sv_asin_3u.c
rename to math/aarch64/sve/asin.c
index c3dd37b145ae76..cac629afae1555 100644
--- a/pl/math/sv_asin_3u.c
+++ b/math/aarch64/sve/asin.c
@@ -1,14 +1,14 @@
 /*
  * Double-precision SVE asin(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "sv_poly_f64.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -42,8 +42,8 @@ static const struct data
      asin(x) = pi/2 - (y + y * z * P(z)), with  z = (1-x)/2 and y = sqrt(z).
 
    The largest observed error in this region is 2.69 ulps,
-   _ZGVsMxv_asin(0x1.044ac9819f573p-1) got 0x1.110d7e85fdd5p-1
-				      want 0x1.110d7e85fdd53p-1.  */
+   _ZGVsMxv_asin (0x1.044e8cefee301p-1) got 0x1.1111dd54ddf96p-1
+				       want 0x1.1111dd54ddf99p-1.  */
 svfloat64_t SV_NAME_D1 (asin) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
@@ -75,10 +75,12 @@ svfloat64_t SV_NAME_D1 (asin) (svfloat64_t x, const svbool_t pg)
   return svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (y), sign));
 }
 
-PL_SIG (SV, D, 1, asin, -1.0, 1.0)
-PL_TEST_ULP (SV_NAME_D1 (asin), 2.19)
-PL_TEST_INTERVAL (SV_NAME_D1 (asin), 0, 0.5, 50000)
-PL_TEST_INTERVAL (SV_NAME_D1 (asin), 0.5, 1.0, 50000)
-PL_TEST_INTERVAL (SV_NAME_D1 (asin), 1.0, 0x1p11, 50000)
-PL_TEST_INTERVAL (SV_NAME_D1 (asin), 0x1p11, inf, 20000)
-PL_TEST_INTERVAL (SV_NAME_D1 (asin), -0, -inf, 20000)
+TEST_SIG (SV, D, 1, asin, -1.0, 1.0)
+TEST_ULP (SV_NAME_D1 (asin), 2.20)
+TEST_DISABLE_FENV (SV_NAME_D1 (asin))
+TEST_INTERVAL (SV_NAME_D1 (asin), 0, 0.5, 50000)
+TEST_INTERVAL (SV_NAME_D1 (asin), 0.5, 1.0, 50000)
+TEST_INTERVAL (SV_NAME_D1 (asin), 1.0, 0x1p11, 50000)
+TEST_INTERVAL (SV_NAME_D1 (asin), 0x1p11, inf, 20000)
+TEST_INTERVAL (SV_NAME_D1 (asin), -0, -inf, 20000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_asinf_2u5.c b/math/aarch64/sve/asinf.c
similarity index 81%
rename from pl/math/sv_asinf_2u5.c
rename to math/aarch64/sve/asinf.c
index 8e9edc2439f5d4..fe94feba7a425b 100644
--- a/pl/math/sv_asinf_2u5.c
+++ b/math/aarch64/sve/asinf.c
@@ -1,14 +1,14 @@
 /*
  * Single-precision SVE asin(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "poly_sve_f32.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "sv_poly_f32.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -67,10 +67,12 @@ svfloat32_t SV_NAME_F1 (asin) (svfloat32_t x, const svbool_t pg)
   return svreinterpret_f32 (svorr_x (pg, svreinterpret_u32 (y), sign));
 }
 
-PL_SIG (SV, F, 1, asin, -1.0, 1.0)
-PL_TEST_ULP (SV_NAME_F1 (asin), 1.91)
-PL_TEST_INTERVAL (SV_NAME_F1 (asin), 0, 0.5, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (asin), 0.5, 1.0, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (asin), 1.0, 0x1p11, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (asin), 0x1p11, inf, 20000)
-PL_TEST_INTERVAL (SV_NAME_F1 (asin), -0, -inf, 20000)
\ No newline at end of file
+TEST_SIG (SV, F, 1, asin, -1.0, 1.0)
+TEST_ULP (SV_NAME_F1 (asin), 1.91)
+TEST_DISABLE_FENV (SV_NAME_F1 (asin))
+TEST_INTERVAL (SV_NAME_F1 (asin), 0, 0.5, 50000)
+TEST_INTERVAL (SV_NAME_F1 (asin), 0.5, 1.0, 50000)
+TEST_INTERVAL (SV_NAME_F1 (asin), 1.0, 0x1p11, 50000)
+TEST_INTERVAL (SV_NAME_F1 (asin), 0x1p11, inf, 20000)
+TEST_INTERVAL (SV_NAME_F1 (asin), -0, -inf, 20000)
+CLOSE_SVE_ATTR
diff --git a/math/aarch64/sve/asinh.c b/math/aarch64/sve/asinh.c
new file mode 100644
index 00000000000000..5574116de1e12a
--- /dev/null
+++ b/math/aarch64/sve/asinh.c
@@ -0,0 +1,197 @@
+/*
+ * Double-precision SVE asinh(x) function.
+ *
+ * Copyright (c) 2022-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+
+#define SignMask (0x8000000000000000)
+#define One (0x3ff0000000000000)
+#define Thres (0x5fe0000000000000) /* asuint64 (0x1p511).  */
+#define IndexMask (((1 << V_LOG_TABLE_BITS) - 1) << 1)
+
+static const struct data
+{
+  double even_coeffs[9];
+  double ln2, p3, p1, p4, p0, p2, c1, c3, c5, c7, c9, c11, c13, c15, c17;
+  uint64_t off, mask;
+
+} data = {
+   /* Polynomial generated using Remez on [2^-26, 1].  */
+  .even_coeffs ={
+    -0x1.55555555554a7p-3,
+    -0x1.6db6db68332e6p-5,
+    -0x1.6e8b8b654a621p-6,
+    -0x1.c9871d10885afp-7,
+    -0x1.3ddca533e9f54p-7,
+    -0x1.b90c7099dd397p-8,
+    -0x1.d217026a669ecp-9,
+    -0x1.e0f37daef9127p-11,
+    -0x1.021a48685e287p-14, },
+
+  .c1 = 0x1.3333333326c7p-4,
+  .c3 = 0x1.f1c71b26fb40dp-6,
+  .c5 = 0x1.1c4daa9e67871p-6,
+  .c7 = 0x1.7a16e8d9d2ecfp-7,
+  .c9 = 0x1.0becef748dafcp-7,
+  .c11 = 0x1.541f2bb1ffe51p-8,
+  .c13 = 0x1.0b5c7977aaf7p-9,
+  .c15 = 0x1.388b5fe542a6p-12,
+  .c17 = 0x1.93d4ba83d34dap-18,
+
+  .ln2 = 0x1.62e42fefa39efp-1,
+  .p0 = -0x1.ffffffffffff7p-2,
+  .p1 = 0x1.55555555170d4p-2,
+  .p2 = -0x1.0000000399c27p-2,
+  .p3 = 0x1.999b2e90e94cap-3,
+  .p4 = -0x1.554e550bd501ep-3,
+  .off = 0x3fe6900900000000,
+  .mask = 0xfffULL << 52,
+};
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
+{
+  return sv_call_f64 (asinh, x, y, special);
+}
+
+static inline svfloat64_t
+__sv_log_inline (svfloat64_t x, const struct data *d, const svbool_t pg)
+{
+  /* Double-precision SVE log, copied from SVE log implementation with some
+     cosmetic modification and special-cases removed. See that file for details
+     of the algorithm used.  */
+
+  svuint64_t ix = svreinterpret_u64 (x);
+  svuint64_t i_off = svsub_x (pg, ix, d->off);
+  svuint64_t i
+      = svand_x (pg, svlsr_x (pg, i_off, (51 - V_LOG_TABLE_BITS)), IndexMask);
+  svuint64_t iz = svsub_x (pg, ix, svand_x (pg, i_off, d->mask));
+  svfloat64_t z = svreinterpret_f64 (iz);
+
+  svfloat64_t invc = svld1_gather_index (pg, &__v_log_data.table[0].invc, i);
+  svfloat64_t logc = svld1_gather_index (pg, &__v_log_data.table[0].logc, i);
+
+  svfloat64_t ln2_p3 = svld1rq (svptrue_b64 (), &d->ln2);
+  svfloat64_t p1_p4 = svld1rq (svptrue_b64 (), &d->p1);
+
+  svfloat64_t r = svmla_x (pg, sv_f64 (-1.0), invc, z);
+  svfloat64_t kd
+      = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (i_off), 52));
+
+  svfloat64_t hi = svmla_lane (svadd_x (pg, logc, r), kd, ln2_p3, 0);
+  svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+  svfloat64_t y = svmla_lane (sv_f64 (d->p2), r, ln2_p3, 1);
+  svfloat64_t p = svmla_lane (sv_f64 (d->p0), r, p1_p4, 0);
+
+  y = svmla_lane (y, r2, p1_p4, 1);
+  y = svmla_x (pg, p, r2, y);
+  y = svmla_x (pg, hi, r2, y);
+  return y;
+}
+
+/* Double-precision implementation of SVE asinh(x).
+   asinh is very sensitive around 1, so it is impractical to devise a single
+   low-cost algorithm which is sufficiently accurate on a wide range of input.
+   Instead we use two different algorithms:
+   asinh(x) = sign(x) * log(|x| + sqrt(x^2 + 1))     if |x| >= 1
+	    = sign(x) * (|x| + |x|^3 * P(x^2))       otherwise
+   where log(x) is an optimized log approximation, and P(x) is a polynomial
+   shared with the scalar routine. The greatest observed error is 2.51 ULP, in
+   |x| >= 1:
+   _ZGVsMxv_asinh(0x1.170469d024505p+0) got 0x1.e3181c43b0f36p-1
+				       want 0x1.e3181c43b0f39p-1.  */
+svfloat64_t SV_NAME_D1 (asinh) (svfloat64_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  svuint64_t ix = svreinterpret_u64 (x);
+  svuint64_t iax = svbic_x (pg, ix, SignMask);
+  svuint64_t sign = svand_x (pg, ix, SignMask);
+  svfloat64_t ax = svreinterpret_f64 (iax);
+  svbool_t ge1 = svcmpge (pg, iax, One);
+  svbool_t special = svcmpge (pg, iax, Thres);
+
+  /* Option 1: |x| >= 1.
+     Compute asinh(x) according to asinh(x) = log(x + sqrt(x^2 + 1)).  */
+  svfloat64_t option_1 = sv_f64 (0);
+  if (likely (svptest_any (pg, ge1)))
+    {
+      svfloat64_t x2 = svmul_x (svptrue_b64 (), ax, ax);
+      option_1 = __sv_log_inline (
+	  svadd_x (pg, ax, svsqrt_x (pg, svadd_x (pg, x2, 1))), d, pg);
+    }
+
+  /* Option 2: |x| < 1.
+     Compute asinh(x) using a polynomial.
+     The largest observed error in this region is 1.51 ULPs:
+     _ZGVsMxv_asinh(0x1.fe12bf8c616a2p-1) got 0x1.c1e649ee2681bp-1
+					 want 0x1.c1e649ee2681dp-1.  */
+
+  svfloat64_t option_2 = sv_f64 (0);
+  if (likely (svptest_any (pg, svnot_z (pg, ge1))))
+    {
+      svfloat64_t x2 = svmul_x (svptrue_b64 (), ax, ax);
+      svfloat64_t x4 = svmul_x (svptrue_b64 (), x2, x2);
+      /* Order-17 Pairwise Horner scheme.  */
+      svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1);
+      svfloat64_t c57 = svld1rq (svptrue_b64 (), &d->c5);
+      svfloat64_t c911 = svld1rq (svptrue_b64 (), &d->c9);
+      svfloat64_t c1315 = svld1rq (svptrue_b64 (), &d->c13);
+
+      svfloat64_t p01 = svmla_lane (sv_f64 (d->even_coeffs[0]), x2, c13, 0);
+      svfloat64_t p23 = svmla_lane (sv_f64 (d->even_coeffs[1]), x2, c13, 1);
+      svfloat64_t p45 = svmla_lane (sv_f64 (d->even_coeffs[2]), x2, c57, 0);
+      svfloat64_t p67 = svmla_lane (sv_f64 (d->even_coeffs[3]), x2, c57, 1);
+      svfloat64_t p89 = svmla_lane (sv_f64 (d->even_coeffs[4]), x2, c911, 0);
+      svfloat64_t p1011 = svmla_lane (sv_f64 (d->even_coeffs[5]), x2, c911, 1);
+      svfloat64_t p1213
+	  = svmla_lane (sv_f64 (d->even_coeffs[6]), x2, c1315, 0);
+      svfloat64_t p1415
+	  = svmla_lane (sv_f64 (d->even_coeffs[7]), x2, c1315, 1);
+      svfloat64_t p1617 = svmla_x (pg, sv_f64 (d->even_coeffs[8]), x2, d->c17);
+
+      svfloat64_t p = svmla_x (pg, p1415, x4, p1617);
+      p = svmla_x (pg, p1213, x4, p);
+      p = svmla_x (pg, p1011, x4, p);
+      p = svmla_x (pg, p89, x4, p);
+
+      p = svmla_x (pg, p67, x4, p);
+      p = svmla_x (pg, p45, x4, p);
+
+      p = svmla_x (pg, p23, x4, p);
+
+      p = svmla_x (pg, p01, x4, p);
+
+      option_2 = svmla_x (pg, ax, p, svmul_x (svptrue_b64 (), x2, ax));
+    }
+
+  if (unlikely (svptest_any (pg, special)))
+    return special_case (
+	x,
+	svreinterpret_f64 (sveor_x (
+	    pg, svreinterpret_u64 (svsel (ge1, option_1, option_2)), sign)),
+	special);
+
+  /* Choose the right option for each lane.  */
+  svfloat64_t y = svsel (ge1, option_1, option_2);
+  return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign));
+}
+
+TEST_SIG (SV, D, 1, asinh, -10.0, 10.0)
+TEST_ULP (SV_NAME_D1 (asinh), 2.52)
+TEST_DISABLE_FENV (SV_NAME_D1 (asinh))
+TEST_SYM_INTERVAL (SV_NAME_D1 (asinh), 0, 0x1p-26, 50000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (asinh), 0x1p-26, 1, 50000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (asinh), 1, 0x1p511, 50000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (asinh), 0x1p511, inf, 40000)
+/* Test vector asinh 3 times, with control lane < 1, > 1 and special.
+   Ensures svsel is choosing the right option in all cases.  */
+TEST_CONTROL_VALUE (SV_NAME_D1 (asinh), 0.5)
+TEST_CONTROL_VALUE (SV_NAME_D1 (asinh), 2)
+TEST_CONTROL_VALUE (SV_NAME_D1 (asinh), 0x1p600)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_asinhf_2u5.c b/math/aarch64/sve/asinhf.c
similarity index 53%
rename from pl/math/sv_asinhf_2u5.c
rename to math/aarch64/sve/asinhf.c
index 1f1f6e5c846f68..32aedbfd3a6d35 100644
--- a/pl/math/sv_asinhf_2u5.c
+++ b/math/aarch64/sve/asinhf.c
@@ -1,31 +1,33 @@
 /*
  * Single-precision SVE asinh(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "include/mathlib.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #include "sv_log1pf_inline.h"
 
-#define BigBound (0x5f800000)  /* asuint(0x1p64).  */
+#define BigBound 0x5f800000 /* asuint(0x1p64).  */
 
 static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+special_case (svuint32_t iax, svuint32_t sign, svfloat32_t y, svbool_t special)
 {
+  svfloat32_t x = svreinterpret_f32 (sveor_x (svptrue_b32 (), iax, sign));
+  y = svreinterpret_f32 (
+      svorr_x (svptrue_b32 (), sign, svreinterpret_u32 (y)));
   return sv_call_f32 (asinhf, x, y, special);
 }
 
 /* Single-precision SVE asinh(x) routine. Implements the same algorithm as
    vector asinhf and log1p.
 
-   Maximum error is 2.48 ULPs:
-   SV_NAME_F1 (asinh) (0x1.008864p-3) got 0x1.ffbbbcp-4
-				     want 0x1.ffbbb8p-4.  */
+   Maximum error is 1.92 ULPs:
+   SV_NAME_F1 (asinh) (-0x1.0922ecp-1) got -0x1.fd0bccp-2
+				      want -0x1.fd0bc8p-2.  */
 svfloat32_t SV_NAME_F1 (asinh) (svfloat32_t x, const svbool_t pg)
 {
   svfloat32_t ax = svabs_x (pg, x);
@@ -41,15 +43,15 @@ svfloat32_t SV_NAME_F1 (asinh) (svfloat32_t x, const svbool_t pg)
       = sv_log1pf_inline (svadd_x (pg, ax, svdiv_x (pg, ax2, d)), pg);
 
   if (unlikely (svptest_any (pg, special)))
-    return special_case (
-	x, svreinterpret_f32 (svorr_x (pg, sign, svreinterpret_u32 (y))),
-	special);
+    return special_case (iax, sign, y, special);
   return svreinterpret_f32 (svorr_x (pg, sign, svreinterpret_u32 (y)));
 }
 
-PL_SIG (SV, F, 1, asinh, -10.0, 10.0)
-PL_TEST_ULP (SV_NAME_F1 (asinh), 1.98)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (asinh), 0, 0x1p-12, 4000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (asinh), 0x1p-12, 1.0, 20000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (asinh), 1.0, 0x1p64, 20000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (asinh), 0x1p64, inf, 4000)
+TEST_SIG (SV, F, 1, asinh, -10.0, 10.0)
+TEST_ULP (SV_NAME_F1 (asinh), 1.43)
+TEST_DISABLE_FENV (SV_NAME_F1 (asinh))
+TEST_SYM_INTERVAL (SV_NAME_F1 (asinh), 0, 0x1p-12, 4000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (asinh), 0x1p-12, 1.0, 20000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (asinh), 1.0, 0x1p64, 20000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (asinh), 0x1p64, inf, 4000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_atan_2u5.c b/math/aarch64/sve/atan.c
similarity index 86%
rename from pl/math/sv_atan_2u5.c
rename to math/aarch64/sve/atan.c
index 7ab486a4c9d2c4..73fc29a94f23f6 100644
--- a/pl/math/sv_atan_2u5.c
+++ b/math/aarch64/sve/atan.c
@@ -1,14 +1,14 @@
 /*
  * Double-precision vector atan(x) function.
  *
- * Copyright (c) 2021-2023, Arm Limited.
+ * Copyright (c) 2021-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_sve_f64.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "sv_poly_f64.h"
 
 static const struct data
 {
@@ -79,9 +79,11 @@ svfloat64_t SV_NAME_D1 (atan) (svfloat64_t x, const svbool_t pg)
   return y;
 }
 
-PL_SIG (SV, D, 1, atan, -3.1, 3.1)
-PL_TEST_ULP (SV_NAME_D1 (atan), 1.78)
-PL_TEST_INTERVAL (SV_NAME_D1 (atan), 0.0, 1.0, 40000)
-PL_TEST_INTERVAL (SV_NAME_D1 (atan), 1.0, 100.0, 40000)
-PL_TEST_INTERVAL (SV_NAME_D1 (atan), 100, inf, 40000)
-PL_TEST_INTERVAL (SV_NAME_D1 (atan), -0, -inf, 40000)
+TEST_SIG (SV, D, 1, atan, -3.1, 3.1)
+TEST_ULP (SV_NAME_D1 (atan), 1.78)
+TEST_DISABLE_FENV (SV_NAME_D1 (atan))
+TEST_INTERVAL (SV_NAME_D1 (atan), 0.0, 1.0, 40000)
+TEST_INTERVAL (SV_NAME_D1 (atan), 1.0, 100.0, 40000)
+TEST_INTERVAL (SV_NAME_D1 (atan), 100, inf, 40000)
+TEST_INTERVAL (SV_NAME_D1 (atan), -0, -inf, 40000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_atan2_2u5.c b/math/aarch64/sve/atan2.c
similarity index 74%
rename from pl/math/sv_atan2_2u5.c
rename to math/aarch64/sve/atan2.c
index 00530a324a76fb..1e1d00678b1d91 100644
--- a/pl/math/sv_atan2_2u5.c
+++ b/math/aarch64/sve/atan2.c
@@ -1,14 +1,14 @@
 /*
  * Double-precision vector atan2(x) function.
  *
- * Copyright (c) 2021-2023, Arm Limited.
+ * Copyright (c) 2021-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_sve_f64.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "sv_poly_f64.h"
 
 static const struct data
 {
@@ -27,9 +27,6 @@ static const struct data
   .pi_over_2 = 0x1.921fb54442d18p+0,
 };
 
-/* Useful constants.  */
-#define SignMask sv_u64 (0x8000000000000000)
-
 /* Special cases i.e. 0, infinity, nan (fall back to scalar calls).  */
 static svfloat64_t NOINLINE
 special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret,
@@ -51,7 +48,8 @@ zeroinfnan (svuint64_t i, const svbool_t pg)
    x are reasonably close together. The greatest observed error is 2.28 ULP:
    _ZGVsMxvv_atan2 (-0x1.5915b1498e82fp+732, 0x1.54d11ef838826p+732)
    got -0x1.954f42f1fa841p-1 want -0x1.954f42f1fa843p-1.  */
-svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg)
+svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x,
+				const svbool_t pg)
 {
   const struct data *data_ptr = ptr_barrier (&data);
 
@@ -62,14 +60,15 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg)
   svbool_t cmp_y = zeroinfnan (iy, pg);
   svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y);
 
-  svuint64_t sign_x = svand_x (pg, ix, SignMask);
-  svuint64_t sign_y = svand_x (pg, iy, SignMask);
-  svuint64_t sign_xy = sveor_x (pg, sign_x, sign_y);
-
   svfloat64_t ax = svabs_x (pg, x);
   svfloat64_t ay = svabs_x (pg, y);
+  svuint64_t iax = svreinterpret_u64 (ax);
+  svuint64_t iay = svreinterpret_u64 (ay);
+
+  svuint64_t sign_x = sveor_x (pg, ix, iax);
+  svuint64_t sign_y = sveor_x (pg, iy, iay);
+  svuint64_t sign_xy = sveor_x (pg, sign_x, sign_y);
 
-  svbool_t pred_xlt0 = svcmplt (pg, x, 0.0);
   svbool_t pred_aygtax = svcmpgt (pg, ay, ax);
 
   /* Set up z for call to atan.  */
@@ -78,8 +77,9 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg)
   svfloat64_t z = svdiv_x (pg, n, d);
 
   /* Work out the correct shift.  */
-  svfloat64_t shift = svsel (pred_xlt0, sv_f64 (-2.0), sv_f64 (0.0));
-  shift = svsel (pred_aygtax, svadd_x (pg, shift, 1.0), shift);
+  svfloat64_t shift = svreinterpret_f64 (svlsr_x (pg, sign_x, 1));
+  shift = svsel (pred_aygtax, sv_f64 (1.0), shift);
+  shift = svreinterpret_f64 (svorr_x (pg, sign_x, svreinterpret_u64 (shift)));
   shift = svmul_x (pg, shift, data_ptr->pi_over_2);
 
   /* Use split Estrin scheme for P(z^2) with deg(P)=19.  */
@@ -99,18 +99,20 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg)
   ret = svadd_m (pg, ret, shift);
 
   /* Account for the sign of x and y.  */
-  ret = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy));
-
   if (unlikely (svptest_any (pg, cmp_xy)))
-    return special_case (y, x, ret, cmp_xy);
-
-  return ret;
+    return special_case (
+	y, x,
+	svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy)),
+	cmp_xy);
+  return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy));
 }
 
 /* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h.  */
-PL_SIG (SV, D, 2, atan2)
-PL_TEST_ULP (SV_NAME_D2 (atan2), 1.78)
-PL_TEST_INTERVAL (SV_NAME_D2 (atan2), 0.0, 1.0, 40000)
-PL_TEST_INTERVAL (SV_NAME_D2 (atan2), 1.0, 100.0, 40000)
-PL_TEST_INTERVAL (SV_NAME_D2 (atan2), 100, inf, 40000)
-PL_TEST_INTERVAL (SV_NAME_D2 (atan2), -0, -inf, 40000)
+TEST_SIG (SV, D, 2, atan2)
+TEST_ULP (SV_NAME_D2 (atan2), 1.78)
+TEST_DISABLE_FENV (SV_NAME_D2 (atan2))
+TEST_INTERVAL (SV_NAME_D2 (atan2), 0.0, 1.0, 40000)
+TEST_INTERVAL (SV_NAME_D2 (atan2), 1.0, 100.0, 40000)
+TEST_INTERVAL (SV_NAME_D2 (atan2), 100, inf, 40000)
+TEST_INTERVAL (SV_NAME_D2 (atan2), -0, -inf, 40000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_atan2f_3u.c b/math/aarch64/sve/atan2f.c
similarity index 68%
rename from pl/math/sv_atan2f_3u.c
rename to math/aarch64/sve/atan2f.c
index 9ff73ecb74ba20..563b708cfcbb1f 100644
--- a/pl/math/sv_atan2f_3u.c
+++ b/math/aarch64/sve/atan2f.c
@@ -1,14 +1,14 @@
 /*
  * Single-precision vector atan2f(x) function.
  *
- * Copyright (c) 2021-2023, Arm Limited.
+ * Copyright (c) 2021-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_sve_f32.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "sv_poly_f32.h"
 
 static const struct data
 {
@@ -22,10 +22,8 @@ static const struct data
   .pi_over_2 = 0x1.921fb6p+0f,
 };
 
-#define SignMask sv_u32 (0x80000000)
-
 /* Special cases i.e. 0, infinity, nan (fall back to scalar calls).  */
-static inline svfloat32_t
+static svfloat32_t NOINLINE
 special_case (svfloat32_t y, svfloat32_t x, svfloat32_t ret,
 	      const svbool_t cmp)
 {
@@ -46,7 +44,8 @@ zeroinfnan (svuint32_t i, const svbool_t pg)
    observed error is 2.95 ULP:
    _ZGVsMxvv_atan2f (0x1.93836cp+6, 0x1.8cae1p+6) got 0x1.967f06p-1
 						 want 0x1.967f00p-1.  */
-svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg)
+svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x,
+				const svbool_t pg)
 {
   const struct data *data_ptr = ptr_barrier (&data);
 
@@ -57,14 +56,15 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg)
   svbool_t cmp_y = zeroinfnan (iy, pg);
   svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y);
 
-  svuint32_t sign_x = svand_x (pg, ix, SignMask);
-  svuint32_t sign_y = svand_x (pg, iy, SignMask);
-  svuint32_t sign_xy = sveor_x (pg, sign_x, sign_y);
-
   svfloat32_t ax = svabs_x (pg, x);
   svfloat32_t ay = svabs_x (pg, y);
+  svuint32_t iax = svreinterpret_u32 (ax);
+  svuint32_t iay = svreinterpret_u32 (ay);
+
+  svuint32_t sign_x = sveor_x (pg, ix, iax);
+  svuint32_t sign_y = sveor_x (pg, iy, iay);
+  svuint32_t sign_xy = sveor_x (pg, sign_x, sign_y);
 
-  svbool_t pred_xlt0 = svcmplt (pg, x, 0.0);
   svbool_t pred_aygtax = svcmpgt (pg, ay, ax);
 
   /* Set up z for call to atan.  */
@@ -73,11 +73,12 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg)
   svfloat32_t z = svdiv_x (pg, n, d);
 
   /* Work out the correct shift.  */
-  svfloat32_t shift = svsel (pred_xlt0, sv_f32 (-2.0), sv_f32 (0.0));
-  shift = svsel (pred_aygtax, svadd_x (pg, shift, 1.0), shift);
+  svfloat32_t shift = svreinterpret_f32 (svlsr_x (pg, sign_x, 1));
+  shift = svsel (pred_aygtax, sv_f32 (1.0), shift);
+  shift = svreinterpret_f32 (svorr_x (pg, sign_x, svreinterpret_u32 (shift)));
   shift = svmul_x (pg, shift, sv_f32 (data_ptr->pi_over_2));
 
-  /* Use split Estrin scheme for P(z^2) with deg(P)=7.  */
+  /* Use pure Estrin scheme for P(z^2) with deg(P)=7.  */
   svfloat32_t z2 = svmul_x (pg, z, z);
   svfloat32_t z4 = svmul_x (pg, z2, z2);
   svfloat32_t z8 = svmul_x (pg, z4, z4);
@@ -91,18 +92,22 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg)
   ret = svadd_m (pg, ret, shift);
 
   /* Account for the sign of x and y.  */
-  ret = svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy));
 
   if (unlikely (svptest_any (pg, cmp_xy)))
-    return special_case (y, x, ret, cmp_xy);
+    return special_case (
+	y, x,
+	svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy)),
+	cmp_xy);
 
-  return ret;
+  return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy));
 }
 
 /* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h.  */
-PL_SIG (SV, F, 2, atan2)
-PL_TEST_ULP (SV_NAME_F2 (atan2), 2.45)
-PL_TEST_INTERVAL (SV_NAME_F2 (atan2), 0.0, 1.0, 40000)
-PL_TEST_INTERVAL (SV_NAME_F2 (atan2), 1.0, 100.0, 40000)
-PL_TEST_INTERVAL (SV_NAME_F2 (atan2), 100, inf, 40000)
-PL_TEST_INTERVAL (SV_NAME_F2 (atan2), -0, -inf, 40000)
+TEST_SIG (SV, F, 2, atan2)
+TEST_ULP (SV_NAME_F2 (atan2), 2.45)
+TEST_DISABLE_FENV (SV_NAME_F2 (atan2))
+TEST_INTERVAL (SV_NAME_F2 (atan2), 0.0, 1.0, 40000)
+TEST_INTERVAL (SV_NAME_F2 (atan2), 1.0, 100.0, 40000)
+TEST_INTERVAL (SV_NAME_F2 (atan2), 100, inf, 40000)
+TEST_INTERVAL (SV_NAME_F2 (atan2), -0, -inf, 40000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_atanf_2u9.c b/math/aarch64/sve/atanf.c
similarity index 83%
rename from pl/math/sv_atanf_2u9.c
rename to math/aarch64/sve/atanf.c
index 4defb356e7f9cd..a2cd37b1274449 100644
--- a/pl/math/sv_atanf_2u9.c
+++ b/math/aarch64/sve/atanf.c
@@ -1,14 +1,14 @@
 /*
  * Single-precision vector atan(x) function.
  *
- * Copyright (c) 2021-2023, Arm Limited.
+ * Copyright (c) 2021-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_sve_f32.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "sv_poly_f32.h"
 
 static const struct data
 {
@@ -68,9 +68,11 @@ svfloat32_t SV_NAME_F1 (atan) (svfloat32_t x, const svbool_t pg)
   return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign));
 }
 
-PL_SIG (SV, F, 1, atan, -3.1, 3.1)
-PL_TEST_ULP (SV_NAME_F1 (atan), 2.9)
-PL_TEST_INTERVAL (SV_NAME_F1 (atan), 0.0, 1.0, 40000)
-PL_TEST_INTERVAL (SV_NAME_F1 (atan), 1.0, 100.0, 40000)
-PL_TEST_INTERVAL (SV_NAME_F1 (atan), 100, inf, 40000)
-PL_TEST_INTERVAL (SV_NAME_F1 (atan), -0, -inf, 40000)
+TEST_SIG (SV, F, 1, atan, -3.1, 3.1)
+TEST_ULP (SV_NAME_F1 (atan), 2.9)
+TEST_DISABLE_FENV (SV_NAME_F1 (atan))
+TEST_INTERVAL (SV_NAME_F1 (atan), 0.0, 1.0, 40000)
+TEST_INTERVAL (SV_NAME_F1 (atan), 1.0, 100.0, 40000)
+TEST_INTERVAL (SV_NAME_F1 (atan), 100, inf, 40000)
+TEST_INTERVAL (SV_NAME_F1 (atan), -0, -inf, 40000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_atanh_3u3.c b/math/aarch64/sve/atanh.c
similarity index 72%
rename from pl/math/sv_atanh_3u3.c
rename to math/aarch64/sve/atanh.c
index dcc9350b4962b1..b404df56fd7541 100644
--- a/pl/math/sv_atanh_3u3.c
+++ b/math/aarch64/sve/atanh.c
@@ -1,13 +1,13 @@
 /*
  * Double-precision SVE atanh(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define WANT_SV_LOG1P_K0_SHORTCUT 0
 #include "sv_log1p_inline.h"
@@ -34,7 +34,6 @@ svfloat64_t SV_NAME_D1 (atanh) (svfloat64_t x, const svbool_t pg)
   svfloat64_t halfsign = svreinterpret_f64 (svorr_x (pg, sign, Half));
 
   /* It is special if iax >= 1.  */
-//   svbool_t special = svcmpge (pg, iax, One);
   svbool_t special = svacge (pg, x, 1.0);
 
   /* Computation is performed based on the following sequence of equality:
@@ -50,11 +49,14 @@ svfloat64_t SV_NAME_D1 (atanh) (svfloat64_t x, const svbool_t pg)
   return svmul_x (pg, halfsign, y);
 }
 
-PL_SIG (SV, D, 1, atanh, -1.0, 1.0)
-PL_TEST_ULP (SV_NAME_D1 (atanh), 3.32)
+TEST_SIG (SV, D, 1, atanh, -1.0, 1.0)
+TEST_ULP (SV_NAME_D1 (atanh), 3.32)
+TEST_DISABLE_FENV (SV_NAME_D1 (atanh))
+TEST_SYM_INTERVAL (SV_NAME_D1 (atanh), 0, 0x1p-23, 10000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (atanh), 0x1p-23, 1, 90000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (atanh), 1, inf, 100)
 /* atanh is asymptotic at 1, which is the default control value - have to set
- -c 0 specially to ensure fp exceptions are triggered correctly (choice of
- control lane is irrelevant if fp exceptions are disabled).  */
-PL_TEST_SYM_INTERVAL_C (SV_NAME_D1 (atanh), 0, 0x1p-23, 10000, 0)
-PL_TEST_SYM_INTERVAL_C (SV_NAME_D1 (atanh), 0x1p-23, 1, 90000, 0)
-PL_TEST_SYM_INTERVAL_C (SV_NAME_D1 (atanh), 1, inf, 100, 0)
+   -c 0 specially to ensure fp exceptions are triggered correctly (choice of
+   control lane is irrelevant if fp exceptions are disabled).  */
+TEST_CONTROL_VALUE (SV_NAME_D1 (atanh), 0)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_atanhf_2u8.c b/math/aarch64/sve/atanhf.c
similarity index 61%
rename from pl/math/sv_atanhf_2u8.c
rename to math/aarch64/sve/atanhf.c
index 413c60ce05daf8..2e10a8cd22f7f1 100644
--- a/pl/math/sv_atanhf_2u8.c
+++ b/math/aarch64/sve/atanhf.c
@@ -1,14 +1,13 @@
 /*
  * Single-precision vector atanh(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "mathlib.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #include "sv_log1pf_inline.h"
 
@@ -16,15 +15,18 @@
 #define Half (0x3f000000)
 
 static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+special_case (svuint32_t iax, svuint32_t sign, svfloat32_t halfsign,
+	      svfloat32_t y, svbool_t special)
 {
+  svfloat32_t x = svreinterpret_f32 (sveor_x (svptrue_b32 (), iax, sign));
+  y = svmul_x (svptrue_b32 (), halfsign, y);
   return sv_call_f32 (atanhf, x, y, special);
 }
 
 /* Approximation for vector single-precision atanh(x) using modified log1p.
-   The maximum error is 2.28 ULP:
-   _ZGVsMxv_atanhf(0x1.ff1194p-5) got 0x1.ffbbbcp-5
-				 want 0x1.ffbbb6p-5.  */
+   The maximum error is 1.99 ULP:
+   _ZGVsMxv_atanhf(0x1.f1583p-5) got 0x1.f1f4fap-5
+				want 0x1.f1f4f6p-5.  */
 svfloat32_t SV_NAME_F1 (atanh) (svfloat32_t x, const svbool_t pg)
 {
   svfloat32_t ax = svabs_x (pg, x);
@@ -41,16 +43,19 @@ svfloat32_t SV_NAME_F1 (atanh) (svfloat32_t x, const svbool_t pg)
   y = sv_log1pf_inline (y, pg);
 
   if (unlikely (svptest_any (pg, special)))
-    return special_case (x, svmul_x (pg, halfsign, y), special);
+    return special_case (iax, sign, halfsign, y, special);
 
   return svmul_x (pg, halfsign, y);
 }
 
-PL_SIG (SV, F, 1, atanh, -1.0, 1.0)
-PL_TEST_ULP (SV_NAME_F1 (atanh), 2.59)
+TEST_SIG (SV, F, 1, atanh, -1.0, 1.0)
+TEST_ULP (SV_NAME_F1 (atanh), 1.50)
+TEST_DISABLE_FENV (SV_NAME_F1 (atanh))
+TEST_SYM_INTERVAL (SV_NAME_F1 (atanh), 0, 0x1p-12, 1000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (atanh), 0x1p-12, 1, 20000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (atanh), 1, inf, 1000)
 /* atanh is asymptotic at 1, which is the default control value - have to set
  -c 0 specially to ensure fp exceptions are triggered correctly (choice of
  control lane is irrelevant if fp exceptions are disabled).  */
-PL_TEST_SYM_INTERVAL_C (SV_NAME_F1 (atanh), 0, 0x1p-12, 1000, 0)
-PL_TEST_SYM_INTERVAL_C (SV_NAME_F1 (atanh), 0x1p-12, 1, 20000, 0)
-PL_TEST_SYM_INTERVAL_C (SV_NAME_F1 (atanh), 1, inf, 1000, 0)
+TEST_CONTROL_VALUE (SV_NAME_F1 (atanh), 0)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_cbrt_2u.c b/math/aarch64/sve/cbrt.c
similarity index 77%
rename from pl/math/sv_cbrt_2u.c
rename to math/aarch64/sve/cbrt.c
index 192f1cd80d5909..3e6a972463f0a7 100644
--- a/pl/math/sv_cbrt_2u.c
+++ b/math/aarch64/sve/cbrt.c
@@ -1,14 +1,14 @@
 /*
  * Double-precision SVE cbrt(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_sve_f64.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "sv_poly_f64.h"
 
 const static struct data
 {
@@ -48,10 +48,16 @@ shifted_lookup (const svbool_t pg, const float64_t *table, svint64_t i)
 }
 
 /* Approximation for double-precision vector cbrt(x), using low-order
-   polynomial and two Newton iterations. Greatest observed error is 1.79 ULP.
-   Errors repeat according to the exponent, for instance an error observed for
-   double value m * 2^e will be observed for any input m * 2^(e + 3*i), where i
-   is an integer.
+   polynomial and two Newton iterations.
+
+   The vector version of frexp does not handle subnormals
+   correctly. As a result these need to be handled by the scalar
+   fallback, where accuracy may be worse than that of the vector code
+   path.
+
+   Greatest observed error in the normal range is 1.79 ULP. Errors repeat
+   according to the exponent, for instance an error observed for double value m
+   * 2^e will be observed for any input m * 2^(e + 3*i), where i is an integer.
    _ZGVsMxv_cbrt (0x0.3fffb8d4413f3p-1022) got 0x1.965f53b0e5d97p-342
 					  want 0x1.965f53b0e5d95p-342.  */
 svfloat64_t SV_NAME_D1 (cbrt) (svfloat64_t x, const svbool_t pg)
@@ -117,6 +123,13 @@ svfloat64_t SV_NAME_D1 (cbrt) (svfloat64_t x, const svbool_t pg)
   return svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (y), sign));
 }
 
-PL_SIG (SV, D, 1, cbrt, -10.0, 10.0)
-PL_TEST_ULP (SV_NAME_D1 (cbrt), 1.30)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (cbrt), 0, inf, 1000000)
+/* Worst-case ULP error assumes that scalar fallback is GLIBC 2.40 cbrt, which
+   has ULP error of 3.67 at 0x1.7a337e1ba1ec2p-257 [1]. Largest observed error
+   in the vector path is 1.79 ULP.
+   [1] Innocente, V., & Zimmermann, P. (2024). Accuracy of Mathematical
+   Functions in Single, Double, Double Extended, and Quadruple Precision.  */
+TEST_SIG (SV, D, 1, cbrt, -10.0, 10.0)
+TEST_ULP (SV_NAME_D1 (cbrt), 3.17)
+TEST_DISABLE_FENV (SV_NAME_D1 (cbrt))
+TEST_SYM_INTERVAL (SV_NAME_D1 (cbrt), 0, inf, 1000000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_cbrtf_1u7.c b/math/aarch64/sve/cbrtf.c
similarity index 92%
rename from pl/math/sv_cbrtf_1u7.c
rename to math/aarch64/sve/cbrtf.c
index 5b625f308827ce..afdace7865f19c 100644
--- a/pl/math/sv_cbrtf_1u7.c
+++ b/math/aarch64/sve/cbrtf.c
@@ -1,14 +1,14 @@
 /*
  * Single-precision SVE cbrt(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_sve_f32.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "sv_poly_f32.h"
 
 const static struct data
 {
@@ -111,6 +111,8 @@ svfloat32_t SV_NAME_F1 (cbrt) (svfloat32_t x, const svbool_t pg)
   return svreinterpret_f32 (svorr_x (pg, svreinterpret_u32 (y), sign));
 }
 
-PL_SIG (SV, F, 1, cbrt, -10.0, 10.0)
-PL_TEST_ULP (SV_NAME_F1 (cbrt), 1.15)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cbrt), 0, inf, 1000000)
+TEST_SIG (SV, F, 1, cbrt, -10.0, 10.0)
+TEST_ULP (SV_NAME_F1 (cbrt), 1.15)
+TEST_DISABLE_FENV (SV_NAME_F1 (cbrt))
+TEST_SYM_INTERVAL (SV_NAME_F1 (cbrt), 0, inf, 1000000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_cexpi_3u5.c b/math/aarch64/sve/cexpi.c
similarity index 79%
rename from pl/math/sv_cexpi_3u5.c
rename to math/aarch64/sve/cexpi.c
index 920acfea5da0f5..0ccd110484c88b 100644
--- a/pl/math/sv_cexpi_3u5.c
+++ b/math/aarch64/sve/cexpi.c
@@ -1,13 +1,13 @@
 /*
  * Double-precision vector cexpi function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#include "sv_sincos_common.h"
 #include "sv_math.h"
-#include "pl_test.h"
+#include "sv_sincos_common.h"
+#include "test_defs.h"
 
 static svfloat64x2_t NOINLINE
 special_case (svfloat64_t x, svbool_t special, svfloat64x2_t y)
@@ -34,12 +34,15 @@ _ZGVsMxv_cexpi (svfloat64_t x, svbool_t pg)
   return sc;
 }
 
-PL_TEST_ULP (_ZGVsMxv_cexpi_sin, 2.73)
-PL_TEST_ULP (_ZGVsMxv_cexpi_cos, 2.73)
+TEST_DISABLE_FENV (_ZGVsMxv_cexpi_sin)
+TEST_DISABLE_FENV (_ZGVsMxv_cexpi_cos)
+TEST_ULP (_ZGVsMxv_cexpi_sin, 2.73)
+TEST_ULP (_ZGVsMxv_cexpi_cos, 2.73)
 #define SV_CEXPI_INTERVAL(lo, hi, n)                                          \
-  PL_TEST_INTERVAL (_ZGVsMxv_cexpi_sin, lo, hi, n)                            \
-  PL_TEST_INTERVAL (_ZGVsMxv_cexpi_cos, lo, hi, n)
+  TEST_INTERVAL (_ZGVsMxv_cexpi_sin, lo, hi, n)                               \
+  TEST_INTERVAL (_ZGVsMxv_cexpi_cos, lo, hi, n)
 SV_CEXPI_INTERVAL (0, 0x1p23, 500000)
 SV_CEXPI_INTERVAL (-0, -0x1p23, 500000)
 SV_CEXPI_INTERVAL (0x1p23, inf, 10000)
 SV_CEXPI_INTERVAL (-0x1p23, -inf, 10000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_cexpif_1u8.c b/math/aarch64/sve/cexpif.c
similarity index 80%
rename from pl/math/sv_cexpif_1u8.c
rename to math/aarch64/sve/cexpif.c
index 93f2f998cb3896..fd07ce553cd893 100644
--- a/pl/math/sv_cexpif_1u8.c
+++ b/math/aarch64/sve/cexpif.c
@@ -1,13 +1,13 @@
 /*
  * Single-precision vector cexpi function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#include "sv_sincosf_common.h"
 #include "sv_math.h"
-#include "pl_test.h"
+#include "sv_sincosf_common.h"
+#include "test_defs.h"
 
 static svfloat32x2_t NOINLINE
 special_case (svfloat32_t x, svbool_t special, svfloat32x2_t y)
@@ -36,12 +36,15 @@ _ZGVsMxv_cexpif (svfloat32_t x, svbool_t pg)
   return sc;
 }
 
-PL_TEST_ULP (_ZGVsMxv_cexpif_sin, 1.17)
-PL_TEST_ULP (_ZGVsMxv_cexpif_cos, 1.31)
+TEST_DISABLE_FENV (_ZGVsMxv_cexpif_sin)
+TEST_DISABLE_FENV (_ZGVsMxv_cexpif_cos)
+TEST_ULP (_ZGVsMxv_cexpif_sin, 1.17)
+TEST_ULP (_ZGVsMxv_cexpif_cos, 1.31)
 #define SV_CEXPIF_INTERVAL(lo, hi, n)                                         \
-  PL_TEST_INTERVAL (_ZGVsMxv_cexpif_sin, lo, hi, n)                           \
-  PL_TEST_INTERVAL (_ZGVsMxv_cexpif_cos, lo, hi, n)
+  TEST_INTERVAL (_ZGVsMxv_cexpif_sin, lo, hi, n)                              \
+  TEST_INTERVAL (_ZGVsMxv_cexpif_cos, lo, hi, n)
 SV_CEXPIF_INTERVAL (0, 0x1p20, 500000)
 SV_CEXPIF_INTERVAL (-0, -0x1p20, 500000)
 SV_CEXPIF_INTERVAL (0x1p20, inf, 10000)
 SV_CEXPIF_INTERVAL (-0x1p20, -inf, 10000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_cos_2u5.c b/math/aarch64/sve/cos.c
similarity index 88%
rename from pl/math/sv_cos_2u5.c
rename to math/aarch64/sve/cos.c
index 76af3459b3f2e2..93e93674a98a1d 100644
--- a/pl/math/sv_cos_2u5.c
+++ b/math/aarch64/sve/cos.c
@@ -1,13 +1,13 @@
 /*
  * Double-precision SVE cos(x) function.
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -80,7 +80,9 @@ svfloat64_t SV_NAME_D1 (cos) (svfloat64_t x, const svbool_t pg)
   return svmul_x (pg, f, y);
 }
 
-PL_SIG (SV, D, 1, cos, -3.1, 3.1)
-PL_TEST_ULP (SV_NAME_D1 (cos), 1.61)
-PL_TEST_INTERVAL (SV_NAME_D1 (cos), 0, 0xffff0000, 10000)
-PL_TEST_INTERVAL (SV_NAME_D1 (cos), 0x1p-4, 0x1p4, 500000)
+TEST_SIG (SV, D, 1, cos, -3.1, 3.1)
+TEST_ULP (SV_NAME_D1 (cos), 1.61)
+TEST_DISABLE_FENV (SV_NAME_D1 (cos))
+TEST_INTERVAL (SV_NAME_D1 (cos), 0, 0xffff0000, 10000)
+TEST_INTERVAL (SV_NAME_D1 (cos), 0x1p-4, 0x1p4, 500000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_cosf_2u1.c b/math/aarch64/sve/cosf.c
similarity index 87%
rename from pl/math/sv_cosf_2u1.c
rename to math/aarch64/sve/cosf.c
index 4bdb0dd146bbfc..7d18f8c2ad21a4 100644
--- a/pl/math/sv_cosf_2u1.c
+++ b/math/aarch64/sve/cosf.c
@@ -1,13 +1,13 @@
 /*
  * Single-precision SVE cos(x) function.
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -74,7 +74,9 @@ svfloat32_t SV_NAME_F1 (cos) (svfloat32_t x, const svbool_t pg)
   return svmul_x (pg, f, y);
 }
 
-PL_SIG (SV, F, 1, cos, -3.1, 3.1)
-PL_TEST_ULP (SV_NAME_F1 (cos), 1.57)
-PL_TEST_INTERVAL (SV_NAME_F1 (cos), 0, 0xffff0000, 10000)
-PL_TEST_INTERVAL (SV_NAME_F1 (cos), 0x1p-4, 0x1p4, 500000)
+TEST_SIG (SV, F, 1, cos, -3.1, 3.1)
+TEST_ULP (SV_NAME_F1 (cos), 1.57)
+TEST_DISABLE_FENV (SV_NAME_F1 (cos))
+TEST_INTERVAL (SV_NAME_F1 (cos), 0, 0xffff0000, 10000)
+TEST_INTERVAL (SV_NAME_F1 (cos), 0x1p-4, 0x1p4, 500000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_cosh_2u.c b/math/aarch64/sve/cosh.c
similarity index 77%
rename from pl/math/sv_cosh_2u.c
rename to math/aarch64/sve/cosh.c
index a6d743fb9b966a..775854cfbe5a87 100644
--- a/pl/math/sv_cosh_2u.c
+++ b/math/aarch64/sve/cosh.c
@@ -1,19 +1,19 @@
 /*
  * Double-precision SVE cosh(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2025, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
   float64_t poly[3];
   float64_t inv_ln2, ln2_hi, ln2_lo, shift, thres;
-  uint64_t index_mask, special_bound;
+  uint64_t special_bound;
 } data = {
   .poly = { 0x1.fffffffffffd4p-2, 0x1.5555571d6b68cp-3,
 	    0x1.5555576a59599p-5, },
@@ -25,14 +25,16 @@ static const struct data
   .shift = 0x1.8p+52,
   .thres = 704.0,
 
-  .index_mask = 0xff,
   /* 0x1.6p9, above which exp overflows.  */
   .special_bound = 0x4086000000000000,
 };
 
 static svfloat64_t NOINLINE
-special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
+special_case (svfloat64_t x, svbool_t pg, svfloat64_t t, svbool_t special)
 {
+  svfloat64_t half_t = svmul_x (svptrue_b64 (), t, 0.5);
+  svfloat64_t half_over_t = svdivr_x (pg, t, 0.5);
+  svfloat64_t y = svadd_x (pg, half_t, half_over_t);
   return sv_call_f64 (cosh, x, y, special);
 }
 
@@ -50,12 +52,12 @@ exp_inline (svfloat64_t x, const svbool_t pg, const struct data *d)
 
   svuint64_t u = svreinterpret_u64 (z);
   svuint64_t e = svlsl_x (pg, u, 52 - V_EXP_TAIL_TABLE_BITS);
-  svuint64_t i = svand_x (pg, u, d->index_mask);
+  svuint64_t i = svand_x (svptrue_b64 (), u, 0xff);
 
   svfloat64_t y = svmla_x (pg, sv_f64 (d->poly[1]), r, d->poly[2]);
   y = svmla_x (pg, sv_f64 (d->poly[0]), r, y);
   y = svmla_x (pg, sv_f64 (1.0), r, y);
-  y = svmul_x (pg, r, y);
+  y = svmul_x (svptrue_b64 (), r, y);
 
   /* s = 2^(n/N).  */
   u = svld1_gather_index (pg, __v_exp_tail_data, i);
@@ -84,17 +86,19 @@ svfloat64_t SV_NAME_D1 (cosh) (svfloat64_t x, const svbool_t pg)
   /* Up to the point that exp overflows, we can use it to calculate cosh by
      exp(|x|) / 2 + 1 / (2 * exp(|x|)).  */
   svfloat64_t t = exp_inline (ax, pg, d);
-  svfloat64_t half_t = svmul_x (pg, t, 0.5);
-  svfloat64_t half_over_t = svdivr_x (pg, t, 0.5);
 
   /* Fall back to scalar for any special cases.  */
   if (unlikely (svptest_any (pg, special)))
-    return special_case (x, svadd_x (pg, half_t, half_over_t), special);
+    return special_case (x, pg, t, special);
 
+  svfloat64_t half_t = svmul_x (svptrue_b64 (), t, 0.5);
+  svfloat64_t half_over_t = svdivr_x (pg, t, 0.5);
   return svadd_x (pg, half_t, half_over_t);
 }
 
-PL_SIG (SV, D, 1, cosh, -10.0, 10.0)
-PL_TEST_ULP (SV_NAME_D1 (cosh), 1.43)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (cosh), 0, 0x1.6p9, 100000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (cosh), 0x1.6p9, inf, 1000)
+TEST_SIG (SV, D, 1, cosh, -10.0, 10.0)
+TEST_ULP (SV_NAME_D1 (cosh), 1.43)
+TEST_DISABLE_FENV (SV_NAME_D1 (cosh))
+TEST_SYM_INTERVAL (SV_NAME_D1 (cosh), 0, 0x1.6p9, 100000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (cosh), 0x1.6p9, inf, 1000)
+CLOSE_SVE_ATTR
\ No newline at end of file
diff --git a/math/aarch64/sve/coshf.c b/math/aarch64/sve/coshf.c
new file mode 100644
index 00000000000000..b79fed2374b55c
--- /dev/null
+++ b/math/aarch64/sve/coshf.c
@@ -0,0 +1,62 @@
+/*
+ * Single-precision SVE cosh(x) function.
+ *
+ * Copyright (c) 2023-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "sv_expf_inline.h"
+
+static const struct data
+{
+  struct sv_expf_data expf_consts;
+  float special_bound;
+} data = {
+  .expf_consts = SV_EXPF_DATA,
+  /* 0x1.5a92d8p+6: expf overflows above this, so have to use special case.  */
+  .special_bound = 0x1.5a92d8p+6,
+};
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t half_e, svfloat32_t half_over_e,
+	      svbool_t pg)
+{
+  return sv_call_f32 (coshf, x, svadd_x (svptrue_b32 (), half_e, half_over_e),
+		      pg);
+}
+
+/* Single-precision vector cosh, using vector expf.
+   Maximum error is 2.77 ULP:
+   _ZGVsMxv_coshf(-0x1.5b38f4p+1) got 0x1.e45946p+2
+				 want 0x1.e4594cp+2.  */
+svfloat32_t SV_NAME_F1 (cosh) (svfloat32_t x, svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  svbool_t special = svacge (pg, x, d->special_bound);
+
+  /* Calculate cosh by exp(x) / 2 + exp(-x) / 2.
+     Note that x is passed to exp here, rather than |x|. This is to avoid using
+     destructive unary ABS for better register usage. However it means the
+     routine is not exactly symmetrical, as the exp helper is slightly less
+     accurate in the negative range.  */
+  svfloat32_t e = expf_inline (x, pg, &d->expf_consts);
+  svfloat32_t half_e = svmul_x (svptrue_b32 (), e, 0.5);
+  svfloat32_t half_over_e = svdivr_x (pg, e, 0.5);
+
+  if (unlikely (svptest_any (pg, special)))
+    return special_case (x, half_e, half_over_e, special);
+
+  return svadd_x (svptrue_b32 (), half_e, half_over_e);
+}
+
+TEST_SIG (SV, F, 1, cosh, -10.0, 10.0)
+TEST_ULP (SV_NAME_F1 (cosh), 2.28)
+TEST_DISABLE_FENV (SV_NAME_F1 (cosh))
+TEST_SYM_INTERVAL (SV_NAME_F1 (cosh), 0, 0x1p-63, 100)
+TEST_SYM_INTERVAL (SV_NAME_F1 (cosh), 0, 0x1.5a92d8p+6, 80000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (cosh), 0x1.5a92d8p+6, inf, 2000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_cospi_3u2.c b/math/aarch64/sve/cospi.c
similarity index 78%
rename from pl/math/sv_cospi_3u2.c
rename to math/aarch64/sve/cospi.c
index d80f899c41e410..9859dbe7a44c7e 100644
--- a/pl/math/sv_cospi_3u2.c
+++ b/math/aarch64/sve/cospi.c
@@ -1,15 +1,15 @@
 /*
  * Double-precision SVE cospi(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#include "mathlib.h"
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_sve_f64.h"
+#include "mathlib.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "sv_poly_f64.h"
 
 static const struct data
 {
@@ -55,9 +55,12 @@ svfloat64_t SV_NAME_D1 (cospi) (svfloat64_t x, const svbool_t pg)
   return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign));
 }
 
-PL_SIG (SV, D, 1, cospi, -0.9, 0.9)
-PL_TEST_ULP (SV_NAME_D1 (cospi), 2.71)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (cospi), 0, 0x1p-63, 5000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (cospi), 0x1p-63, 0.5, 10000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (cospi), 0.5, 0x1p51, 10000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (cospi), 0x1p51, inf, 100000)
+#if WANT_TRIGPI_TESTS
+TEST_ULP (SV_NAME_D1 (cospi), 2.71)
+TEST_DISABLE_FENV (SV_NAME_D1 (cospi))
+TEST_SYM_INTERVAL (SV_NAME_D1 (cospi), 0, 0x1p-63, 5000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (cospi), 0x1p-63, 0.5, 10000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (cospi), 0.5, 0x1p51, 10000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (cospi), 0x1p51, inf, 100000)
+#endif
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_cospif_2u6.c b/math/aarch64/sve/cospif.c
similarity index 75%
rename from pl/math/sv_cospif_2u6.c
rename to math/aarch64/sve/cospif.c
index fb2922d0533abf..d65a2b6190231f 100644
--- a/pl/math/sv_cospif_2u6.c
+++ b/math/aarch64/sve/cospif.c
@@ -1,15 +1,15 @@
 /*
  * Single-precision SVE cospi(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#include "mathlib.h"
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_sve_f32.h"
+#include "mathlib.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "sv_poly_f32.h"
 
 static const struct data
 {
@@ -51,9 +51,12 @@ svfloat32_t SV_NAME_F1 (cospi) (svfloat32_t x, const svbool_t pg)
   return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign));
 }
 
-PL_SIG (SV, F, 1, cospi, -0.9, 0.9)
-PL_TEST_ULP (SV_NAME_F1 (cospi), 2.08)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cospi), 0, 0x1p-31, 5000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cospi), 0x1p-31, 0.5, 10000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cospi), 0.5, 0x1p31f, 10000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cospi), 0x1p31f, inf, 10000)
+#if WANT_TRIGPI_TESTS
+TEST_ULP (SV_NAME_F1 (cospi), 2.08)
+TEST_DISABLE_FENV (SV_NAME_F1 (cospi))
+TEST_SYM_INTERVAL (SV_NAME_F1 (cospi), 0, 0x1p-31, 5000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (cospi), 0x1p-31, 0.5, 10000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (cospi), 0.5, 0x1p31f, 10000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (cospi), 0x1p31f, inf, 10000)
+#endif
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_erf_2u5.c b/math/aarch64/sve/erf.c
similarity index 83%
rename from pl/math/sv_erf_2u5.c
rename to math/aarch64/sve/erf.c
index cbf9718e5bb0fd..ccade93e103397 100644
--- a/pl/math/sv_erf_2u5.c
+++ b/math/aarch64/sve/erf.c
@@ -1,13 +1,13 @@
 /*
  * Double-precision vector erf(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -57,14 +57,16 @@ svfloat64_t SV_NAME_D1 (erf) (svfloat64_t x, const svbool_t pg)
   svfloat64_t a = svabs_x (pg, x);
   svfloat64_t shift = sv_f64 (dat->shift);
   svfloat64_t z = svadd_x (pg, a, shift);
-  svuint64_t i
-      = svsub_x (pg, svreinterpret_u64 (z), svreinterpret_u64 (shift));
+  svuint64_t i = svand_x (pg, svreinterpret_u64 (z), 0xfff);
+  i = svadd_x (pg, i, i);
 
   /* Lookup without shortcut for small values but with predicate to avoid
      segfault for large values and NaNs.  */
   svfloat64_t r = svsub_x (pg, z, shift);
-  svfloat64_t erfr = svld1_gather_index (a_lt_max, __sv_erf_data.erf, i);
-  svfloat64_t scale = svld1_gather_index (a_lt_max, __sv_erf_data.scale, i);
+  svfloat64_t erfr
+      = svld1_gather_index (a_lt_max, &__v_erf_data.tab[0].erf, i);
+  svfloat64_t scale
+      = svld1_gather_index (a_lt_max, &__v_erf_data.tab[0].scale, i);
 
   /* erf(x) ~ erf(r) + scale * d * poly (r, d).  */
   svfloat64_t d = svsub_x (pg, a, r);
@@ -104,8 +106,10 @@ svfloat64_t SV_NAME_D1 (erf) (svfloat64_t x, const svbool_t pg)
   return svreinterpret_f64 (svorr_x (pg, sign, iy));
 }
 
-PL_SIG (SV, D, 1, erf, -6.0, 6.0)
-PL_TEST_ULP (SV_NAME_D1 (erf), 1.79)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (erf), 0, 5.9921875, 40000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (erf), 5.9921875, inf, 40000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (erf), 0, inf, 4000)
+TEST_SIG (SV, D, 1, erf, -6.0, 6.0)
+TEST_ULP (SV_NAME_D1 (erf), 1.79)
+TEST_DISABLE_FENV (SV_NAME_D1 (erf))
+TEST_SYM_INTERVAL (SV_NAME_D1 (erf), 0, 5.9921875, 40000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (erf), 5.9921875, inf, 40000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (erf), 0, inf, 4000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_erfc_1u8.c b/math/aarch64/sve/erfc.c
similarity index 91%
rename from pl/math/sv_erfc_1u8.c
rename to math/aarch64/sve/erfc.c
index a91bef96f2e73a..a85cacb1ae6226 100644
--- a/pl/math/sv_erfc_1u8.c
+++ b/math/aarch64/sve/erfc.c
@@ -1,13 +1,13 @@
 /*
  * Double-precision vector erfc(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -93,7 +93,7 @@ svfloat64_t SV_NAME_D1 (erfc) (svfloat64_t x, const svbool_t pg)
 
   /* Lookup erfc(r) and 2/sqrt(pi)*exp(-r^2) in tables.  */
   i = svadd_x (pg, i, i);
-  const float64_t *p = &__erfc_data.tab[0].erfc - 2 * dat->off_arr;
+  const float64_t *p = &__v_erfc_data.tab[0].erfc - 2 * dat->off_arr;
   svfloat64_t erfcr = svld1_gather_index (pg, p, i);
   svfloat64_t scale = svld1_gather_index (pg, p + 1, i);
 
@@ -155,10 +155,12 @@ svfloat64_t SV_NAME_D1 (erfc) (svfloat64_t x, const svbool_t pg)
   return svmla_x (pg, off, fac, y);
 }
 
-PL_SIG (SV, D, 1, erfc, -6.0, 28.0)
-PL_TEST_ULP (SV_NAME_D1 (erfc), 1.21)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (erfc), 0.0, 0x1p-26, 40000)
-PL_TEST_INTERVAL (SV_NAME_D1 (erfc), 0x1p-26, 28.0, 40000)
-PL_TEST_INTERVAL (SV_NAME_D1 (erfc), -0x1p-26, -6.0, 40000)
-PL_TEST_INTERVAL (SV_NAME_D1 (erfc), 28.0, inf, 40000)
-PL_TEST_INTERVAL (SV_NAME_D1 (erfc), 6.0, -inf, 40000)
+TEST_SIG (SV, D, 1, erfc, -6.0, 28.0)
+TEST_ULP (SV_NAME_D1 (erfc), 1.21)
+TEST_DISABLE_FENV (SV_NAME_D1 (erfc))
+TEST_SYM_INTERVAL (SV_NAME_D1 (erfc), 0.0, 0x1p-26, 40000)
+TEST_INTERVAL (SV_NAME_D1 (erfc), 0x1p-26, 28.0, 40000)
+TEST_INTERVAL (SV_NAME_D1 (erfc), -0x1p-26, -6.0, 40000)
+TEST_INTERVAL (SV_NAME_D1 (erfc), 28.0, inf, 40000)
+TEST_INTERVAL (SV_NAME_D1 (erfc), 6.0, -inf, 40000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_erfcf_1u7.c b/math/aarch64/sve/erfcf.c
similarity index 77%
rename from pl/math/sv_erfcf_1u7.c
rename to math/aarch64/sve/erfcf.c
index cda8f0b3752e6f..936881332291ad 100644
--- a/pl/math/sv_erfcf_1u7.c
+++ b/math/aarch64/sve/erfcf.c
@@ -1,13 +1,13 @@
 /*
  * Single-precision vector erfc(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -66,23 +66,23 @@ svfloat32_t SV_NAME_F1 (erfc) (svfloat32_t x, const svbool_t pg)
   svuint32_t i = svqadd (svreinterpret_u32 (z), dat->off_idx);
 
   /* Lookup erfc(r) and 2/sqrt(pi)*exp(-r^2) in tables.  */
-  i = svmul_x (pg, i, 2);
-  const float32_t *p = &__erfcf_data.tab[0].erfc - 2 * dat->off_arr;
+  i = svlsl_x (svptrue_b32 (), i, 1);
+  const float32_t *p = &__v_erfcf_data.tab[0].erfc - 2 * dat->off_arr;
   svfloat32_t erfcr = svld1_gather_index (pg, p, i);
   svfloat32_t scale = svld1_gather_index (pg, p + 1, i);
 
   /* erfc(x) ~ erfc(r) - scale * d * poly(r, d).  */
   svfloat32_t r = svsub_x (pg, z, shift);
   svfloat32_t d = svsub_x (pg, a, r);
-  svfloat32_t d2 = svmul_x (pg, d, d);
-  svfloat32_t r2 = svmul_x (pg, r, r);
+  svfloat32_t d2 = svmul_x (svptrue_b32 (), d, d);
+  svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
 
   svfloat32_t coeffs = svld1rq (svptrue_b32 (), &dat->third);
-  svfloat32_t third = svdup_lane (coeffs, 0);
 
   svfloat32_t p1 = r;
-  svfloat32_t p2 = svmls_lane (third, r2, coeffs, 1);
-  svfloat32_t p3 = svmul_x (pg, r, svmla_lane (sv_f32 (-0.5), r2, coeffs, 0));
+  svfloat32_t p2 = svmls_lane (sv_f32 (dat->third), r2, coeffs, 1);
+  svfloat32_t p3
+      = svmul_x (svptrue_b32 (), r, svmla_lane (sv_f32 (-0.5), r2, coeffs, 0));
   svfloat32_t p4 = svmla_lane (sv_f32 (dat->two_over_five), r2, coeffs, 2);
   p4 = svmls_x (pg, sv_f32 (dat->tenth), r2, p4);
 
@@ -102,10 +102,12 @@ svfloat32_t SV_NAME_F1 (erfc) (svfloat32_t x, const svbool_t pg)
   return svmla_x (pg, off, fac, y);
 }
 
-PL_SIG (SV, F, 1, erfc, -4.0, 10.0)
-PL_TEST_ULP (SV_NAME_F1 (erfc), 1.14)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (erfc), 0.0, 0x1p-26, 40000)
-PL_TEST_INTERVAL (SV_NAME_F1 (erfc), 0x1p-26, 10.0625, 40000)
-PL_TEST_INTERVAL (SV_NAME_F1 (erfc), -0x1p-26, -4.0, 40000)
-PL_TEST_INTERVAL (SV_NAME_F1 (erfc), 10.0625, inf, 40000)
-PL_TEST_INTERVAL (SV_NAME_F1 (erfc), -4.0, -inf, 40000)
+TEST_SIG (SV, F, 1, erfc, -4.0, 10.0)
+TEST_ULP (SV_NAME_F1 (erfc), 1.14)
+TEST_DISABLE_FENV (SV_NAME_F1 (erfc))
+TEST_SYM_INTERVAL (SV_NAME_F1 (erfc), 0.0, 0x1p-26, 40000)
+TEST_INTERVAL (SV_NAME_F1 (erfc), 0x1p-26, 10.0625, 40000)
+TEST_INTERVAL (SV_NAME_F1 (erfc), -0x1p-26, -4.0, 40000)
+TEST_INTERVAL (SV_NAME_F1 (erfc), 10.0625, inf, 40000)
+TEST_INTERVAL (SV_NAME_F1 (erfc), -4.0, -inf, 40000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_erff_2u.c b/math/aarch64/sve/erff.c
similarity index 77%
rename from pl/math/sv_erff_2u.c
rename to math/aarch64/sve/erff.c
index adeee798ee2e08..c8c87499a63fae 100644
--- a/pl/math/sv_erff_2u.c
+++ b/math/aarch64/sve/erff.c
@@ -1,13 +1,13 @@
 /*
  * Single-precision vector erf(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -52,18 +52,17 @@ svfloat32_t SV_NAME_F1 (erf) (svfloat32_t x, const svbool_t pg)
 
   svfloat32_t shift = sv_f32 (dat->shift);
   svfloat32_t z = svadd_x (pg, a, shift);
-  svuint32_t i
-      = svsub_x (pg, svreinterpret_u32 (z), svreinterpret_u32 (shift));
-
-  /* Saturate lookup index.  */
-  i = svsel (a_ge_max, sv_u32 (512), i);
+  svuint32_t i = svand_x (pg, svreinterpret_u32 (z), 0xfff);
+  i = svadd_x (pg, i, i);
 
   /* r and erf(r) set to 0 for |x| below min.  */
   svfloat32_t r = svsub_z (a_gt_min, z, shift);
-  svfloat32_t erfr = svld1_gather_index (a_gt_min, __sv_erff_data.erf, i);
+  svfloat32_t erfr
+      = svld1_gather_index (a_gt_min, &__v_erff_data.tab[0].erf, i);
 
   /* scale set to 2/sqrt(pi) for |x| below min.  */
-  svfloat32_t scale = svld1_gather_index (a_gt_min, __sv_erff_data.scale, i);
+  svfloat32_t scale
+      = svld1_gather_index (a_gt_min, &__v_erff_data.tab[0].scale, i);
   scale = svsel (a_gt_min, scale, sv_f32 (dat->scale));
 
   /* erf(x) ~ erf(r) + scale * d * (1 - r * d + 1/3 * d^2).  */
@@ -82,9 +81,11 @@ svfloat32_t SV_NAME_F1 (erf) (svfloat32_t x, const svbool_t pg)
   return svreinterpret_f32 (svorr_x (pg, sign, iy));
 }
 
-PL_SIG (SV, F, 1, erf, -4.0, 4.0)
-PL_TEST_ULP (SV_NAME_F1 (erf), 1.43)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (erf), 0, 0x1.cp-7, 40000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (erf), 0x1.cp-7, 3.9375, 40000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (erf), 3.9375, inf, 40000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (erf), 0, inf, 4000)
+TEST_SIG (SV, F, 1, erf, -4.0, 4.0)
+TEST_ULP (SV_NAME_F1 (erf), 1.43)
+TEST_DISABLE_FENV (SV_NAME_F1 (erf))
+TEST_SYM_INTERVAL (SV_NAME_F1 (erf), 0, 0x1.cp-7, 40000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (erf), 0x1.cp-7, 3.9375, 40000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (erf), 3.9375, inf, 40000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (erf), 0, inf, 4000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_exp_1u5.c b/math/aarch64/sve/exp.c
similarity index 79%
rename from pl/math/sv_exp_1u5.c
rename to math/aarch64/sve/exp.c
index c187def9e62530..b021e64ffedf21 100644
--- a/pl/math/sv_exp_1u5.c
+++ b/math/aarch64/sve/exp.c
@@ -1,22 +1,25 @@
 /*
  * Double-precision vector e^x function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2025, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
-  double poly[4];
+  double c0, c2;
+  double c1, c3;
   double ln2_hi, ln2_lo, inv_ln2, shift, thres;
+
 } data = {
-  .poly = { /* ulp error: 0.53.  */
-	    0x1.fffffffffdbcdp-2, 0x1.555555555444cp-3, 0x1.555573c6a9f7dp-5,
-	    0x1.1111266d28935p-7 },
+  .c0 = 0x1.fffffffffdbcdp-2,
+  .c1 = 0x1.555555555444cp-3,
+  .c2 = 0x1.555573c6a9f7dp-5,
+  .c3 = 0x1.1111266d28935p-7,
   .ln2_hi = 0x1.62e42fefa3800p-1,
   .ln2_lo = 0x1.ef35793c76730p-45,
   /* 1/ln2.  */
@@ -26,7 +29,6 @@ static const struct data
   .thres = 704.0,
 };
 
-#define C(i) sv_f64 (d->poly[i])
 #define SpecialOffset 0x6000000000000000 /* 0x1p513.  */
 /* SpecialBias1 + SpecialBias1 = asuint(1.0).  */
 #define SpecialBias1 0x7000000000000000 /* 0x1p769.  */
@@ -46,20 +48,20 @@ special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n)
   svuint64_t b
       = svdup_u64_z (p_sign, SpecialOffset); /* Inactive lanes set to 0.  */
 
-  /* Set s1 to generate overflow depending on sign of exponent n.  */
-  svfloat64_t s1 = svreinterpret_f64 (
-      svsubr_x (pg, b, SpecialBias1)); /* 0x70...0 - b.  */
-  /* Offset s to avoid overflow in final result if n is below threshold.  */
+  /* Set s1 to generate overflow depending on sign of exponent n,
+     i.e. s1 = 0x70...0 - b.  */
+  svfloat64_t s1 = svreinterpret_f64 (svsubr_x (pg, b, SpecialBias1));
+  /* Offset s to avoid overflow in final result if n is below threshold.
+     i.e. s2 = as_u64 (s) - 0x3010...0 + b.  */
   svfloat64_t s2 = svreinterpret_f64 (
-      svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2),
-	       b)); /* as_u64 (s) - 0x3010...0 + b.  */
+      svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), b));
 
   /* |n| > 1280 => 2^(n) overflows.  */
   svbool_t p_cmp = svacgt (pg, n, 1280.0);
 
-  svfloat64_t r1 = svmul_x (pg, s1, s1);
+  svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1);
   svfloat64_t r2 = svmla_x (pg, s2, s2, y);
-  svfloat64_t r0 = svmul_x (pg, r2, s1);
+  svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1);
 
   return svsel (p_cmp, r1, r0);
 }
@@ -93,16 +95,16 @@ svfloat64_t SV_NAME_D1 (exp) (svfloat64_t x, const svbool_t pg)
   svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2);
   svuint64_t u = svreinterpret_u64 (z);
   svfloat64_t n = svsub_x (pg, z, d->shift);
-
+  svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1);
   /* r = x - n * ln2, r is in [-ln2/(2N), ln2/(2N)].  */
   svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi);
   svfloat64_t r = svmls_lane (x, n, ln2, 0);
   r = svmls_lane (r, n, ln2, 1);
 
   /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5.  */
-  svfloat64_t r2 = svmul_x (pg, r, r);
-  svfloat64_t p01 = svmla_x (pg, C (0), C (1), r);
-  svfloat64_t p23 = svmla_x (pg, C (2), C (3), r);
+  svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r, c13, 0);
+  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r, c13, 1);
   svfloat64_t p04 = svmla_x (pg, p01, p23, r2);
   svfloat64_t y = svmla_x (pg, r, p04, r2);
 
@@ -129,9 +131,11 @@ svfloat64_t SV_NAME_D1 (exp) (svfloat64_t x, const svbool_t pg)
   return svmla_x (pg, s, s, y);
 }
 
-PL_SIG (SV, D, 1, exp, -9.9, 9.9)
-PL_TEST_ULP (SV_NAME_D1 (exp), 1.46)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp), 0, 0x1p-23, 40000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp), 0x1p-23, 1, 50000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp), 1, 0x1p23, 50000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp), 0x1p23, inf, 50000)
+TEST_SIG (SV, D, 1, exp, -9.9, 9.9)
+TEST_ULP (SV_NAME_D1 (exp), 1.46)
+TEST_DISABLE_FENV (SV_NAME_D1 (exp))
+TEST_SYM_INTERVAL (SV_NAME_D1 (exp), 0, 0x1p-23, 40000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (exp), 0x1p-23, 1, 50000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (exp), 1, 0x1p23, 50000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (exp), 0x1p23, inf, 50000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_exp10_1u5.c b/math/aarch64/sve/exp10.c
similarity index 79%
rename from pl/math/sv_exp10_1u5.c
rename to math/aarch64/sve/exp10.c
index 519693afcab0b3..3d6af334e155f2 100644
--- a/pl/math/sv_exp10_1u5.c
+++ b/math/aarch64/sve/exp10.c
@@ -1,28 +1,30 @@
 /*
  * Double-precision SVE 10^x function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2025, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_sve_f64.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define SpecialBound 307.0 /* floor (log10 (2^1023)).  */
 
 static const struct data
 {
-  double poly[5];
+  double c1, c3, c2, c4, c0;
   double shift, log10_2, log2_10_hi, log2_10_lo, scale_thres, special_bound;
 } data = {
   /* Coefficients generated using Remez algorithm.
      rel error: 0x1.9fcb9b3p-60
      abs error: 0x1.a20d9598p-60 in [ -log10(2)/128, log10(2)/128 ]
      max ulp err 0.52 +0.5.  */
-  .poly = { 0x1.26bb1bbb55516p1, 0x1.53524c73cd32ap1, 0x1.0470591daeafbp1,
-	    0x1.2bd77b1361ef6p0, 0x1.142b5d54e9621p-1 },
+  .c0 = 0x1.26bb1bbb55516p1,
+  .c1 = 0x1.53524c73cd32ap1,
+  .c2 = 0x1.0470591daeafbp1,
+  .c3 = 0x1.2bd77b1361ef6p0,
+  .c4 = 0x1.142b5d54e9621p-1,
   /* 1.5*2^46+1023. This value is further explained below.  */
   .shift = 0x1.800000000ffc0p+46,
   .log10_2 = 0x1.a934f0979a371p1,     /* 1/log2(10).  */
@@ -60,9 +62,9 @@ special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n,
   /* |n| > 1280 => 2^(n) overflows.  */
   svbool_t p_cmp = svacgt (pg, n, d->scale_thres);
 
-  svfloat64_t r1 = svmul_x (pg, s1, s1);
+  svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1);
   svfloat64_t r2 = svmla_x (pg, s2, s2, y);
-  svfloat64_t r0 = svmul_x (pg, r2, s1);
+  svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1);
 
   return svsel (p_cmp, r1, r0);
 }
@@ -93,11 +95,14 @@ svfloat64_t SV_NAME_D1 (exp10) (svfloat64_t x, svbool_t pg)
      comes at significant performance cost.  */
   svuint64_t u = svreinterpret_u64 (z);
   svfloat64_t scale = svexpa (u);
-
+  svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2);
   /* Approximate exp10(r) using polynomial.  */
-  svfloat64_t r2 = svmul_x (pg, r, r);
-  svfloat64_t y = svmla_x (pg, svmul_x (pg, r, d->poly[0]), r2,
-			   sv_pairwise_poly_3_f64_x (pg, r, r2, d->poly + 1));
+  svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+  svfloat64_t p12 = svmla_lane (sv_f64 (d->c1), r, c24, 0);
+  svfloat64_t p34 = svmla_lane (sv_f64 (d->c3), r, c24, 1);
+  svfloat64_t p14 = svmla_x (pg, p12, p34, r2);
+
+  svfloat64_t y = svmla_x (pg, svmul_x (svptrue_b64 (), r, d->c0), r2, p14);
 
   /* Assemble result as exp10(x) = 2^n * exp10(r).  If |x| > SpecialBound
      multiplication may overflow, so use special case routine.  */
@@ -116,7 +121,11 @@ svfloat64_t SV_NAME_D1 (exp10) (svfloat64_t x, svbool_t pg)
   return svmla_x (pg, scale, scale, y);
 }
 
-PL_SIG (SV, D, 1, exp10, -9.9, 9.9)
-PL_TEST_ULP (SV_NAME_D1 (exp10), 0.52)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp10), 0, 307, 10000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp10), 307, inf, 1000)
+#if WANT_EXP10_TESTS
+TEST_SIG (SV, D, 1, exp10, -9.9, 9.9)
+TEST_ULP (SV_NAME_D1 (exp10), 0.52)
+TEST_DISABLE_FENV (SV_NAME_D1 (exp10))
+TEST_SYM_INTERVAL (SV_NAME_D1 (exp10), 0, SpecialBound, 10000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (exp10), SpecialBound, inf, 1000)
+#endif
+CLOSE_SVE_ATTR
diff --git a/math/aarch64/sve/exp10f.c b/math/aarch64/sve/exp10f.c
new file mode 100644
index 00000000000000..8679df87702f0f
--- /dev/null
+++ b/math/aarch64/sve/exp10f.c
@@ -0,0 +1,101 @@
+/*
+ * Single-precision SVE 10^x function.
+ *
+ * Copyright (c) 2023-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#define _GNU_SOURCE
+#include "sv_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "sv_poly_f32.h"
+
+/* For x < -Thres, the result is subnormal and not handled correctly by
+   FEXPA.  */
+#define Thres 37.9
+
+static const struct data
+{
+  float log2_10_lo, c0, c2, c4;
+  float c1, c3, log10_2;
+  float shift, log2_10_hi, thres;
+} data = {
+  /* Coefficients generated using Remez algorithm with minimisation of relative
+     error.
+     rel error: 0x1.89dafa3p-24
+     abs error: 0x1.167d55p-23 in [-log10(2)/2, log10(2)/2]
+     maxerr: 0.52 +0.5 ulp.  */
+  .c0 = 0x1.26bb16p+1f,
+  .c1 = 0x1.5350d2p+1f,
+  .c2 = 0x1.04744ap+1f,
+  .c3 = 0x1.2d8176p+0f,
+  .c4 = 0x1.12b41ap-1f,
+  /* 1.5*2^17 + 127, a shift value suitable for FEXPA.  */
+  .shift = 0x1.803f8p17f,
+  .log10_2 = 0x1.a934fp+1,
+  .log2_10_hi = 0x1.344136p-2,
+  .log2_10_lo = -0x1.ec10cp-27,
+  .thres = Thres,
+};
+
+static inline svfloat32_t
+sv_exp10f_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
+{
+  /* exp10(x) = 2^(n/N) * 10^r = 2^n * (1 + poly (r)),
+     with 1 + poly(r) in [1/sqrt(2), sqrt(2)] and
+     x = r + n * log10(2) / N, with r in [-log10(2)/2N, log10(2)/2N].  */
+
+  svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->log2_10_lo);
+
+  /* n = round(x/(log10(2)/N)).  */
+  svfloat32_t shift = sv_f32 (d->shift);
+  svfloat32_t z = svmad_x (pg, sv_f32 (d->log10_2), x, shift);
+  svfloat32_t n = svsub_x (svptrue_b32 (), z, shift);
+
+  /* r = x - n*log10(2)/N.  */
+  svfloat32_t r = svmsb_x (pg, sv_f32 (d->log2_10_hi), n, x);
+  r = svmls_lane (r, n, lane_consts, 0);
+
+  svfloat32_t scale = svexpa (svreinterpret_u32 (z));
+
+  /* Polynomial evaluation: poly(r) ~ exp10(r)-1.  */
+  svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, lane_consts, 2);
+  svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, lane_consts, 3);
+  svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
+  svfloat32_t p14 = svmla_x (pg, p12, p34, r2);
+  svfloat32_t p0 = svmul_lane (r, lane_consts, 1);
+  svfloat32_t poly = svmla_x (pg, p0, r2, p14);
+
+  return svmla_x (pg, scale, scale, poly);
+}
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svbool_t special, const struct data *d)
+{
+  return sv_call_f32 (exp10f, x, sv_exp10f_inline (x, svptrue_b32 (), d),
+		      special);
+}
+
+/* Single-precision SVE exp10f routine. Implements the same algorithm
+   as AdvSIMD exp10f.
+   Worst case error is 1.02 ULPs.
+   _ZGVsMxv_exp10f(-0x1.040488p-4) got 0x1.ba5f9ep-1
+				  want 0x1.ba5f9cp-1.  */
+svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+  svbool_t special = svacgt (pg, x, d->thres);
+  if (unlikely (svptest_any (special, special)))
+    return special_case (x, special, d);
+  return sv_exp10f_inline (x, pg, d);
+}
+
+#if WANT_EXP10_TESTS
+TEST_SIG (SV, F, 1, exp10, -9.9, 9.9)
+TEST_ULP (SV_NAME_F1 (exp10), 0.52)
+TEST_DISABLE_FENV (SV_NAME_F1 (exp10))
+TEST_SYM_INTERVAL (SV_NAME_F1 (exp10), 0, Thres, 50000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (exp10), Thres, inf, 50000)
+#endif
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_exp2_2u.c b/math/aarch64/sve/exp2.c
similarity index 72%
rename from pl/math/sv_exp2_2u.c
rename to math/aarch64/sve/exp2.c
index dcbca8adddd1de..adbe40c648ac9a 100644
--- a/pl/math/sv_exp2_2u.c
+++ b/math/aarch64/sve/exp2.c
@@ -1,14 +1,13 @@
 /*
  * Double-precision SVE 2^x function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2025, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define N (1 << V_EXP_TABLE_BITS)
 
@@ -17,15 +16,15 @@
 
 static const struct data
 {
-  double poly[4];
+  double c0, c2;
+  double c1, c3;
   double shift, big_bound, uoflow_bound;
 } data = {
   /* Coefficients are computed using Remez algorithm with
      minimisation of the absolute error.  */
-  .poly = { 0x1.62e42fefa3686p-1, 0x1.ebfbdff82c241p-3, 0x1.c6b09b16de99ap-5,
-	    0x1.3b2abf5571ad8p-7 },
-  .shift = 0x1.8p52 / N,
-  .uoflow_bound = UOFlowBound,
+  .c0 = 0x1.62e42fefa3686p-1, .c1 = 0x1.ebfbdff82c241p-3,
+  .c2 = 0x1.c6b09b16de99ap-5, .c3 = 0x1.3b2abf5571ad8p-7,
+  .shift = 0x1.8p52 / N,      .uoflow_bound = UOFlowBound,
   .big_bound = BigBound,
 };
 
@@ -57,9 +56,9 @@ special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n,
   /* |n| > 1280 => 2^(n) overflows.  */
   svbool_t p_cmp = svacgt (pg, n, d->uoflow_bound);
 
-  svfloat64_t r1 = svmul_x (pg, s1, s1);
+  svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1);
   svfloat64_t r2 = svmla_x (pg, s2, s2, y);
-  svfloat64_t r0 = svmul_x (pg, r2, s1);
+  svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1);
 
   return svsel (p_cmp, r1, r0);
 }
@@ -89,19 +88,24 @@ svfloat64_t SV_NAME_D1 (exp2) (svfloat64_t x, svbool_t pg)
   svuint64_t top = svlsl_x (pg, ki, 52 - V_EXP_TABLE_BITS);
   svfloat64_t scale = svreinterpret_f64 (svadd_x (pg, sbits, top));
 
+  svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1);
   /* Approximate exp2(r) using polynomial.  */
-  svfloat64_t r2 = svmul_x (pg, r, r);
-  svfloat64_t p = sv_pairwise_poly_3_f64_x (pg, r, r2, d->poly);
-  svfloat64_t y = svmul_x (pg, r, p);
-
+  /* y = exp2(r) - 1 ~= C0 r + C1 r^2 + C2 r^3 + C3 r^4.  */
+  svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r, c13, 0);
+  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r, c13, 1);
+  svfloat64_t p = svmla_x (pg, p01, p23, r2);
+  svfloat64_t y = svmul_x (svptrue_b64 (), r, p);
   /* Assemble exp2(x) = exp2(r) * scale.  */
   if (unlikely (svptest_any (pg, special)))
     return special_case (pg, scale, y, kd, d);
   return svmla_x (pg, scale, scale, y);
 }
 
-PL_SIG (SV, D, 1, exp2, -9.9, 9.9)
-PL_TEST_ULP (SV_NAME_D1 (exp2), 1.15)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp2), 0, BigBound, 1000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp2), BigBound, UOFlowBound, 100000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp2), UOFlowBound, inf, 1000)
+TEST_SIG (SV, D, 1, exp2, -9.9, 9.9)
+TEST_ULP (SV_NAME_D1 (exp2), 1.15)
+TEST_DISABLE_FENV (SV_NAME_D1 (exp2))
+TEST_SYM_INTERVAL (SV_NAME_D1 (exp2), 0, BigBound, 1000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (exp2), BigBound, UOFlowBound, 100000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (exp2), UOFlowBound, inf, 1000)
+CLOSE_SVE_ATTR
diff --git a/math/aarch64/sve/exp2f.c b/math/aarch64/sve/exp2f.c
new file mode 100644
index 00000000000000..f4c1d0ae607e01
--- /dev/null
+++ b/math/aarch64/sve/exp2f.c
@@ -0,0 +1,83 @@
+/*
+ * Single-precision SVE 2^x function.
+ *
+ * Copyright (c) 2023-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+
+#define Thres 0x1.5d5e2ap+6f
+
+static const struct data
+{
+  float c0, c2, c4, c1, c3;
+  float shift, thres;
+} data = {
+  /* Coefficients copied from the polynomial in AdvSIMD variant.  */
+  .c0 = 0x1.62e422p-1f,
+  .c1 = 0x1.ebf9bcp-3f,
+  .c2 = 0x1.c6bd32p-5f,
+  .c3 = 0x1.3ce9e4p-7f,
+  .c4 = 0x1.59977ap-10f,
+  /* 1.5*2^17 + 127.  */
+  .shift = 0x1.803f8p17f,
+  /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled
+     correctly by FEXPA.  */
+  .thres = Thres,
+};
+
+static inline svfloat32_t
+sv_exp2f_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
+{
+  /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
+    x = n + r, with r in [-1/2, 1/2].  */
+  svfloat32_t z = svadd_x (svptrue_b32 (), x, d->shift);
+  svfloat32_t n = svsub_x (svptrue_b32 (), z, d->shift);
+  svfloat32_t r = svsub_x (svptrue_b32 (), x, n);
+
+  svfloat32_t scale = svexpa (svreinterpret_u32 (z));
+
+  /* Polynomial evaluation: poly(r) ~ exp2(r)-1.
+     Evaluate polynomial using a hybrid scheme - offset ESTRIN by 1 for
+     coefficients 1 to 4, and apply most significant coefficient directly.  */
+  svfloat32_t even_coeffs = svld1rq (svptrue_b32 (), &d->c0);
+  svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
+  svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, even_coeffs, 1);
+  svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, even_coeffs, 2);
+  svfloat32_t p14 = svmla_x (pg, p12, r2, p34);
+  svfloat32_t p0 = svmul_lane (r, even_coeffs, 0);
+  svfloat32_t poly = svmla_x (pg, p0, r2, p14);
+
+  return svmla_x (pg, scale, scale, poly);
+}
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svbool_t special, const struct data *d)
+{
+  return sv_call_f32 (exp2f, x, sv_exp2f_inline (x, svptrue_b32 (), d),
+		      special);
+}
+
+/* Single-precision SVE exp2f routine. Implements the same algorithm
+   as AdvSIMD exp2f.
+   Worst case error is 1.04 ULPs.
+   _ZGVsMxv_exp2f(-0x1.af994ap-3) got 0x1.ba6a66p-1
+				 want 0x1.ba6a64p-1.  */
+svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+  svbool_t special = svacgt (pg, x, d->thres);
+  if (unlikely (svptest_any (special, special)))
+    return special_case (x, special, d);
+  return sv_exp2f_inline (x, pg, d);
+}
+
+TEST_SIG (SV, F, 1, exp2, -9.9, 9.9)
+TEST_ULP (SV_NAME_F1 (exp2), 0.54)
+TEST_DISABLE_FENV (SV_NAME_F1 (exp2))
+TEST_SYM_INTERVAL (SV_NAME_F1 (exp2), 0, Thres, 50000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (exp2), Thres, inf, 50000)
+CLOSE_SVE_ATTR
diff --git a/math/aarch64/sve/expf.c b/math/aarch64/sve/expf.c
new file mode 100644
index 00000000000000..11528abdbbaf7d
--- /dev/null
+++ b/math/aarch64/sve/expf.c
@@ -0,0 +1,50 @@
+/*
+ * Single-precision vector e^x function.
+ *
+ * Copyright (c) 2019-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "sv_expf_inline.h"
+
+/* Roughly 87.3. For x < -Thres, the result is subnormal and not handled
+   correctly by FEXPA.  */
+#define Thres 0x1.5d5e2ap+6f
+
+static const struct data
+{
+  struct sv_expf_data d;
+  float thres;
+} data = {
+  .d = SV_EXPF_DATA,
+  .thres = Thres,
+};
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svbool_t special, const struct sv_expf_data *d)
+{
+  return sv_call_f32 (expf, x, expf_inline (x, svptrue_b32 (), d), special);
+}
+
+/* Optimised single-precision SVE exp function.
+   Worst-case error is 1.04 ulp:
+   SV_NAME_F1 (exp)(0x1.a8eda4p+1) got 0x1.ba74bcp+4
+				  want 0x1.ba74bap+4.  */
+svfloat32_t SV_NAME_F1 (exp) (svfloat32_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+  svbool_t is_special_case = svacgt (pg, x, d->thres);
+  if (unlikely (svptest_any (pg, is_special_case)))
+    return special_case (x, is_special_case, &d->d);
+  return expf_inline (x, pg, &d->d);
+}
+
+TEST_SIG (SV, F, 1, exp, -9.9, 9.9)
+TEST_ULP (SV_NAME_F1 (exp), 0.55)
+TEST_DISABLE_FENV (SV_NAME_F1 (exp))
+TEST_SYM_INTERVAL (SV_NAME_F1 (exp), 0, Thres, 50000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (exp), Thres, inf, 50000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_expm1_2u5.c b/math/aarch64/sve/expm1.c
similarity index 86%
rename from pl/math/sv_expm1_2u5.c
rename to math/aarch64/sve/expm1.c
index 82a31f6d9c0e92..f4fb8cb982f02b 100644
--- a/pl/math/sv_expm1_2u5.c
+++ b/math/aarch64/sve/expm1.c
@@ -1,14 +1,14 @@
 /*
  * Double-precision vector exp(x) - 1 function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "sv_poly_f64.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define SpecialBound 0x1.62b7d369a5aa9p+9
 #define ExponentBias 0x3ff0000000000000
@@ -88,8 +88,10 @@ svfloat64_t SV_NAME_D1 (expm1) (svfloat64_t x, svbool_t pg)
   return y;
 }
 
-PL_SIG (SV, D, 1, expm1, -9.9, 9.9)
-PL_TEST_ULP (SV_NAME_D1 (expm1), 1.68)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (expm1), 0, 0x1p-23, 1000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (expm1), 0x1p-23, SpecialBound, 200000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (expm1), SpecialBound, inf, 1000)
+TEST_SIG (SV, D, 1, expm1, -9.9, 9.9)
+TEST_ULP (SV_NAME_D1 (expm1), 1.68)
+TEST_DISABLE_FENV (SV_NAME_D1 (expm1))
+TEST_SYM_INTERVAL (SV_NAME_D1 (expm1), 0, 0x1p-23, 1000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (expm1), 0x1p-23, SpecialBound, 200000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (expm1), SpecialBound, inf, 1000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_expm1f_1u6.c b/math/aarch64/sve/expm1f.c
similarity index 67%
rename from pl/math/sv_expm1f_1u6.c
rename to math/aarch64/sve/expm1f.c
index 0ec7c00f5300b2..95f7c09a403d03 100644
--- a/pl/math/sv_expm1f_1u6.c
+++ b/math/aarch64/sve/expm1f.c
@@ -1,13 +1,13 @@
 /*
  * Single-precision vector exp(x) - 1 function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 /* Largest value of x for which expm1(x) should round to -1.  */
 #define SpecialBound 0x1.5ebc4p+6f
@@ -17,20 +17,17 @@ static const struct data
   /* These 4 are grouped together so they can be loaded as one quadword, then
      used with _lane forms of svmla/svmls.  */
   float c2, c4, ln2_hi, ln2_lo;
-  float c0, c1, c3, inv_ln2, special_bound, shift;
+  float c0, inv_ln2, c1, c3, special_bound;
 } data = {
   /* Generated using fpminimax.  */
   .c0 = 0x1.fffffep-2,		 .c1 = 0x1.5554aep-3,
   .c2 = 0x1.555736p-5,		 .c3 = 0x1.12287cp-7,
-  .c4 = 0x1.6b55a2p-10,
+  .c4 = 0x1.6b55a2p-10,		 .inv_ln2 = 0x1.715476p+0f,
+  .special_bound = SpecialBound, .ln2_lo = 0x1.7f7d1cp-20f,
+  .ln2_hi = 0x1.62e4p-1f,
 
-  .special_bound = SpecialBound, .shift = 0x1.8p23f,
-  .inv_ln2 = 0x1.715476p+0f,	 .ln2_hi = 0x1.62e4p-1f,
-  .ln2_lo = 0x1.7f7d1cp-20f,
 };
 
-#define C(i) sv_f32 (d->c##i)
-
 static svfloat32_t NOINLINE
 special_case (svfloat32_t x, svbool_t pg)
 {
@@ -60,9 +57,8 @@ svfloat32_t SV_NAME_F1 (expm1) (svfloat32_t x, svbool_t pg)
      and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
      exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
      where 2^i is exact because i is an integer.  */
-  svfloat32_t j = svmla_x (pg, sv_f32 (d->shift), x, d->inv_ln2);
-  j = svsub_x (pg, j, d->shift);
-  svint32_t i = svcvt_s32_x (pg, j);
+  svfloat32_t j = svmul_x (svptrue_b32 (), x, d->inv_ln2);
+  j = svrinta_x (pg, j);
 
   svfloat32_t f = svmls_lane (x, j, lane_constants, 2);
   f = svmls_lane (f, j, lane_constants, 3);
@@ -72,22 +68,24 @@ svfloat32_t SV_NAME_F1 (expm1) (svfloat32_t x, svbool_t pg)
 	 x + ax^2 + bx^3 + cx^4 ....
      So we calculate the polynomial P(f) = a + bf + cf^2 + ...
      and assemble the approximation expm1(f) ~= f + f^2 * P(f).  */
-  svfloat32_t p12 = svmla_lane (C (1), f, lane_constants, 0);
-  svfloat32_t p34 = svmla_lane (C (3), f, lane_constants, 1);
-  svfloat32_t f2 = svmul_x (pg, f, f);
+  svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), f, lane_constants, 0);
+  svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), f, lane_constants, 1);
+  svfloat32_t f2 = svmul_x (svptrue_b32 (), f, f);
   svfloat32_t p = svmla_x (pg, p12, f2, p34);
-  p = svmla_x (pg, C (0), f, p);
+
+  p = svmla_x (pg, sv_f32 (d->c0), f, p);
   p = svmla_x (pg, f, f2, p);
 
   /* Assemble the result.
      expm1(x) ~= 2^i * (p + 1) - 1
      Let t = 2^i.  */
-  svfloat32_t t = svreinterpret_f32 (
-      svadd_x (pg, svreinterpret_u32 (svlsl_x (pg, i, 23)), 0x3f800000));
-  return svmla_x (pg, svsub_x (pg, t, 1), p, t);
+  svfloat32_t t = svscale_x (pg, sv_f32 (1.0f), svcvt_s32_x (pg, j));
+  return svmla_x (pg, svsub_x (pg, t, 1.0f), p, t);
 }
 
-PL_SIG (SV, F, 1, expm1, -9.9, 9.9)
-PL_TEST_ULP (SV_NAME_F1 (expm1), 1.02)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (expm1), 0, SpecialBound, 100000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (expm1), SpecialBound, inf, 1000)
+TEST_SIG (SV, F, 1, expm1, -9.9, 9.9)
+TEST_ULP (SV_NAME_F1 (expm1), 1.02)
+TEST_DISABLE_FENV (SV_NAME_F1 (expm1))
+TEST_SYM_INTERVAL (SV_NAME_F1 (expm1), 0, SpecialBound, 100000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (expm1), SpecialBound, inf, 1000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_hypot_1u5.c b/math/aarch64/sve/hypot.c
similarity index 72%
rename from pl/math/sv_hypot_1u5.c
rename to math/aarch64/sve/hypot.c
index cf1590e4b9ab19..2ed298623accfa 100644
--- a/pl/math/sv_hypot_1u5.c
+++ b/math/aarch64/sve/hypot.c
@@ -1,13 +1,13 @@
 /*
  * Double-precision SVE hypot(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -43,9 +43,11 @@ svfloat64_t SV_NAME_D2 (hypot) (svfloat64_t x, svfloat64_t y, svbool_t pg)
   return svsqrt_x (pg, sqsum);
 }
 
-PL_SIG (SV, D, 2, hypot, -10.0, 10.0)
-PL_TEST_ULP (SV_NAME_D2 (hypot), 0.71)
-PL_TEST_INTERVAL2 (SV_NAME_D2 (hypot), 0, inf, 0, inf, 10000)
-PL_TEST_INTERVAL2 (SV_NAME_D2 (hypot), 0, inf, -0, -inf, 10000)
-PL_TEST_INTERVAL2 (SV_NAME_D2 (hypot), -0, -inf, 0, inf, 10000)
-PL_TEST_INTERVAL2 (SV_NAME_D2 (hypot), -0, -inf, -0, -inf, 10000)
+TEST_SIG (SV, D, 2, hypot, -10.0, 10.0)
+TEST_ULP (SV_NAME_D2 (hypot), 0.71)
+TEST_DISABLE_FENV (SV_NAME_D2 (hypot))
+TEST_INTERVAL2 (SV_NAME_D2 (hypot), 0, inf, 0, inf, 10000)
+TEST_INTERVAL2 (SV_NAME_D2 (hypot), 0, inf, -0, -inf, 10000)
+TEST_INTERVAL2 (SV_NAME_D2 (hypot), -0, -inf, 0, inf, 10000)
+TEST_INTERVAL2 (SV_NAME_D2 (hypot), -0, -inf, -0, -inf, 10000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_hypotf_1u5.c b/math/aarch64/sve/hypotf.c
similarity index 69%
rename from pl/math/sv_hypotf_1u5.c
rename to math/aarch64/sve/hypotf.c
index f428832b3dbcd7..b977b998986b87 100644
--- a/pl/math/sv_hypotf_1u5.c
+++ b/math/aarch64/sve/hypotf.c
@@ -1,13 +1,13 @@
 /*
  * Single-precision SVE hypot(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 #define TinyBound 0x0c800000 /* asuint (0x1p-102).  */
 #define Thres 0x73000000     /* 0x70000000 - TinyBound.  */
@@ -37,9 +37,11 @@ svfloat32_t SV_NAME_F2 (hypot) (svfloat32_t x, svfloat32_t y,
   return svsqrt_x (pg, sqsum);
 }
 
-PL_SIG (SV, F, 2, hypot, -10.0, 10.0)
-PL_TEST_ULP (SV_NAME_F2 (hypot), 0.71)
-PL_TEST_INTERVAL2 (SV_NAME_F2 (hypot), 0, inf, 0, inf, 10000)
-PL_TEST_INTERVAL2 (SV_NAME_F2 (hypot), 0, inf, -0, -inf, 10000)
-PL_TEST_INTERVAL2 (SV_NAME_F2 (hypot), -0, -inf, 0, inf, 10000)
-PL_TEST_INTERVAL2 (SV_NAME_F2 (hypot), -0, -inf, -0, -inf, 10000)
+TEST_SIG (SV, F, 2, hypot, -10.0, 10.0)
+TEST_ULP (SV_NAME_F2 (hypot), 0.71)
+TEST_DISABLE_FENV (SV_NAME_F2 (hypot))
+TEST_INTERVAL2 (SV_NAME_F2 (hypot), 0, inf, 0, inf, 10000)
+TEST_INTERVAL2 (SV_NAME_F2 (hypot), 0, inf, -0, -inf, 10000)
+TEST_INTERVAL2 (SV_NAME_F2 (hypot), -0, -inf, 0, inf, 10000)
+TEST_INTERVAL2 (SV_NAME_F2 (hypot), -0, -inf, -0, -inf, 10000)
+CLOSE_SVE_ATTR
diff --git a/math/aarch64/sve/log.c b/math/aarch64/sve/log.c
new file mode 100644
index 00000000000000..c612df48c1fdb5
--- /dev/null
+++ b/math/aarch64/sve/log.c
@@ -0,0 +1,97 @@
+/*
+ * Double-precision SVE log(x) function.
+ *
+ * Copyright (c) 2020-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+
+#define N (1 << V_LOG_TABLE_BITS)
+#define Max (0x7ff0000000000000)
+#define Min (0x0010000000000000)
+#define Thresh (0x7fe0000000000000) /* Max - Min.  */
+
+static const struct data
+{
+  double c0, c2;
+  double c1, c3;
+  double ln2, c4;
+  uint64_t off;
+} data = {
+  .c0 = -0x1.ffffffffffff7p-2,
+  .c1 = 0x1.55555555170d4p-2,
+  .c2 = -0x1.0000000399c27p-2,
+  .c3 = 0x1.999b2e90e94cap-3,
+  .c4 = -0x1.554e550bd501ep-3,
+  .ln2 = 0x1.62e42fefa39efp-1,
+  .off = 0x3fe6900900000000,
+};
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t hi, svuint64_t tmp, svfloat64_t y, svfloat64_t r2,
+	      svbool_t special, const struct data *d)
+{
+  svfloat64_t x = svreinterpret_f64 (svadd_x (svptrue_b64 (), tmp, d->off));
+  return sv_call_f64 (log, x, svmla_x (svptrue_b64 (), hi, r2, y), special);
+}
+
+/* Double-precision SVE log routine.
+   Maximum measured error is 2.64 ulp:
+   SV_NAME_D1 (log)(0x1.95e54bc91a5e2p+184) got 0x1.fffffffe88cacp+6
+					   want 0x1.fffffffe88cafp+6.  */
+svfloat64_t SV_NAME_D1 (log) (svfloat64_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  svuint64_t ix = svreinterpret_u64 (x);
+  svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thresh);
+
+  /* x = 2^k z; where z is in range [Off,2*Off) and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  svuint64_t tmp = svsub_x (pg, ix, d->off);
+  /* Calculate table index = (tmp >> (52 - V_LOG_TABLE_BITS)) % N.
+     The actual value of i is double this due to table layout.  */
+  svuint64_t i
+      = svand_x (pg, svlsr_x (pg, tmp, (51 - V_LOG_TABLE_BITS)), (N - 1) << 1);
+  svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52));
+  svfloat64_t z = svreinterpret_f64 (iz);
+  /* Lookup in 2 global lists (length N).  */
+  svfloat64_t invc = svld1_gather_index (pg, &__v_log_data.table[0].invc, i);
+  svfloat64_t logc = svld1_gather_index (pg, &__v_log_data.table[0].logc, i);
+
+  /* log(x) = log1p(z/c-1) + log(c) + k*Ln2.  */
+  svfloat64_t kd = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (tmp), 52));
+  /* hi = r + log(c) + k*Ln2.  */
+  svfloat64_t ln2_and_c4 = svld1rq_f64 (svptrue_b64 (), &d->ln2);
+  svfloat64_t r = svmad_x (pg, invc, z, -1);
+  svfloat64_t hi = svmla_lane_f64 (logc, kd, ln2_and_c4, 0);
+  hi = svadd_x (pg, r, hi);
+
+  /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi.  */
+  svfloat64_t odd_coeffs = svld1rq_f64 (svptrue_b64 (), &d->c1);
+  svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+  svfloat64_t y = svmla_lane_f64 (sv_f64 (d->c2), r, odd_coeffs, 1);
+  svfloat64_t p = svmla_lane_f64 (sv_f64 (d->c0), r, odd_coeffs, 0);
+  y = svmla_lane_f64 (y, r2, ln2_and_c4, 1);
+  y = svmla_x (pg, p, r2, y);
+
+  if (unlikely (svptest_any (pg, special)))
+    return special_case (hi, tmp, y, r2, special, d);
+  return svmla_x (pg, hi, r2, y);
+}
+
+TEST_SIG (SV, D, 1, log, 0.01, 11.1)
+TEST_ULP (SV_NAME_D1 (log), 2.15)
+TEST_DISABLE_FENV (SV_NAME_D1 (log))
+TEST_INTERVAL (SV_NAME_D1 (log), -0.0, -inf, 1000)
+TEST_INTERVAL (SV_NAME_D1 (log), 0, 0x1p-149, 1000)
+TEST_INTERVAL (SV_NAME_D1 (log), 0x1p-149, 0x1p-126, 4000)
+TEST_INTERVAL (SV_NAME_D1 (log), 0x1p-126, 0x1p-23, 50000)
+TEST_INTERVAL (SV_NAME_D1 (log), 0x1p-23, 1.0, 50000)
+TEST_INTERVAL (SV_NAME_D1 (log), 1.0, 100, 50000)
+TEST_INTERVAL (SV_NAME_D1 (log), 100, inf, 50000)
+CLOSE_SVE_ATTR
diff --git a/math/aarch64/sve/log10.c b/math/aarch64/sve/log10.c
new file mode 100644
index 00000000000000..5af142d79f55fb
--- /dev/null
+++ b/math/aarch64/sve/log10.c
@@ -0,0 +1,101 @@
+/*
+ * Double-precision SVE log10(x) function.
+ *
+ * Copyright (c) 2022-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+
+#define Min 0x0010000000000000
+#define Max 0x7ff0000000000000
+#define Thres 0x7fe0000000000000 /* Max - Min.  */
+#define N (1 << V_LOG10_TABLE_BITS)
+
+static const struct data
+{
+  double c0, c2;
+  double c1, c3;
+  double invln10, log10_2;
+  double c4;
+  uint64_t off;
+} data = {
+  .c0 = -0x1.bcb7b1526e506p-3,
+  .c1 = 0x1.287a7636be1d1p-3,
+  .c2 = -0x1.bcb7b158af938p-4,
+  .c3 = 0x1.63c78734e6d07p-4,
+  .c4 = -0x1.287461742fee4p-4,
+  .invln10 = 0x1.bcb7b1526e50ep-2,
+  .log10_2 = 0x1.34413509f79ffp-2,
+  .off = 0x3fe6900900000000,
+};
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t hi, svuint64_t tmp, svfloat64_t y, svfloat64_t r2,
+	      svbool_t special, const struct data *d)
+{
+  svfloat64_t x = svreinterpret_f64 (svadd_x (svptrue_b64 (), tmp, d->off));
+  return sv_call_f64 (log10, x, svmla_x (svptrue_b64 (), hi, r2, y), special);
+}
+
+/* Double-precision SVE log10 routine.
+   Maximum measured error is 2.46 ulps.
+   SV_NAME_D1 (log10)(0x1.131956cd4b627p+0) got 0x1.fffbdf6eaa669p-6
+					   want 0x1.fffbdf6eaa667p-6.  */
+svfloat64_t SV_NAME_D1 (log10) (svfloat64_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  svuint64_t ix = svreinterpret_u64 (x);
+  svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thres);
+
+  /* x = 2^k z; where z is in range [Off,2*Off) and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  svuint64_t tmp = svsub_x (pg, ix, d->off);
+  svuint64_t i = svlsr_x (pg, tmp, 51 - V_LOG10_TABLE_BITS);
+  i = svand_x (pg, i, (N - 1) << 1);
+  svfloat64_t k = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (tmp), 52));
+  svfloat64_t z = svreinterpret_f64 (
+      svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52)));
+
+  /* log(x) = k*log(2) + log(c) + log(z/c).  */
+  svfloat64_t invc = svld1_gather_index (pg, &__v_log10_data.table[0].invc, i);
+  svfloat64_t logc
+      = svld1_gather_index (pg, &__v_log10_data.table[0].log10c, i);
+
+  /* We approximate log(z/c) with a polynomial P(x) ~= log(x + 1):
+     r = z/c - 1 (we look up precomputed 1/c)
+     log(z/c) ~= P(r).  */
+  svfloat64_t r = svmad_x (pg, invc, z, -1.0);
+
+  /* hi = log10(c) + r*invln10 + k*log10(2).  */
+  svfloat64_t invln10_log10_2 = svld1rq_f64 (svptrue_b64 (), &d->invln10);
+  svfloat64_t w = svmla_lane_f64 (logc, r, invln10_log10_2, 0);
+  svfloat64_t hi = svmla_lane_f64 (w, k, invln10_log10_2, 1);
+
+  /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi.  */
+  svfloat64_t odd_coeffs = svld1rq_f64 (svptrue_b64 (), &d->c1);
+  svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+  svfloat64_t y = svmla_lane_f64 (sv_f64 (d->c2), r, odd_coeffs, 1);
+  svfloat64_t p = svmla_lane_f64 (sv_f64 (d->c0), r, odd_coeffs, 0);
+  y = svmla_x (pg, y, r2, d->c4);
+  y = svmla_x (pg, p, r2, y);
+
+  if (unlikely (svptest_any (pg, special)))
+    return special_case (hi, tmp, y, r2, special, d);
+  return svmla_x (pg, hi, r2, y);
+}
+
+TEST_SIG (SV, D, 1, log10, 0.01, 11.1)
+TEST_ULP (SV_NAME_D1 (log10), 1.97)
+TEST_DISABLE_FENV (SV_NAME_D1 (log10))
+TEST_INTERVAL (SV_NAME_D1 (log10), -0.0, -0x1p126, 100)
+TEST_INTERVAL (SV_NAME_D1 (log10), 0x1p-149, 0x1p-126, 4000)
+TEST_INTERVAL (SV_NAME_D1 (log10), 0x1p-126, 0x1p-23, 50000)
+TEST_INTERVAL (SV_NAME_D1 (log10), 0x1p-23, 1.0, 50000)
+TEST_INTERVAL (SV_NAME_D1 (log10), 1.0, 100, 50000)
+TEST_INTERVAL (SV_NAME_D1 (log10), 100, inf, 50000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_log10f_3u5.c b/math/aarch64/sve/log10f.c
similarity index 56%
rename from pl/math/sv_log10f_3u5.c
rename to math/aarch64/sve/log10f.c
index a685b23e5de539..6c3add45176193 100644
--- a/pl/math/sv_log10f_3u5.c
+++ b/math/aarch64/sve/log10f.c
@@ -1,19 +1,20 @@
 /*
  * Single-precision SVE log10 function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
   float poly_0246[4];
   float poly_1357[4];
   float ln2, inv_ln10;
+  uint32_t off, lower;
 } data = {
   .poly_1357 = {
     /* Coefficients copied from the AdvSIMD routine, then rearranged so that coeffs
@@ -25,18 +26,23 @@ static const struct data
 		 -0x1.0fc92cp-4f },
   .ln2 = 0x1.62e43p-1f,
   .inv_ln10 = 0x1.bcb7b2p-2f,
+  .off = 0x3f2aaaab,
+  /* Lower bound is the smallest positive normal float 0x00800000. For
+     optimised register use subnormals are detected after offset has been
+     subtracted, so lower bound is 0x00800000 - offset (which wraps around).  */
+  .lower = 0x00800000 - 0x3f2aaaab
 };
 
-#define Min 0x00800000
-#define Max 0x7f800000
-#define Thres 0x7f000000  /* Max - Min.  */
-#define Offset 0x3f2aaaab /* 0.666667.  */
+#define Thres 0x7f000000 /* asuint32(inf) - 0x00800000.  */
 #define MantissaMask 0x007fffff
 
 static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y,
+	      svbool_t cmp)
 {
-  return sv_call_f32 (log10f, x, y, special);
+  return sv_call_f32 (
+      log10f, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)),
+      svmla_x (svptrue_b32 (), p, r2, y), cmp);
 }
 
 /* Optimised implementation of SVE log10f using the same algorithm and
@@ -47,23 +53,25 @@ special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
 svfloat32_t SV_NAME_F1 (log10) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
-  svuint32_t ix = svreinterpret_u32 (x);
-  svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thres);
+
+  svuint32_t u_off = svreinterpret_u32 (x);
+
+  u_off = svsub_x (pg, u_off, d->off);
+  svbool_t special = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thres);
 
   /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
-  ix = svsub_x (pg, ix, Offset);
   svfloat32_t n = svcvt_f32_x (
-      pg, svasr_x (pg, svreinterpret_s32 (ix), 23)); /* signextend.  */
-  ix = svand_x (pg, ix, MantissaMask);
-  ix = svadd_x (pg, ix, Offset);
+      pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* signextend.  */
+  svuint32_t ix = svand_x (pg, u_off, MantissaMask);
+  ix = svadd_x (pg, ix, d->off);
   svfloat32_t r = svsub_x (pg, svreinterpret_f32 (ix), 1.0f);
 
   /* y = log10(1+r) + n*log10(2)
      log10(1+r) ~ r * InvLn(10) + P(r)
      where P(r) is a polynomial. Use order 9 for log10(1+x), i.e. order 8 for
      log10(1+x)/x, with x in [-1/3, 1/3] (offset=2/3).  */
-  svfloat32_t r2 = svmul_x (pg, r, r);
-  svfloat32_t r4 = svmul_x (pg, r2, r2);
+  svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
+  svfloat32_t r4 = svmul_x (svptrue_b32 (), r2, r2);
   svfloat32_t p_1357 = svld1rq (svptrue_b32 (), &d->poly_1357[0]);
   svfloat32_t q_01 = svmla_lane (sv_f32 (d->poly_0246[0]), r, p_1357, 0);
   svfloat32_t q_23 = svmla_lane (sv_f32 (d->poly_0246[1]), r, p_1357, 1);
@@ -78,16 +86,17 @@ svfloat32_t SV_NAME_F1 (log10) (svfloat32_t x, const svbool_t pg)
   hi = svmul_x (pg, hi, d->inv_ln10);
 
   if (unlikely (svptest_any (pg, special)))
-    return special_case (x, svmla_x (svnot_z (pg, special), hi, r2, y),
-			 special);
-  return svmla_x (pg, hi, r2, y);
+    return special_case (u_off, hi, r2, y, special);
+  return svmla_x (svptrue_b32 (), hi, r2, y);
 }
 
-PL_SIG (SV, F, 1, log10, 0.01, 11.1)
-PL_TEST_ULP (SV_NAME_F1 (log10), 2.82)
-PL_TEST_INTERVAL (SV_NAME_F1 (log10), -0.0, -0x1p126, 100)
-PL_TEST_INTERVAL (SV_NAME_F1 (log10), 0x1p-149, 0x1p-126, 4000)
-PL_TEST_INTERVAL (SV_NAME_F1 (log10), 0x1p-126, 0x1p-23, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (log10), 0x1p-23, 1.0, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (log10), 1.0, 100, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (log10), 100, inf, 50000)
+TEST_SIG (SV, F, 1, log10, 0.01, 11.1)
+TEST_ULP (SV_NAME_F1 (log10), 2.82)
+TEST_DISABLE_FENV (SV_NAME_F1 (log10))
+TEST_INTERVAL (SV_NAME_F1 (log10), -0.0, -0x1p126, 100)
+TEST_INTERVAL (SV_NAME_F1 (log10), 0x1p-149, 0x1p-126, 4000)
+TEST_INTERVAL (SV_NAME_F1 (log10), 0x1p-126, 0x1p-23, 50000)
+TEST_INTERVAL (SV_NAME_F1 (log10), 0x1p-23, 1.0, 50000)
+TEST_INTERVAL (SV_NAME_F1 (log10), 1.0, 100, 50000)
+TEST_INTERVAL (SV_NAME_F1 (log10), 100, inf, 50000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_log1p_2u5.c b/math/aarch64/sve/log1p.c
similarity index 88%
rename from pl/math/sv_log1p_2u5.c
rename to math/aarch64/sve/log1p.c
index f178ab16238ab0..e6b895b5290820 100644
--- a/pl/math/sv_log1p_2u5.c
+++ b/math/aarch64/sve/log1p.c
@@ -1,14 +1,14 @@
 /*
  * Double-precision SVE log(1+x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "sv_poly_f64.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -107,10 +107,12 @@ svfloat64_t SV_NAME_D1 (log1p) (svfloat64_t x, svbool_t pg)
   return y;
 }
 
-PL_SIG (SV, D, 1, log1p, -0.9, 10.0)
-PL_TEST_ULP (SV_NAME_D1 (log1p), 1.97)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (log1p), 0.0, 0x1p-23, 50000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (log1p), 0x1p-23, 0.001, 50000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (log1p), 0.001, 1.0, 50000)
-PL_TEST_INTERVAL (SV_NAME_D1 (log1p), 1, inf, 10000)
-PL_TEST_INTERVAL (SV_NAME_D1 (log1p), -1, -inf, 10)
+TEST_SIG (SV, D, 1, log1p, -0.9, 10.0)
+TEST_ULP (SV_NAME_D1 (log1p), 1.97)
+TEST_DISABLE_FENV (SV_NAME_D1 (log1p))
+TEST_SYM_INTERVAL (SV_NAME_D1 (log1p), 0.0, 0x1p-23, 50000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (log1p), 0x1p-23, 0.001, 50000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (log1p), 0.001, 1.0, 50000)
+TEST_INTERVAL (SV_NAME_D1 (log1p), 1, inf, 10000)
+TEST_INTERVAL (SV_NAME_D1 (log1p), -1, -inf, 10)
+CLOSE_SVE_ATTR
diff --git a/math/aarch64/sve/log1pf.c b/math/aarch64/sve/log1pf.c
new file mode 100644
index 00000000000000..77ae6218f93198
--- /dev/null
+++ b/math/aarch64/sve/log1pf.c
@@ -0,0 +1,43 @@
+/*
+ * Single-precision vector log(x + 1) function.
+ *
+ * Copyright (c) 2023-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "sv_log1pf_inline.h"
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svbool_t special)
+{
+  return sv_call_f32 (log1pf, x, sv_log1pf_inline (x, svptrue_b32 ()),
+		      special);
+}
+
+/* Vector log1pf approximation using polynomial on reduced interval. Worst-case
+   error is 1.27 ULP very close to 0.5.
+   _ZGVsMxv_log1pf(0x1.fffffep-2) got 0x1.9f324p-2
+				 want 0x1.9f323ep-2.  */
+svfloat32_t SV_NAME_F1 (log1p) (svfloat32_t x, svbool_t pg)
+{
+  /* x < -1, Inf/Nan.  */
+  svbool_t special = svcmpeq (pg, svreinterpret_u32 (x), 0x7f800000);
+  special = svorn_z (pg, special, svcmpge (pg, x, -1));
+
+  if (unlikely (svptest_any (pg, special)))
+    return special_case (x, special);
+
+  return sv_log1pf_inline (x, pg);
+}
+
+TEST_SIG (SV, F, 1, log1p, -0.9, 10.0)
+TEST_ULP (SV_NAME_F1 (log1p), 0.77)
+TEST_DISABLE_FENV (SV_NAME_F1 (log1p))
+TEST_SYM_INTERVAL (SV_NAME_F1 (log1p), 0, 0x1p-23, 5000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (log1p), 0x1p-23, 1, 5000)
+TEST_INTERVAL (SV_NAME_F1 (log1p), 1, inf, 10000)
+TEST_INTERVAL (SV_NAME_F1 (log1p), -1, -inf, 10)
+CLOSE_SVE_ATTR
diff --git a/math/aarch64/sve/log2.c b/math/aarch64/sve/log2.c
new file mode 100644
index 00000000000000..11c65c1b296309
--- /dev/null
+++ b/math/aarch64/sve/log2.c
@@ -0,0 +1,96 @@
+/*
+ * Double-precision SVE log2 function.
+ *
+ * Copyright (c) 2022-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+
+#define N (1 << V_LOG2_TABLE_BITS)
+#define Max (0x7ff0000000000000)
+#define Min (0x0010000000000000)
+#define Thresh (0x7fe0000000000000) /* Max - Min.  */
+
+static const struct data
+{
+  double c0, c2;
+  double c1, c3;
+  double invln2, c4;
+  uint64_t off;
+} data = {
+  .c0 = -0x1.71547652b83p-1,
+  .c1 = 0x1.ec709dc340953p-2,
+  .c2 = -0x1.71547651c8f35p-2,
+  .c3 = 0x1.2777ebe12dda5p-2,
+  .c4 = -0x1.ec738d616fe26p-3,
+  .invln2 = 0x1.71547652b82fep0,
+  .off = 0x3fe6900900000000,
+};
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t w, svuint64_t tmp, svfloat64_t y, svfloat64_t r2,
+	      svbool_t special, const struct data *d)
+{
+  svfloat64_t x = svreinterpret_f64 (svadd_x (svptrue_b64 (), tmp, d->off));
+  return sv_call_f64 (log2, x, svmla_x (svptrue_b64 (), w, r2, y), special);
+}
+
+/* Double-precision SVE log2 routine.
+   Implements the same algorithm as AdvSIMD log10, with coefficients and table
+   entries scaled in extended precision.
+   The maximum observed error is 2.58 ULP:
+   SV_NAME_D1 (log2)(0x1.0b556b093869bp+0) got 0x1.fffb34198d9dap-5
+					  want 0x1.fffb34198d9ddp-5.  */
+svfloat64_t SV_NAME_D1 (log2) (svfloat64_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  svuint64_t ix = svreinterpret_u64 (x);
+  svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thresh);
+
+  /* x = 2^k z; where z is in range [Off,2*Off) and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  svuint64_t tmp = svsub_x (pg, ix, d->off);
+  svuint64_t i = svlsr_x (pg, tmp, 51 - V_LOG2_TABLE_BITS);
+  i = svand_x (pg, i, (N - 1) << 1);
+  svfloat64_t k = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (tmp), 52));
+  svfloat64_t z = svreinterpret_f64 (
+      svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52)));
+
+  svfloat64_t invc = svld1_gather_index (pg, &__v_log2_data.table[0].invc, i);
+  svfloat64_t log2c
+      = svld1_gather_index (pg, &__v_log2_data.table[0].log2c, i);
+
+  /* log2(x) = log1p(z/c-1)/log(2) + log2(c) + k.  */
+
+  svfloat64_t invln2_and_c4 = svld1rq_f64 (svptrue_b64 (), &d->invln2);
+  svfloat64_t r = svmad_x (pg, invc, z, -1.0);
+  svfloat64_t w = svmla_lane_f64 (log2c, r, invln2_and_c4, 0);
+  w = svadd_x (pg, k, w);
+
+  svfloat64_t odd_coeffs = svld1rq_f64 (svptrue_b64 (), &d->c1);
+  svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+  svfloat64_t y = svmla_lane_f64 (sv_f64 (d->c2), r, odd_coeffs, 1);
+  svfloat64_t p = svmla_lane_f64 (sv_f64 (d->c0), r, odd_coeffs, 0);
+  y = svmla_lane_f64 (y, r2, invln2_and_c4, 1);
+  y = svmla_x (pg, p, r2, y);
+
+  if (unlikely (svptest_any (pg, special)))
+    return special_case (w, tmp, y, r2, special, d);
+  return svmla_x (pg, w, r2, y);
+}
+
+TEST_SIG (SV, D, 1, log2, 0.01, 11.1)
+TEST_ULP (SV_NAME_D1 (log2), 2.09)
+TEST_DISABLE_FENV (SV_NAME_D1 (log2))
+TEST_INTERVAL (SV_NAME_D1 (log2), -0.0, -0x1p126, 1000)
+TEST_INTERVAL (SV_NAME_D1 (log2), 0.0, 0x1p-126, 4000)
+TEST_INTERVAL (SV_NAME_D1 (log2), 0x1p-126, 0x1p-23, 50000)
+TEST_INTERVAL (SV_NAME_D1 (log2), 0x1p-23, 1.0, 50000)
+TEST_INTERVAL (SV_NAME_D1 (log2), 1.0, 100, 50000)
+TEST_INTERVAL (SV_NAME_D1 (log2), 100, inf, 50000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_log2f_2u5.c b/math/aarch64/sve/log2f.c
similarity index 53%
rename from pl/math/sv_log2f_2u5.c
rename to math/aarch64/sve/log2f.c
index 9e96c62bbcc6c9..312fd448226bf3 100644
--- a/pl/math/sv_log2f_2u5.c
+++ b/math/aarch64/sve/log2f.c
@@ -1,18 +1,19 @@
 /*
  * Single-precision vector/SVE log2 function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
   float poly_02468[5];
   float poly_1357[4];
+  uint32_t off, lower;
 } data = {
   .poly_1357 = {
     /* Coefficients copied from the AdvSIMD routine, then rearranged so that coeffs
@@ -22,18 +23,23 @@ static const struct data
   },
   .poly_02468 = { 0x1.715476p0f, 0x1.ec701cp-2f, 0x1.27a0b8p-2f,
 		  0x1.9d8ecap-3f, 0x1.9e495p-3f },
+  .off = 0x3f2aaaab,
+  /* Lower bound is the smallest positive normal float 0x00800000. For
+     optimised register use subnormals are detected after offset has been
+     subtracted, so lower bound is 0x00800000 - offset (which wraps around).  */
+  .lower = 0x00800000 - 0x3f2aaaab
 };
 
-#define Min (0x00800000)
-#define Max (0x7f800000)
-#define Thres (0x7f000000) /* Max - Min.  */
+#define Thresh (0x7f000000) /* asuint32(inf) - 0x00800000.  */
 #define MantissaMask (0x007fffff)
-#define Off (0x3f2aaaab) /* 0.666667.  */
 
 static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
+special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y,
+	      svbool_t cmp)
 {
-  return sv_call_f32 (log2f, x, y, cmp);
+  return sv_call_f32 (
+      log2f, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)),
+      svmla_x (svptrue_b32 (), p, r2, y), cmp);
 }
 
 /* Optimised implementation of SVE log2f, using the same algorithm
@@ -45,19 +51,20 @@ svfloat32_t SV_NAME_F1 (log2) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
-  svuint32_t u = svreinterpret_u32 (x);
-  svbool_t special = svcmpge (pg, svsub_x (pg, u, Min), Thres);
+  svuint32_t u_off = svreinterpret_u32 (x);
+
+  u_off = svsub_x (pg, u_off, d->off);
+  svbool_t special = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thresh);
 
   /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
-  u = svsub_x (pg, u, Off);
   svfloat32_t n = svcvt_f32_x (
-      pg, svasr_x (pg, svreinterpret_s32 (u), 23)); /* Sign-extend.  */
-  u = svand_x (pg, u, MantissaMask);
-  u = svadd_x (pg, u, Off);
+      pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* Sign-extend.  */
+  svuint32_t u = svand_x (pg, u_off, MantissaMask);
+  u = svadd_x (pg, u, d->off);
   svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), 1.0f);
 
   /* y = log2(1+r) + n.  */
-  svfloat32_t r2 = svmul_x (pg, r, r);
+  svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
 
   /* Evaluate polynomial using pairwise Horner scheme.  */
   svfloat32_t p_1357 = svld1rq (svptrue_b32 (), &d->poly_1357[0]);
@@ -71,16 +78,17 @@ svfloat32_t SV_NAME_F1 (log2) (svfloat32_t x, const svbool_t pg)
   y = svmla_x (pg, q_01, r2, y);
 
   if (unlikely (svptest_any (pg, special)))
-    return special_case (x, svmla_x (svnot_z (pg, special), n, r, y), special);
-  return svmla_x (pg, n, r, y);
+    return special_case (u_off, n, r, y, special);
+  return svmla_x (svptrue_b32 (), n, r, y);
 }
 
-PL_SIG (SV, F, 1, log2, 0.01, 11.1)
-PL_TEST_ULP (SV_NAME_F1 (log2), 1.99)
-PL_TEST_EXPECT_FENV_ALWAYS (SV_NAME_F1 (log2))
-PL_TEST_INTERVAL (SV_NAME_F1 (log2), -0.0, -0x1p126, 4000)
-PL_TEST_INTERVAL (SV_NAME_F1 (log2), 0.0, 0x1p-126, 4000)
-PL_TEST_INTERVAL (SV_NAME_F1 (log2), 0x1p-126, 0x1p-23, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (log2), 0x1p-23, 1.0, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (log2), 1.0, 100, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (log2), 100, inf, 50000)
+TEST_SIG (SV, F, 1, log2, 0.01, 11.1)
+TEST_ULP (SV_NAME_F1 (log2), 1.99)
+TEST_DISABLE_FENV (SV_NAME_F1 (log2))
+TEST_INTERVAL (SV_NAME_F1 (log2), -0.0, -0x1p126, 4000)
+TEST_INTERVAL (SV_NAME_F1 (log2), 0.0, 0x1p-126, 4000)
+TEST_INTERVAL (SV_NAME_F1 (log2), 0x1p-126, 0x1p-23, 50000)
+TEST_INTERVAL (SV_NAME_F1 (log2), 0x1p-23, 1.0, 50000)
+TEST_INTERVAL (SV_NAME_F1 (log2), 1.0, 100, 50000)
+TEST_INTERVAL (SV_NAME_F1 (log2), 100, inf, 50000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_logf_3u4.c b/math/aarch64/sve/logf.c
similarity index 52%
rename from pl/math/sv_logf_3u4.c
rename to math/aarch64/sve/logf.c
index 96735524703621..2898e36974d6d8 100644
--- a/pl/math/sv_logf_3u4.c
+++ b/math/aarch64/sve/logf.c
@@ -1,19 +1,20 @@
 /*
  * Single-precision vector log function.
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
   float poly_0135[4];
   float poly_246[3];
   float ln2;
+  uint32_t off, lower;
 } data = {
   .poly_0135 = {
     /* Coefficients copied from the AdvSIMD routine in math/, then rearranged so
@@ -22,21 +23,24 @@ static const struct data
     -0x1.3e737cp-3f, 0x1.5a9aa2p-3f, 0x1.961348p-3f, 0x1.555d7cp-2f
   },
   .poly_246 = { -0x1.4f9934p-3f, -0x1.00187cp-2f, -0x1.ffffc8p-2f },
-  .ln2 = 0x1.62e43p-1f
+  .ln2 = 0x1.62e43p-1f,
+  .off = 0x3f2aaaab,
+  /* Lower bound is the smallest positive normal float 0x00800000. For
+     optimised register use subnormals are detected after offset has been
+     subtracted, so lower bound is 0x00800000 - offset (which wraps around).  */
+  .lower = 0x00800000 - 0x3f2aaaab
 };
 
-#define Min (0x00800000)
-#define Max (0x7f800000)
-#define Thresh (0x7f000000) /* Max - Min.  */
+#define Thresh (0x7f000000) /* asuint32(inf) - 0x00800000.  */
 #define Mask (0x007fffff)
-#define Off (0x3f2aaaab) /* 0.666667.  */
-
-float optr_aor_log_f32 (float);
 
 static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
+special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y,
+	      svbool_t cmp)
 {
-  return sv_call_f32 (optr_aor_log_f32, x, y, cmp);
+  return sv_call_f32 (
+      logf, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)),
+      svmla_x (svptrue_b32 (), p, r2, y), cmp);
 }
 
 /* Optimised implementation of SVE logf, using the same algorithm and
@@ -47,19 +51,21 @@ svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
-  svuint32_t u = svreinterpret_u32 (x);
-  svbool_t cmp = svcmpge (pg, svsub_x (pg, u, Min), Thresh);
+  svuint32_t u_off = svreinterpret_u32 (x);
+
+  u_off = svsub_x (pg, u_off, d->off);
+  svbool_t cmp = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thresh);
 
   /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
-  u = svsub_x (pg, u, Off);
   svfloat32_t n = svcvt_f32_x (
-      pg, svasr_x (pg, svreinterpret_s32 (u), 23)); /* Sign-extend.  */
-  u = svand_x (pg, u, Mask);
-  u = svadd_x (pg, u, Off);
+      pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* Sign-extend.  */
+
+  svuint32_t u = svand_x (pg, u_off, Mask);
+  u = svadd_x (pg, u, d->off);
   svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), 1.0f);
 
   /* y = log(1+r) + n*ln2.  */
-  svfloat32_t r2 = svmul_x (pg, r, r);
+  svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
   /* n*ln2 + r + r2*(P6 + r*P5 + r2*(P4 + r*P3 + r2*(P2 + r*P1 + r2*P0))).  */
   svfloat32_t p_0135 = svld1rq (svptrue_b32 (), &d->poly_0135[0]);
   svfloat32_t p = svmla_lane (sv_f32 (d->poly_246[0]), r, p_0135, 1);
@@ -72,15 +78,17 @@ svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg)
   p = svmla_x (pg, r, n, d->ln2);
 
   if (unlikely (svptest_any (pg, cmp)))
-    return special_case (x, svmla_x (svnot_z (pg, cmp), p, r2, y), cmp);
+    return special_case (u_off, p, r2, y, cmp);
   return svmla_x (pg, p, r2, y);
 }
 
-PL_SIG (SV, F, 1, log, 0.01, 11.1)
-PL_TEST_ULP (SV_NAME_F1 (log), 2.85)
-PL_TEST_INTERVAL (SV_NAME_F1 (log), -0.0, -inf, 100)
-PL_TEST_INTERVAL (SV_NAME_F1 (log), 0, 0x1p-126, 100)
-PL_TEST_INTERVAL (SV_NAME_F1 (log), 0x1p-126, 0x1p-23, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (log), 0x1p-23, 1.0, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (log), 1.0, 100, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (log), 100, inf, 50000)
+TEST_SIG (SV, F, 1, log, 0.01, 11.1)
+TEST_ULP (SV_NAME_F1 (log), 2.85)
+TEST_DISABLE_FENV (SV_NAME_F1 (log))
+TEST_INTERVAL (SV_NAME_F1 (log), -0.0, -inf, 100)
+TEST_INTERVAL (SV_NAME_F1 (log), 0, 0x1p-126, 100)
+TEST_INTERVAL (SV_NAME_F1 (log), 0x1p-126, 0x1p-23, 50000)
+TEST_INTERVAL (SV_NAME_F1 (log), 0x1p-23, 1.0, 50000)
+TEST_INTERVAL (SV_NAME_F1 (log), 1.0, 100, 50000)
+TEST_INTERVAL (SV_NAME_F1 (log), 100, inf, 50000)
+CLOSE_SVE_ATTR
diff --git a/math/aarch64/sve/modf.c b/math/aarch64/sve/modf.c
new file mode 100644
index 00000000000000..5944c7d37c4c19
--- /dev/null
+++ b/math/aarch64/sve/modf.c
@@ -0,0 +1,36 @@
+/*
+ * Double-precision SVE modf(x, *y) function.
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+
+/* Modf algorithm. Produces exact values in all rounding modes.  */
+svfloat64_t SV_NAME_D1_L1 (modf) (svfloat64_t x, double *out_int,
+				  const svbool_t pg)
+{
+  /* Get integer component of x.  */
+  svfloat64_t fint_comp = svrintz_x (pg, x);
+
+  svst1_f64 (pg, out_int, fint_comp);
+
+  /* Subtract integer component from input.  */
+  svfloat64_t remaining = svsub_f64_x (svptrue_b64 (), x, fint_comp);
+
+  /* Return +0 for integer x.  */
+  svbool_t is_integer = svcmpeq (pg, x, fint_comp);
+  return svsel (is_integer, sv_f64 (0), remaining);
+}
+
+TEST_ULP (_ZGVsMxvl8_modf_frac, 0.0)
+TEST_SYM_INTERVAL (_ZGVsMxvl8_modf_frac, 0, 1, 20000)
+TEST_SYM_INTERVAL (_ZGVsMxvl8_modf_frac, 1, inf, 20000)
+
+TEST_ULP (_ZGVsMxvl8_modf_int, 0.0)
+TEST_SYM_INTERVAL (_ZGVsMxvl8_modf_int, 0, 1, 20000)
+TEST_SYM_INTERVAL (_ZGVsMxvl8_modf_int, 1, inf, 20000)
+CLOSE_SVE_ATTR
diff --git a/math/aarch64/sve/modff.c b/math/aarch64/sve/modff.c
new file mode 100644
index 00000000000000..ad7ce4e2c88fb6
--- /dev/null
+++ b/math/aarch64/sve/modff.c
@@ -0,0 +1,36 @@
+/*
+ * Single-precision SVE modff(x, *y) function.
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+
+/* Modff algorithm. Produces exact values in all rounding modes.  */
+svfloat32_t SV_NAME_F1_L1 (modf) (svfloat32_t x, float *out_int,
+				  const svbool_t pg)
+{
+  /* Get integer component of x.  */
+  svfloat32_t fint_comp = svrintz_x (pg, x);
+
+  svst1_f32 (pg, out_int, fint_comp);
+
+  /* Subtract integer component from input.  */
+  svfloat32_t remaining = svsub_f32_x (svptrue_b32 (), x, fint_comp);
+
+  /* Return +0 for integer x.  */
+  svbool_t is_integer = svcmpeq (pg, x, fint_comp);
+  return svsel (is_integer, sv_f32 (0), remaining);
+}
+
+TEST_ULP (_ZGVsMxvl4_modff_frac, 0.0)
+TEST_SYM_INTERVAL (_ZGVsMxvl4_modff_frac, 0, 1, 20000)
+TEST_SYM_INTERVAL (_ZGVsMxvl4_modff_frac, 1, inf, 20000)
+
+TEST_ULP (_ZGVsMxvl4_modff_int, 0.0)
+TEST_SYM_INTERVAL (_ZGVsMxvl4_modff_int, 0, 1, 20000)
+TEST_SYM_INTERVAL (_ZGVsMxvl4_modff_int, 1, inf, 20000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_pow_1u5.c b/math/aarch64/sve/pow.c
similarity index 64%
rename from pl/math/sv_pow_1u5.c
rename to math/aarch64/sve/pow.c
index 0838810206a1a2..12b2fb42b2cb76 100644
--- a/pl/math/sv_pow_1u5.c
+++ b/math/aarch64/sve/pow.c
@@ -1,13 +1,13 @@
 /*
  * Double-precision SVE pow(x, y) function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2025, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 /* This version share a similar algorithm as AOR scalar pow.
 
@@ -23,8 +23,8 @@
    The SVE algorithm drops the tail in the exp computation at the price of
    a lower accuracy, slightly above 1ULP.
    The SVE algorithm also drops the special treatement of small (< 2^-65) and
-   large (> 2^63) finite values of |y|, as they only affect non-round to nearest
-   modes.
+   large (> 2^63) finite values of |y|, as they only affect non-round to
+   nearest modes.
 
    Maximum measured error is 1.04 ULPs:
    SV_NAME_D2 (pow) (0x1.3d2d45bc848acp+63, -0x1.a48a38b40cd43p-12)
@@ -33,19 +33,18 @@
 
 /* Data is defined in v_pow_log_data.c.  */
 #define N_LOG (1 << V_POW_LOG_TABLE_BITS)
-#define A __v_pow_log_data.poly
 #define Off 0x3fe6955500000000
 
 /* Data is defined in v_pow_exp_data.c.  */
 #define N_EXP (1 << V_POW_EXP_TABLE_BITS)
 #define SignBias (0x800 << V_POW_EXP_TABLE_BITS)
-#define C __v_pow_exp_data.poly
 #define SmallExp 0x3c9 /* top12(0x1p-54).  */
 #define BigExp 0x408   /* top12(512.).  */
 #define ThresExp 0x03f /* BigExp - SmallExp.  */
 #define HugeExp 0x409  /* top12(1024.).  */
 
 /* Constants associated with pow.  */
+#define SmallBoundX 0x1p-126
 #define SmallPowX 0x001 /* top12(0x1p-126).  */
 #define BigPowX 0x7ff	/* top12(INFINITY).  */
 #define ThresPowX 0x7fe /* BigPowX - SmallPowX.  */
@@ -53,6 +52,31 @@
 #define BigPowY 0x43e	/* top12(0x1.749p62).  */
 #define ThresPowY 0x080 /* BigPowY - SmallPowY.  */
 
+static const struct data
+{
+  double log_c0, log_c2, log_c4, log_c6, ln2_hi, ln2_lo;
+  double log_c1, log_c3, log_c5;
+  uint64_t off; /* Integer bit-pattern offset, see sv_log_inline.  */
+  double n_over_ln2, exp_c2, ln2_over_n_hi, ln2_over_n_lo, exp_c0, exp_c1;
+} data = {
+  .log_c0 = -0x1p-1,
+  .log_c1 = -0x1.555555555556p-1,
+  .log_c2 = 0x1.0000000000006p-1,
+  .log_c3 = 0x1.999999959554ep-1,
+  .log_c4 = -0x1.555555529a47ap-1,
+  .log_c5 = -0x1.2495b9b4845e9p0,
+  .log_c6 = 0x1.0002b8b263fc3p0,
+  .off = Off,
+  .exp_c0 = 0x1.fffffffffffd4p-2,
+  .exp_c1 = 0x1.5555571d6ef9p-3,
+  .exp_c2 = 0x1.5555576a5adcep-5,
+  .ln2_hi = 0x1.62e42fefa3800p-1,
+  .ln2_lo = 0x1.ef35793c76730p-45,
+  .n_over_ln2 = 0x1.71547652b82fep0 * N_EXP,
+  .ln2_over_n_hi = 0x1.62e42fefc0000p-9,
+  .ln2_over_n_lo = -0x1.c610ca86c3899p-45,
+};
+
 /* Check if x is an integer.  */
 static inline svbool_t
 sv_isint (svbool_t pg, svfloat64_t x)
@@ -71,7 +95,7 @@ sv_isnotint (svbool_t pg, svfloat64_t x)
 static inline svbool_t
 sv_isodd (svbool_t pg, svfloat64_t x)
 {
-  svfloat64_t y = svmul_x (pg, x, 0.5);
+  svfloat64_t y = svmul_x (svptrue_b64 (), x, 0.5);
   return sv_isnotint (pg, y);
 }
 
@@ -110,7 +134,7 @@ zeroinfnan (uint64_t i)
 static inline svbool_t
 sv_zeroinfnan (svbool_t pg, svuint64_t i)
 {
-  return svcmpge (pg, svsub_x (pg, svmul_x (pg, i, 2), 1),
+  return svcmpge (pg, svsub_x (pg, svadd_x (pg, i, i), 1),
 		  2 * asuint64 (INFINITY) - 1);
 }
 
@@ -163,23 +187,24 @@ sv_call_specialcase (svfloat64_t x1, svuint64_t u1, svuint64_t u2,
    additional 15 bits precision.  IX is the bit representation of x, but
    normalized in the subnormal range using the sign bit for the exponent.  */
 static inline svfloat64_t
-sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail)
+sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail,
+	       const struct data *d)
 {
   /* x = 2^k z; where z is in range [Off,2*Off) and exact.
      The range is split into N subintervals.
      The ith subinterval contains z and c is near its center.  */
-  svuint64_t tmp = svsub_x (pg, ix, Off);
+  svuint64_t tmp = svsub_x (pg, ix, d->off);
   svuint64_t i = svand_x (pg, svlsr_x (pg, tmp, 52 - V_POW_LOG_TABLE_BITS),
 			  sv_u64 (N_LOG - 1));
   svint64_t k = svasr_x (pg, svreinterpret_s64 (tmp), 52);
-  svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, sv_u64 (0xfffULL << 52)));
+  svuint64_t iz = svsub_x (pg, ix, svlsl_x (pg, svreinterpret_u64 (k), 52));
   svfloat64_t z = svreinterpret_f64 (iz);
   svfloat64_t kd = svcvt_f64_x (pg, k);
 
   /* log(x) = k*Ln2 + log(c) + log1p(z/c-1).  */
   /* SVE lookup requires 3 separate lookup tables, as opposed to scalar version
-     that uses array of structures. We also do the lookup earlier in the code to
-     make sure it finishes as early as possible.  */
+     that uses array of structures. We also do the lookup earlier in the code
+     to make sure it finishes as early as possible.  */
   svfloat64_t invc = svld1_gather_index (pg, __v_pow_log_data.invc, i);
   svfloat64_t logc = svld1_gather_index (pg, __v_pow_log_data.logc, i);
   svfloat64_t logctail = svld1_gather_index (pg, __v_pow_log_data.logctail, i);
@@ -188,40 +213,85 @@ sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail)
      |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible.  */
   svfloat64_t r = svmad_x (pg, z, invc, -1.0);
   /* k*Ln2 + log(c) + r.  */
-  svfloat64_t t1 = svmla_x (pg, logc, kd, __v_pow_log_data.ln2_hi);
+
+  svfloat64_t ln2_hilo = svld1rq_f64 (svptrue_b64 (), &d->ln2_hi);
+  svfloat64_t t1 = svmla_lane_f64 (logc, kd, ln2_hilo, 0);
   svfloat64_t t2 = svadd_x (pg, t1, r);
-  svfloat64_t lo1 = svmla_x (pg, logctail, kd, __v_pow_log_data.ln2_lo);
+  svfloat64_t lo1 = svmla_lane_f64 (logctail, kd, ln2_hilo, 1);
   svfloat64_t lo2 = svadd_x (pg, svsub_x (pg, t1, t2), r);
 
   /* Evaluation is optimized assuming superscalar pipelined execution.  */
-  svfloat64_t ar = svmul_x (pg, r, -0.5); /* A[0] = -0.5.  */
-  svfloat64_t ar2 = svmul_x (pg, r, ar);
-  svfloat64_t ar3 = svmul_x (pg, r, ar2);
+
+  svfloat64_t log_c02 = svld1rq_f64 (svptrue_b64 (), &d->log_c0);
+  svfloat64_t ar = svmul_lane_f64 (r, log_c02, 0);
+  svfloat64_t ar2 = svmul_x (svptrue_b64 (), r, ar);
+  svfloat64_t ar3 = svmul_x (svptrue_b64 (), r, ar2);
   /* k*Ln2 + log(c) + r + A[0]*r*r.  */
   svfloat64_t hi = svadd_x (pg, t2, ar2);
-  svfloat64_t lo3 = svmla_x (pg, svneg_x (pg, ar2), ar, r);
+  svfloat64_t lo3 = svmls_x (pg, ar2, ar, r);
   svfloat64_t lo4 = svadd_x (pg, svsub_x (pg, t2, hi), ar2);
   /* p = log1p(r) - r - A[0]*r*r.  */
   /* p = (ar3 * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r *
      A[6])))).  */
-  svfloat64_t a56 = svmla_x (pg, sv_f64 (A[5]), r, A[6]);
-  svfloat64_t a34 = svmla_x (pg, sv_f64 (A[3]), r, A[4]);
-  svfloat64_t a12 = svmla_x (pg, sv_f64 (A[1]), r, A[2]);
+
+  svfloat64_t log_c46 = svld1rq_f64 (svptrue_b64 (), &d->log_c4);
+  svfloat64_t a56 = svmla_lane_f64 (sv_f64 (d->log_c5), r, log_c46, 1);
+  svfloat64_t a34 = svmla_lane_f64 (sv_f64 (d->log_c3), r, log_c46, 0);
+  svfloat64_t a12 = svmla_lane_f64 (sv_f64 (d->log_c1), r, log_c02, 1);
   svfloat64_t p = svmla_x (pg, a34, ar2, a56);
   p = svmla_x (pg, a12, ar2, p);
-  p = svmul_x (pg, ar3, p);
+  p = svmul_x (svptrue_b64 (), ar3, p);
   svfloat64_t lo = svadd_x (
-      pg, svadd_x (pg, svadd_x (pg, svadd_x (pg, lo1, lo2), lo3), lo4), p);
+      pg, svadd_x (pg, svsub_x (pg, svadd_x (pg, lo1, lo2), lo3), lo4), p);
   svfloat64_t y = svadd_x (pg, hi, lo);
   *tail = svadd_x (pg, svsub_x (pg, hi, y), lo);
   return y;
 }
 
+static inline svfloat64_t
+sv_exp_core (svbool_t pg, svfloat64_t x, svfloat64_t xtail,
+	     svuint64_t sign_bias, svfloat64_t *tmp, svuint64_t *sbits,
+	     svuint64_t *ki, const struct data *d)
+{
+  /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)].  */
+  /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N].  */
+  svfloat64_t n_over_ln2_and_c2 = svld1rq_f64 (svptrue_b64 (), &d->n_over_ln2);
+  svfloat64_t z = svmul_lane_f64 (x, n_over_ln2_and_c2, 0);
+  /* z - kd is in [-1, 1] in non-nearest rounding modes.  */
+  svfloat64_t kd = svrinta_x (pg, z);
+  *ki = svreinterpret_u64 (svcvt_s64_x (pg, kd));
+
+  svfloat64_t ln2_over_n_hilo
+      = svld1rq_f64 (svptrue_b64 (), &d->ln2_over_n_hi);
+  svfloat64_t r = x;
+  r = svmls_lane_f64 (r, kd, ln2_over_n_hilo, 0);
+  r = svmls_lane_f64 (r, kd, ln2_over_n_hilo, 1);
+  /* The code assumes 2^-200 < |xtail| < 2^-8/N.  */
+  r = svadd_x (pg, r, xtail);
+  /* 2^(k/N) ~= scale.  */
+  svuint64_t idx = svand_x (pg, *ki, N_EXP - 1);
+  svuint64_t top
+      = svlsl_x (pg, svadd_x (pg, *ki, sign_bias), 52 - V_POW_EXP_TABLE_BITS);
+  /* This is only a valid scale when -1023*N < k < 1024*N.  */
+  *sbits = svld1_gather_index (pg, __v_pow_exp_data.sbits, idx);
+  *sbits = svadd_x (pg, *sbits, top);
+  /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1).  */
+  svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+  *tmp = svmla_lane_f64 (sv_f64 (d->exp_c1), r, n_over_ln2_and_c2, 1);
+  *tmp = svmla_x (pg, sv_f64 (d->exp_c0), r, *tmp);
+  *tmp = svmla_x (pg, r, r2, *tmp);
+  svfloat64_t scale = svreinterpret_f64 (*sbits);
+  /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
+     is no spurious underflow here even without fma.  */
+  z = svmla_x (pg, scale, scale, *tmp);
+  return z;
+}
+
 /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
    The sign_bias argument is SignBias or 0 and sets the sign to -1 or 1.  */
 static inline svfloat64_t
 sv_exp_inline (svbool_t pg, svfloat64_t x, svfloat64_t xtail,
-	       svuint64_t sign_bias)
+	       svuint64_t sign_bias, const struct data *d)
 {
   /* 3 types of special cases: tiny (uflow and spurious uflow), huge (oflow)
      and other cases of large values of x (scale * (1 + TMP) oflow).  */
@@ -229,73 +299,46 @@ sv_exp_inline (svbool_t pg, svfloat64_t x, svfloat64_t xtail,
   /* |x| is large (|x| >= 512) or tiny (|x| <= 0x1p-54).  */
   svbool_t uoflow = svcmpge (pg, svsub_x (pg, abstop, SmallExp), ThresExp);
 
-  /* Conditions special, uflow and oflow are all expressed as uoflow &&
-     something, hence do not bother computing anything if no lane in uoflow is
-     true.  */
-  svbool_t special = svpfalse_b ();
-  svbool_t uflow = svpfalse_b ();
-  svbool_t oflow = svpfalse_b ();
+  svfloat64_t tmp;
+  svuint64_t sbits, ki;
   if (unlikely (svptest_any (pg, uoflow)))
     {
+      svfloat64_t z
+	  = sv_exp_core (pg, x, xtail, sign_bias, &tmp, &sbits, &ki, d);
+
       /* |x| is tiny (|x| <= 0x1p-54).  */
-      uflow = svcmpge (pg, svsub_x (pg, abstop, SmallExp), 0x80000000);
+      svbool_t uflow
+	  = svcmpge (pg, svsub_x (pg, abstop, SmallExp), 0x80000000);
       uflow = svand_z (pg, uoflow, uflow);
       /* |x| is huge (|x| >= 1024).  */
-      oflow = svcmpge (pg, abstop, HugeExp);
+      svbool_t oflow = svcmpge (pg, abstop, HugeExp);
       oflow = svand_z (pg, uoflow, svbic_z (pg, oflow, uflow));
+
       /* For large |x| values (512 < |x| < 1024) scale * (1 + TMP) can overflow
-	 or underflow.  */
-      special = svbic_z (pg, uoflow, svorr_z (pg, uflow, oflow));
+	 or underflow.  */
+      svbool_t special = svbic_z (pg, uoflow, svorr_z (pg, uflow, oflow));
+
+      /* Update result with special and large cases.  */
+      z = sv_call_specialcase (tmp, sbits, ki, z, special);
+
+      /* Handle underflow and overflow.  */
+      svbool_t x_is_neg = svcmplt (pg, x, 0);
+      svuint64_t sign_mask
+	  = svlsl_x (pg, sign_bias, 52 - V_POW_EXP_TABLE_BITS);
+      svfloat64_t res_uoflow
+	  = svsel (x_is_neg, sv_f64 (0.0), sv_f64 (INFINITY));
+      res_uoflow = svreinterpret_f64 (
+	  svorr_x (pg, svreinterpret_u64 (res_uoflow), sign_mask));
+      /* Avoid spurious underflow for tiny x.  */
+      svfloat64_t res_spurious_uflow
+	  = svreinterpret_f64 (svorr_x (pg, sign_mask, 0x3ff0000000000000));
+
+      z = svsel (oflow, res_uoflow, z);
+      z = svsel (uflow, res_spurious_uflow, z);
+      return z;
     }
 
-  /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)].  */
-  /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N].  */
-  svfloat64_t z = svmul_x (pg, x, __v_pow_exp_data.n_over_ln2);
-  /* z - kd is in [-1, 1] in non-nearest rounding modes.  */
-  svfloat64_t shift = sv_f64 (__v_pow_exp_data.shift);
-  svfloat64_t kd = svadd_x (pg, z, shift);
-  svuint64_t ki = svreinterpret_u64 (kd);
-  kd = svsub_x (pg, kd, shift);
-  svfloat64_t r = x;
-  r = svmls_x (pg, r, kd, __v_pow_exp_data.ln2_over_n_hi);
-  r = svmls_x (pg, r, kd, __v_pow_exp_data.ln2_over_n_lo);
-  /* The code assumes 2^-200 < |xtail| < 2^-8/N.  */
-  r = svadd_x (pg, r, xtail);
-  /* 2^(k/N) ~= scale.  */
-  svuint64_t idx = svand_x (pg, ki, N_EXP - 1);
-  svuint64_t top
-      = svlsl_x (pg, svadd_x (pg, ki, sign_bias), 52 - V_POW_EXP_TABLE_BITS);
-  /* This is only a valid scale when -1023*N < k < 1024*N.  */
-  svuint64_t sbits = svld1_gather_index (pg, __v_pow_exp_data.sbits, idx);
-  sbits = svadd_x (pg, sbits, top);
-  /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1).  */
-  svfloat64_t r2 = svmul_x (pg, r, r);
-  svfloat64_t tmp = svmla_x (pg, sv_f64 (C[1]), r, C[2]);
-  tmp = svmla_x (pg, sv_f64 (C[0]), r, tmp);
-  tmp = svmla_x (pg, r, r2, tmp);
-  svfloat64_t scale = svreinterpret_f64 (sbits);
-  /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
-     is no spurious underflow here even without fma.  */
-  z = svmla_x (pg, scale, scale, tmp);
-
-  /* Update result with special and large cases.  */
-  if (unlikely (svptest_any (pg, special)))
-    z = sv_call_specialcase (tmp, sbits, ki, z, special);
-
-  /* Handle underflow and overflow.  */
-  svuint64_t sign_bit = svlsr_x (pg, svreinterpret_u64 (x), 63);
-  svbool_t x_is_neg = svcmpne (pg, sign_bit, 0);
-  svuint64_t sign_mask = svlsl_x (pg, sign_bias, 52 - V_POW_EXP_TABLE_BITS);
-  svfloat64_t res_uoflow = svsel (x_is_neg, sv_f64 (0.0), sv_f64 (INFINITY));
-  res_uoflow = svreinterpret_f64 (
-      svorr_x (pg, svreinterpret_u64 (res_uoflow), sign_mask));
-  z = svsel (oflow, res_uoflow, z);
-  /* Avoid spurious underflow for tiny x.  */
-  svfloat64_t res_spurious_uflow
-      = svreinterpret_f64 (svorr_x (pg, sign_mask, 0x3ff0000000000000));
-  z = svsel (uflow, res_spurious_uflow, z);
-
-  return z;
+  return sv_exp_core (pg, x, xtail, sign_bias, &tmp, &sbits, &ki, d);
 }
 
 static inline double
@@ -323,56 +366,46 @@ pow_sc (double x, double y)
       double_t x2 = x * x;
       if (ix >> 63 && checkint (iy) == 1)
 	x2 = -x2;
-      /* Without the barrier some versions of clang hoist the 1/x2 and
-	 thus division by zero exception can be signaled spuriously.  */
-      return (iy >> 63) ? opt_barrier_double (1 / x2) : x2;
+      return (iy >> 63) ? 1 / x2 : x2;
     }
   return x;
 }
 
 svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg)
 {
+  const struct data *d = ptr_barrier (&data);
+
   /* This preamble handles special case conditions used in the final scalar
      fallbacks. It also updates ix and sign_bias, that are used in the core
      computation too, i.e., exp( y * log (x) ).  */
   svuint64_t vix0 = svreinterpret_u64 (x);
   svuint64_t viy0 = svreinterpret_u64 (y);
-  svuint64_t vtopx0 = svlsr_x (svptrue_b64 (), vix0, 52);
 
   /* Negative x cases.  */
-  svuint64_t sign_bit = svlsr_m (pg, vix0, 63);
-  svbool_t xisneg = svcmpeq (pg, sign_bit, 1);
+  svbool_t xisneg = svcmplt (pg, x, 0);
 
   /* Set sign_bias and ix depending on sign of x and nature of y.  */
-  svbool_t yisnotint_xisneg = svpfalse_b ();
+  svbool_t yint_or_xpos = pg;
   svuint64_t sign_bias = sv_u64 (0);
   svuint64_t vix = vix0;
-  svuint64_t vtopx1 = vtopx0;
   if (unlikely (svptest_any (pg, xisneg)))
     {
       /* Determine nature of y.  */
-      yisnotint_xisneg = sv_isnotint (xisneg, y);
-      svbool_t yisint_xisneg = sv_isint (xisneg, y);
+      yint_or_xpos = sv_isint (xisneg, y);
       svbool_t yisodd_xisneg = sv_isodd (xisneg, y);
       /* ix set to abs(ix) if y is integer.  */
-      vix = svand_m (yisint_xisneg, vix0, 0x7fffffffffffffff);
-      vtopx1 = svand_m (yisint_xisneg, vtopx0, 0x7ff);
+      vix = svand_m (yint_or_xpos, vix0, 0x7fffffffffffffff);
       /* Set to SignBias if x is negative and y is odd.  */
       sign_bias = svsel (yisodd_xisneg, sv_u64 (SignBias), sv_u64 (0));
     }
 
-  /* Special cases of x or y: zero, inf and nan.  */
-  svbool_t xspecial = sv_zeroinfnan (pg, vix0);
-  svbool_t yspecial = sv_zeroinfnan (pg, viy0);
-  svbool_t special = svorr_z (pg, xspecial, yspecial);
-
   /* Small cases of x: |x| < 0x1p-126.  */
-  svuint64_t vabstopx0 = svand_x (pg, vtopx0, 0x7ff);
-  svbool_t xsmall = svcmplt (pg, vabstopx0, SmallPowX);
-  if (unlikely (svptest_any (pg, xsmall)))
+  svbool_t xsmall = svaclt (yint_or_xpos, x, SmallBoundX);
+  if (unlikely (svptest_any (yint_or_xpos, xsmall)))
     {
       /* Normalize subnormal x so exponent becomes negative.  */
-      svbool_t topx_is_null = svcmpeq (xsmall, vtopx1, 0);
+      svuint64_t vtopx = svlsr_x (svptrue_b64 (), vix, 52);
+      svbool_t topx_is_null = svcmpeq (xsmall, vtopx, 0);
 
       svuint64_t vix_norm = svreinterpret_u64 (svmul_m (xsmall, x, 0x1p52));
       vix_norm = svand_m (xsmall, vix_norm, 0x7fffffffffffffff);
@@ -382,33 +415,38 @@ svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg)
 
   /* y_hi = log(ix, &y_lo).  */
   svfloat64_t vlo;
-  svfloat64_t vhi = sv_log_inline (pg, vix, &vlo);
+  svfloat64_t vhi = sv_log_inline (yint_or_xpos, vix, &vlo, d);
 
   /* z = exp(y_hi, y_lo, sign_bias).  */
-  svfloat64_t vehi = svmul_x (pg, y, vhi);
-  svfloat64_t velo = svmul_x (pg, y, vlo);
-  svfloat64_t vemi = svmls_x (pg, vehi, y, vhi);
-  velo = svsub_x (pg, velo, vemi);
-  svfloat64_t vz = sv_exp_inline (pg, vehi, velo, sign_bias);
+  svfloat64_t vehi = svmul_x (svptrue_b64 (), y, vhi);
+  svfloat64_t vemi = svmls_x (yint_or_xpos, vehi, y, vhi);
+  svfloat64_t velo = svnmls_x (yint_or_xpos, vemi, y, vlo);
+  svfloat64_t vz = sv_exp_inline (yint_or_xpos, vehi, velo, sign_bias, d);
 
   /* Cases of finite y and finite negative x.  */
-  vz = svsel (yisnotint_xisneg, sv_f64 (__builtin_nan ("")), vz);
+  vz = svsel (yint_or_xpos, vz, sv_f64 (__builtin_nan ("")));
+
+  /* Special cases of x or y: zero, inf and nan.  */
+  svbool_t xspecial = sv_zeroinfnan (svptrue_b64 (), vix0);
+  svbool_t yspecial = sv_zeroinfnan (svptrue_b64 (), viy0);
+  svbool_t special = svorr_z (svptrue_b64 (), xspecial, yspecial);
 
   /* Cases of zero/inf/nan x or y.  */
-  if (unlikely (svptest_any (pg, special)))
+  if (unlikely (svptest_any (svptrue_b64 (), special)))
     vz = sv_call2_f64 (pow_sc, x, y, vz, special);
 
   return vz;
 }
 
-PL_SIG (SV, D, 2, pow)
-PL_TEST_ULP (SV_NAME_D2 (pow), 0.55)
+TEST_SIG (SV, D, 2, pow)
+TEST_ULP (SV_NAME_D2 (pow), 0.55)
+TEST_DISABLE_FENV (SV_NAME_D2 (pow))
 /* Wide intervals spanning the whole domain but shared between x and y.  */
-#define SV_POW_INTERVAL2(xlo, xhi, ylo, yhi, n)                                \
-  PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), xlo, xhi, ylo, yhi, n)                  \
-  PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), xlo, xhi, -ylo, -yhi, n)                \
-  PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), -xlo, -xhi, ylo, yhi, n)                \
-  PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), -xlo, -xhi, -ylo, -yhi, n)
+#define SV_POW_INTERVAL2(xlo, xhi, ylo, yhi, n)                               \
+  TEST_INTERVAL2 (SV_NAME_D2 (pow), xlo, xhi, ylo, yhi, n)                    \
+  TEST_INTERVAL2 (SV_NAME_D2 (pow), xlo, xhi, -ylo, -yhi, n)                  \
+  TEST_INTERVAL2 (SV_NAME_D2 (pow), -xlo, -xhi, ylo, yhi, n)                  \
+  TEST_INTERVAL2 (SV_NAME_D2 (pow), -xlo, -xhi, -ylo, -yhi, n)
 #define EXPAND(str) str##000000000
 #define SHL52(str) EXPAND (str)
 SV_POW_INTERVAL2 (0, SHL52 (SmallPowX), 0, inf, 40000)
@@ -426,10 +464,10 @@ SV_POW_INTERVAL2 (0x1p-500, 0x1p500, 0x1p-1, 0x1p1, 10000)
 SV_POW_INTERVAL2 (0x1p-300, 0x1p-200, 0x1p-20, 0x1p-10, 10000)
 SV_POW_INTERVAL2 (0x1p50, 0x1p100, 0x1p-20, 0x1p-10, 10000)
 /* x is negative, y is odd or even integer, or y is real not integer.  */
-PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), -0.0, -10.0, 3.0, 3.0, 10000)
-PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), -0.0, -10.0, 4.0, 4.0, 10000)
-PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), -0.0, -10.0, 0.0, 10.0, 10000)
-PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), 0.0, 10.0, -0.0, -10.0, 10000)
+TEST_INTERVAL2 (SV_NAME_D2 (pow), -0.0, -10.0, 3.0, 3.0, 10000)
+TEST_INTERVAL2 (SV_NAME_D2 (pow), -0.0, -10.0, 4.0, 4.0, 10000)
+TEST_INTERVAL2 (SV_NAME_D2 (pow), -0.0, -10.0, 0.0, 10.0, 10000)
+TEST_INTERVAL2 (SV_NAME_D2 (pow), 0.0, 10.0, -0.0, -10.0, 10000)
 /* |x| is inf, y is odd or even integer, or y is real not integer.  */
 SV_POW_INTERVAL2 (inf, inf, 0.5, 0.5, 1)
 SV_POW_INTERVAL2 (inf, inf, 1.0, 1.0, 1)
@@ -438,7 +476,8 @@ SV_POW_INTERVAL2 (inf, inf, 3.0, 3.0, 1)
 /* 0.0^y.  */
 SV_POW_INTERVAL2 (0.0, 0.0, 0.0, 0x1p120, 1000)
 /* 1.0^y.  */
-PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), 1.0, 1.0, 0.0, 0x1p-50, 1000)
-PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), 1.0, 1.0, 0x1p-50, 1.0, 1000)
-PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), 1.0, 1.0, 1.0, 0x1p100, 1000)
-PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), 1.0, 1.0, -1.0, -0x1p120, 1000)
+TEST_INTERVAL2 (SV_NAME_D2 (pow), 1.0, 1.0, 0.0, 0x1p-50, 1000)
+TEST_INTERVAL2 (SV_NAME_D2 (pow), 1.0, 1.0, 0x1p-50, 1.0, 1000)
+TEST_INTERVAL2 (SV_NAME_D2 (pow), 1.0, 1.0, 1.0, 0x1p100, 1000)
+TEST_INTERVAL2 (SV_NAME_D2 (pow), 1.0, 1.0, -1.0, -0x1p120, 1000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_powf_2u6.c b/math/aarch64/sve/powf.c
similarity index 69%
rename from pl/math/sv_powf_2u6.c
rename to math/aarch64/sve/powf.c
index 2db0636aea6211..8457e83e749510 100644
--- a/pl/math/sv_powf_2u6.c
+++ b/math/aarch64/sve/powf.c
@@ -1,13 +1,13 @@
 /*
  * Single-precision SVE powf function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2025, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 /* The following data is used in the SVE pow core computation
    and special case detection.  */
@@ -15,7 +15,6 @@
 #define Tlogc __v_powf_data.logc
 #define Texp __v_powf_data.scale
 #define SignBias (1 << (V_POWF_EXP2_TABLE_BITS + 11))
-#define Shift 0x1.8p52
 #define Norm 0x1p23f /* 0x4b000000.  */
 
 /* Overall ULP error bound for pow is 2.6 ulp
@@ -25,7 +24,7 @@ static const struct data
   double log_poly[4];
   double exp_poly[3];
   float uflow_bound, oflow_bound, small_bound;
-  uint32_t sign_bias, sign_mask, subnormal_bias, off;
+  uint32_t sign_bias, subnormal_bias, off;
 } data = {
   /* rel err: 1.5 * 2^-30. Each coefficients is multiplied the value of
      V_POWF_EXP2_N.  */
@@ -42,7 +41,6 @@ static const struct data
   .small_bound = 0x1p-126f,
   .off = 0x3f35d000,
   .sign_bias = SignBias,
-  .sign_mask = 0x80000000,
   .subnormal_bias = 0x0b800000, /* 23 << 23.  */
 };
 
@@ -75,7 +73,7 @@ svisodd (svbool_t pg, svfloat32_t x)
 static inline svbool_t
 sv_zeroinfnan (svbool_t pg, svuint32_t i)
 {
-  return svcmpge (pg, svsub_x (pg, svmul_x (pg, i, 2u), 1),
+  return svcmpge (pg, svsub_x (pg, svadd_x (pg, i, i), 1),
 		  2u * 0x7f800000 - 1);
 }
 
@@ -104,7 +102,7 @@ zeroinfnan (uint32_t ix)
 }
 
 /* A scalar subroutine used to fix main power special cases. Similar to the
-   preamble of finite_powf except that we do not update ix and sign_bias. This
+   preamble of scalar powf except that we do not update ix and sign_bias. This
    is done in the preamble of the SVE powf.  */
 static inline float
 powf_specialcase (float x, float y, float z)
@@ -139,9 +137,14 @@ powf_specialcase (float x, float y, float z)
 }
 
 /* Scalar fallback for special case routines with custom signature.  */
-static inline svfloat32_t
-sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y, svbool_t cmp)
+static svfloat32_t NOINLINE
+sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y)
 {
+  /* Special cases of x or y: zero, inf and nan.  */
+  svbool_t xspecial = sv_zeroinfnan (svptrue_b32 (), svreinterpret_u32 (x1));
+  svbool_t yspecial = sv_zeroinfnan (svptrue_b32 (), svreinterpret_u32 (x2));
+  svbool_t cmp = svorr_z (svptrue_b32 (), xspecial, yspecial);
+
   svbool_t p = svpfirst (cmp, svpfalse ());
   while (svptest_any (cmp, p))
     {
@@ -171,30 +174,30 @@ sv_powf_core_ext (const svbool_t pg, svuint64_t i, svfloat64_t z, svint64_t k,
 
   /* Polynomial to approximate log1p(r)/ln2.  */
   svfloat64_t logx = A (0);
-  logx = svmla_x (pg, A (1), r, logx);
-  logx = svmla_x (pg, A (2), r, logx);
-  logx = svmla_x (pg, A (3), r, logx);
-  logx = svmla_x (pg, y0, r, logx);
+  logx = svmad_x (pg, r, logx, A (1));
+  logx = svmad_x (pg, r, logx, A (2));
+  logx = svmad_x (pg, r, logx, A (3));
+  logx = svmad_x (pg, r, logx, y0);
   *pylogx = svmul_x (pg, y, logx);
 
   /* z - kd is in [-1, 1] in non-nearest rounding modes.  */
-  svfloat64_t kd = svadd_x (pg, *pylogx, Shift);
-  svuint64_t ki = svreinterpret_u64 (kd);
-  kd = svsub_x (pg, kd, Shift);
+  svfloat64_t kd = svrinta_x (svptrue_b64 (), *pylogx);
+  svuint64_t ki = svreinterpret_u64 (svcvt_s64_x (svptrue_b64 (), kd));
 
   r = svsub_x (pg, *pylogx, kd);
 
   /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1).  */
-  svuint64_t t
-      = svld1_gather_index (pg, Texp, svand_x (pg, ki, V_POWF_EXP2_N - 1));
-  svuint64_t ski = svadd_x (pg, ki, sign_bias);
-  t = svadd_x (pg, t, svlsl_x (pg, ski, 52 - V_POWF_EXP2_TABLE_BITS));
+  svuint64_t t = svld1_gather_index (
+      svptrue_b64 (), Texp, svand_x (svptrue_b64 (), ki, V_POWF_EXP2_N - 1));
+  svuint64_t ski = svadd_x (svptrue_b64 (), ki, sign_bias);
+  t = svadd_x (svptrue_b64 (), t,
+	       svlsl_x (svptrue_b64 (), ski, 52 - V_POWF_EXP2_TABLE_BITS));
   svfloat64_t s = svreinterpret_f64 (t);
 
   svfloat64_t p = C (0);
   p = svmla_x (pg, C (1), p, r);
   p = svmla_x (pg, C (2), p, r);
-  p = svmla_x (pg, s, p, svmul_x (pg, s, r));
+  p = svmla_x (pg, s, p, svmul_x (svptrue_b64 (), s, r));
 
   return p;
 }
@@ -208,19 +211,16 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k,
 {
   const svbool_t ptrue = svptrue_b64 ();
 
-  /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two in
-     order to perform core computation in double precision.  */
+  /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two
+     in order to perform core computation in double precision.  */
   const svbool_t pg_lo = svunpklo (pg);
   const svbool_t pg_hi = svunpkhi (pg);
-  svfloat64_t y_lo = svcvt_f64_x (
-      ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y))));
-  svfloat64_t y_hi = svcvt_f64_x (
-      ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y))));
-  svfloat32_t z = svreinterpret_f32 (iz);
-  svfloat64_t z_lo = svcvt_f64_x (
-      ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (z))));
-  svfloat64_t z_hi = svcvt_f64_x (
-      ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (z))));
+  svfloat64_t y_lo
+      = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y))));
+  svfloat64_t y_hi
+      = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y))));
+  svfloat64_t z_lo = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (iz)));
+  svfloat64_t z_hi = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (iz)));
   svuint64_t i_lo = svunpklo (i);
   svuint64_t i_hi = svunpkhi (i);
   svint64_t k_lo = svunpklo (k);
@@ -247,9 +247,9 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k,
 /* Implementation of SVE powf.
    Provides the same accuracy as AdvSIMD powf, since it relies on the same
    algorithm. The theoretical maximum error is under 2.60 ULPs.
-   Maximum measured error is 2.56 ULPs:
-   SV_NAME_F2 (pow) (0x1.004118p+0, 0x1.5d14a4p+16) got 0x1.fd4bp+127
-						   want 0x1.fd4b06p+127.  */
+   Maximum measured error is 2.57 ULPs:
+   SV_NAME_F2 (pow) (0x1.031706p+0, 0x1.ce2ec2p+12) got 0x1.fff868p+127
+						   want 0x1.fff862p+127.  */
 svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
@@ -258,21 +258,19 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
   svuint32_t viy0 = svreinterpret_u32 (y);
 
   /* Negative x cases.  */
-  svuint32_t sign_bit = svand_m (pg, vix0, d->sign_mask);
-  svbool_t xisneg = svcmpeq (pg, sign_bit, d->sign_mask);
+  svbool_t xisneg = svcmplt (pg, x, sv_f32 (0));
 
   /* Set sign_bias and ix depending on sign of x and nature of y.  */
-  svbool_t yisnotint_xisneg = svpfalse_b ();
+  svbool_t yint_or_xpos = pg;
   svuint32_t sign_bias = sv_u32 (0);
   svuint32_t vix = vix0;
   if (unlikely (svptest_any (pg, xisneg)))
     {
       /* Determine nature of y.  */
-      yisnotint_xisneg = svisnotint (xisneg, y);
-      svbool_t yisint_xisneg = svisint (xisneg, y);
+      yint_or_xpos = svisint (xisneg, y);
       svbool_t yisodd_xisneg = svisodd (xisneg, y);
       /* ix set to abs(ix) if y is integer.  */
-      vix = svand_m (yisint_xisneg, vix0, 0x7fffffff);
+      vix = svand_m (yint_or_xpos, vix0, 0x7fffffff);
       /* Set to SignBias if x is negative and y is odd.  */
       sign_bias = svsel (yisodd_xisneg, sv_u32 (d->sign_bias), sv_u32 (0));
     }
@@ -283,8 +281,8 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
   svbool_t cmp = svorr_z (pg, xspecial, yspecial);
 
   /* Small cases of x: |x| < 0x1p-126.  */
-  svbool_t xsmall = svaclt (pg, x, d->small_bound);
-  if (unlikely (svptest_any (pg, xsmall)))
+  svbool_t xsmall = svaclt (yint_or_xpos, x, d->small_bound);
+  if (unlikely (svptest_any (yint_or_xpos, xsmall)))
     {
       /* Normalize subnormal x so exponent becomes negative.  */
       svuint32_t vix_norm = svreinterpret_u32 (svmul_x (xsmall, x, Norm));
@@ -293,44 +291,48 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
       vix = svsel (xsmall, vix_norm, vix);
     }
   /* Part of core computation carried in working precision.  */
-  svuint32_t tmp = svsub_x (pg, vix, d->off);
-  svuint32_t i = svand_x (pg, svlsr_x (pg, tmp, (23 - V_POWF_LOG2_TABLE_BITS)),
-			  V_POWF_LOG2_N - 1);
-  svuint32_t top = svand_x (pg, tmp, 0xff800000);
-  svuint32_t iz = svsub_x (pg, vix, top);
-  svint32_t k
-      = svasr_x (pg, svreinterpret_s32 (top), (23 - V_POWF_EXP2_TABLE_BITS));
-
-  /* Compute core in extended precision and return intermediate ylogx results to
-      handle cases of underflow and underflow in exp.  */
+  svuint32_t tmp = svsub_x (yint_or_xpos, vix, d->off);
+  svuint32_t i = svand_x (
+      yint_or_xpos, svlsr_x (yint_or_xpos, tmp, (23 - V_POWF_LOG2_TABLE_BITS)),
+      V_POWF_LOG2_N - 1);
+  svuint32_t top = svand_x (yint_or_xpos, tmp, 0xff800000);
+  svuint32_t iz = svsub_x (yint_or_xpos, vix, top);
+  svint32_t k = svasr_x (yint_or_xpos, svreinterpret_s32 (top),
+			 (23 - V_POWF_EXP2_TABLE_BITS));
+
+  /* Compute core in extended precision and return intermediate ylogx results
+     to handle cases of underflow and overflow in exp.  */
   svfloat32_t ylogx;
-  svfloat32_t ret = sv_powf_core (pg, i, iz, k, y, sign_bias, &ylogx, d);
+  svfloat32_t ret
+      = sv_powf_core (yint_or_xpos, i, iz, k, y, sign_bias, &ylogx, d);
 
   /* Handle exp special cases of underflow and overflow.  */
-  svuint32_t sign = svlsl_x (pg, sign_bias, 20 - V_POWF_EXP2_TABLE_BITS);
+  svuint32_t sign
+      = svlsl_x (yint_or_xpos, sign_bias, 20 - V_POWF_EXP2_TABLE_BITS);
   svfloat32_t ret_oflow
-      = svreinterpret_f32 (svorr_x (pg, sign, asuint (INFINITY)));
+      = svreinterpret_f32 (svorr_x (yint_or_xpos, sign, asuint (INFINITY)));
   svfloat32_t ret_uflow = svreinterpret_f32 (sign);
-  ret = svsel (svcmple (pg, ylogx, d->uflow_bound), ret_uflow, ret);
-  ret = svsel (svcmpgt (pg, ylogx, d->oflow_bound), ret_oflow, ret);
+  ret = svsel (svcmple (yint_or_xpos, ylogx, d->uflow_bound), ret_uflow, ret);
+  ret = svsel (svcmpgt (yint_or_xpos, ylogx, d->oflow_bound), ret_oflow, ret);
 
   /* Cases of finite y and finite negative x.  */
-  ret = svsel (yisnotint_xisneg, sv_f32 (__builtin_nanf ("")), ret);
+  ret = svsel (yint_or_xpos, ret, sv_f32 (__builtin_nanf ("")));
 
-  if (unlikely (svptest_any (pg, cmp)))
-    return sv_call_powf_sc (x, y, ret, cmp);
+  if (unlikely (svptest_any (cmp, cmp)))
+    return sv_call_powf_sc (x, y, ret);
 
   return ret;
 }
 
-PL_SIG (SV, F, 2, pow)
-PL_TEST_ULP (SV_NAME_F2 (pow), 2.06)
+TEST_SIG (SV, F, 2, pow)
+TEST_ULP (SV_NAME_F2 (pow), 2.08)
+TEST_DISABLE_FENV (SV_NAME_F2 (pow))
 /* Wide intervals spanning the whole domain but shared between x and y.  */
-#define SV_POWF_INTERVAL2(xlo, xhi, ylo, yhi, n)                               \
-  PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), xlo, xhi, ylo, yhi, n)                  \
-  PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), xlo, xhi, -ylo, -yhi, n)                \
-  PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), -xlo, -xhi, ylo, yhi, n)                \
-  PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), -xlo, -xhi, -ylo, -yhi, n)
+#define SV_POWF_INTERVAL2(xlo, xhi, ylo, yhi, n)                              \
+  TEST_INTERVAL2 (SV_NAME_F2 (pow), xlo, xhi, ylo, yhi, n)                    \
+  TEST_INTERVAL2 (SV_NAME_F2 (pow), xlo, xhi, -ylo, -yhi, n)                  \
+  TEST_INTERVAL2 (SV_NAME_F2 (pow), -xlo, -xhi, ylo, yhi, n)                  \
+  TEST_INTERVAL2 (SV_NAME_F2 (pow), -xlo, -xhi, -ylo, -yhi, n)
 SV_POWF_INTERVAL2 (0, 0x1p-126, 0, inf, 40000)
 SV_POWF_INTERVAL2 (0x1p-126, 1, 0, inf, 50000)
 SV_POWF_INTERVAL2 (1, inf, 0, inf, 50000)
@@ -342,10 +344,10 @@ SV_POWF_INTERVAL2 (0x1p-500, 0x1p500, 0x1p-1, 0x1p1, 10000)
 SV_POWF_INTERVAL2 (0x1p-300, 0x1p-200, 0x1p-20, 0x1p-10, 10000)
 SV_POWF_INTERVAL2 (0x1p50, 0x1p100, 0x1p-20, 0x1p-10, 10000)
 /* x is negative, y is odd or even integer, or y is real not integer.  */
-PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), -0.0, -10.0, 3.0, 3.0, 10000)
-PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), -0.0, -10.0, 4.0, 4.0, 10000)
-PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), -0.0, -10.0, 0.0, 10.0, 10000)
-PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), 0.0, 10.0, -0.0, -10.0, 10000)
+TEST_INTERVAL2 (SV_NAME_F2 (pow), -0.0, -10.0, 3.0, 3.0, 10000)
+TEST_INTERVAL2 (SV_NAME_F2 (pow), -0.0, -10.0, 4.0, 4.0, 10000)
+TEST_INTERVAL2 (SV_NAME_F2 (pow), -0.0, -10.0, 0.0, 10.0, 10000)
+TEST_INTERVAL2 (SV_NAME_F2 (pow), 0.0, 10.0, -0.0, -10.0, 10000)
 /* |x| is inf, y is odd or even integer, or y is real not integer.  */
 SV_POWF_INTERVAL2 (inf, inf, 0.5, 0.5, 1)
 SV_POWF_INTERVAL2 (inf, inf, 1.0, 1.0, 1)
@@ -354,7 +356,8 @@ SV_POWF_INTERVAL2 (inf, inf, 3.0, 3.0, 1)
 /* 0.0^y.  */
 SV_POWF_INTERVAL2 (0.0, 0.0, 0.0, 0x1p120, 1000)
 /* 1.0^y.  */
-PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, 0.0, 0x1p-50, 1000)
-PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, 0x1p-50, 1.0, 1000)
-PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, 1.0, 0x1p100, 1000)
-PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, -1.0, -0x1p120, 1000)
+TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, 0.0, 0x1p-50, 1000)
+TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, 0x1p-50, 1.0, 1000)
+TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, 1.0, 0x1p100, 1000)
+TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, -1.0, -0x1p120, 1000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_sin_3u5.c b/math/aarch64/sve/sin.c
similarity index 89%
rename from pl/math/sv_sin_3u5.c
rename to math/aarch64/sve/sin.c
index a81f3fc80f3d77..7e22515ceb7949 100644
--- a/pl/math/sv_sin_3u5.c
+++ b/math/aarch64/sve/sin.c
@@ -1,13 +1,13 @@
 /*
  * Double-precision SVE sin(x) function.
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -90,7 +90,9 @@ svfloat64_t SV_NAME_D1 (sin) (svfloat64_t x, const svbool_t pg)
   return svreinterpret_f64 (sveor_z (pg, svreinterpret_u64 (y), odd));
 }
 
-PL_SIG (SV, D, 1, sin, -3.1, 3.1)
-PL_TEST_ULP (SV_NAME_D1 (sin), 2.73)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sin), 0, 0x1p23, 1000000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sin), 0x1p23, inf, 10000)
+TEST_SIG (SV, D, 1, sin, -3.1, 3.1)
+TEST_ULP (SV_NAME_D1 (sin), 2.73)
+TEST_DISABLE_FENV (SV_NAME_D1 (sin))
+TEST_SYM_INTERVAL (SV_NAME_D1 (sin), 0, 0x1p23, 1000000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (sin), 0x1p23, inf, 10000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_sincos_3u5.c b/math/aarch64/sve/sincos.c
similarity index 72%
rename from pl/math/sv_sincos_3u5.c
rename to math/aarch64/sve/sincos.c
index f73550082d5b82..26b8bb3c6a5a21 100644
--- a/pl/math/sv_sincos_3u5.c
+++ b/math/aarch64/sve/sincos.c
@@ -1,7 +1,7 @@
 /*
  * Double-precision vector sincos function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -9,12 +9,22 @@
    pre-GLIBC 2.1, or on a non-GNU conforming system, this routine will need to
    be linked against the scalar sincosf from math/.  */
 #define _GNU_SOURCE
-#include <math.h>
-#undef _GNU_SOURCE
 
-#include "sv_sincos_common.h"
 #include "sv_math.h"
-#include "pl_test.h"
+#include "sv_sincos_common.h"
+#include "test_defs.h"
+
+#include <math.h>
+
+/* Not every scalar libm has sincos; without glibc, build it from sin/cos.  */
+#ifndef __GLIBC__
+static void
+sincos (double x, double *out_sin, double *out_cos)
+{
+  *out_sin = sin (x);
+  *out_cos = cos (x);
+}
+#endif
 
 static void NOINLINE
 special_case (svfloat64_t x, svbool_t special, double *out_sin,
@@ -50,12 +60,14 @@ _ZGVsMxvl8l8_sincos (svfloat64_t x, double *out_sin, double *out_cos,
     special_case (x, special, out_sin, out_cos);
 }
 
-PL_TEST_ULP (_ZGVsMxv_sincos_sin, 2.73)
-PL_TEST_ULP (_ZGVsMxv_sincos_cos, 2.73)
+TEST_DISABLE_FENV (_ZGVsMxv_sincos_sin)
+TEST_DISABLE_FENV (_ZGVsMxv_sincos_cos)
+TEST_ULP (_ZGVsMxv_sincos_sin, 2.73)
+TEST_ULP (_ZGVsMxv_sincos_cos, 2.73)
 #define SV_SINCOS_INTERVAL(lo, hi, n)                                         \
-  PL_TEST_INTERVAL (_ZGVsMxv_sincos_sin, lo, hi, n)                           \
-  PL_TEST_INTERVAL (_ZGVsMxv_sincos_cos, lo, hi, n)
-SV_SINCOS_INTERVAL (0, 0x1p23, 500000)
-SV_SINCOS_INTERVAL (-0, -0x1p23, 500000)
+  TEST_SYM_INTERVAL (_ZGVsMxv_sincos_sin, lo, hi, n)                          \
+  TEST_SYM_INTERVAL (_ZGVsMxv_sincos_cos, lo, hi, n)
+SV_SINCOS_INTERVAL (0, 0x1p-63, 50000)
+SV_SINCOS_INTERVAL (0x1p-63, 0x1p23, 500000)
 SV_SINCOS_INTERVAL (0x1p23, inf, 10000)
-SV_SINCOS_INTERVAL (-0x1p23, -inf, 10000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_sincosf_1u8.c b/math/aarch64/sve/sincosf.c
similarity index 72%
rename from pl/math/sv_sincosf_1u8.c
rename to math/aarch64/sve/sincosf.c
index c335de8d3dbb0b..f3e956ee62e232 100644
--- a/pl/math/sv_sincosf_1u8.c
+++ b/math/aarch64/sve/sincosf.c
@@ -1,7 +1,7 @@
 /*
  * Single-precision vector sincos function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -9,12 +9,22 @@
    pre-GLIBC 2.1, or on a non-GNU conforming system, this routine will need to
    be linked against the scalar sincosf from math/.  */
 #define _GNU_SOURCE
-#include <math.h>
-#undef _GNU_SOURCE
 
-#include "sv_sincosf_common.h"
 #include "sv_math.h"
-#include "pl_test.h"
+#include "sv_sincosf_common.h"
+#include "test_defs.h"
+
+#include <math.h>
+
+/* Not every libm has sincosf; without glibc, build it from sinf/cosf.  */
+#ifndef __GLIBC__
+static void
+sincosf (float x, float *out_sin, float *out_cos)
+{
+  *out_sin = sinf (x);
+  *out_cos = cosf (x);
+}
+#endif
 
 static void NOINLINE
 special_case (svfloat32_t x, svbool_t special, float *out_sin, float *out_cos)
@@ -51,12 +61,14 @@ _ZGVsMxvl4l4_sincosf (svfloat32_t x, float *out_sin, float *out_cos,
     special_case (x, special, out_sin, out_cos);
 }
 
-PL_TEST_ULP (_ZGVsMxv_sincosf_sin, 1.17)
-PL_TEST_ULP (_ZGVsMxv_sincosf_cos, 1.31)
+TEST_DISABLE_FENV (_ZGVsMxv_sincosf_sin)
+TEST_DISABLE_FENV (_ZGVsMxv_sincosf_cos)
+TEST_ULP (_ZGVsMxv_sincosf_sin, 1.17)
+TEST_ULP (_ZGVsMxv_sincosf_cos, 1.31)
 #define SV_SINCOSF_INTERVAL(lo, hi, n)                                        \
-  PL_TEST_INTERVAL (_ZGVsMxv_sincosf_sin, lo, hi, n)                          \
-  PL_TEST_INTERVAL (_ZGVsMxv_sincosf_cos, lo, hi, n)
-SV_SINCOSF_INTERVAL (0, 0x1p20, 500000)
-SV_SINCOSF_INTERVAL (-0, -0x1p20, 500000)
+  TEST_SYM_INTERVAL (_ZGVsMxv_sincosf_sin, lo, hi, n)                         \
+  TEST_SYM_INTERVAL (_ZGVsMxv_sincosf_cos, lo, hi, n)
+SV_SINCOSF_INTERVAL (0, 0x1p-31, 50000)
+SV_SINCOSF_INTERVAL (0x1p-31, 0x1p20, 500000)
 SV_SINCOSF_INTERVAL (0x1p20, inf, 10000)
-SV_SINCOSF_INTERVAL (-0x1p20, -inf, 10000)
+CLOSE_SVE_ATTR
diff --git a/math/aarch64/sve/sincospi.c b/math/aarch64/sve/sincospi.c
new file mode 100644
index 00000000000000..d06ca8cc416522
--- /dev/null
+++ b/math/aarch64/sve/sincospi.c
@@ -0,0 +1,47 @@
+/*
+ * Double-precision SVE sincospi(x, *y, *z) function.
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "test_defs.h"
+#include "mathlib.h"
+#include "sv_sincospi_common.h"
+
+/* Double-precision vector function allowing calculation of both sinpi and
+   cospi in one function call, using shared argument reduction and polynomials.
+    Worst-case error for sin is 3.09 ULP:
+    _ZGVsMxvl8l8_sincospi_sin(0x1.7a41deb4b21e1p+14) got 0x1.fd54d0b327cf1p-1
+						    want 0x1.fd54d0b327cf4p-1.
+   Worst-case error for cos is 3.16 ULP:
+    _ZGVsMxvl8l8_sincospi_cos(-0x1.11e3c7e284adep-5) got 0x1.fd2da484ff3ffp-1
+						    want 0x1.fd2da484ff402p-1.
+ */
+void
+_ZGVsMxvl8l8_sincospi (svfloat64_t x, double *out_sin, double *out_cos,
+		       svbool_t pg)
+{
+  const struct sv_sincospi_data *d = ptr_barrier (&sv_sincospi_data);
+  /* sc is a two-vector tuple: element 0 is stored as sin, element 1 as cos.  */
+  svfloat64x2_t sc = sv_sincospi_inline (pg, x, d);
+  /* Stores are predicated on pg: inactive lanes are not written.  */
+  svst1 (pg, out_sin, svget2 (sc, 0));
+  svst1 (pg, out_cos, svget2 (sc, 1));
+}
+
+#if WANT_TRIGPI_TESTS
+TEST_DISABLE_FENV (_ZGVsMxvl8l8_sincospi_sin)
+TEST_DISABLE_FENV (_ZGVsMxvl8l8_sincospi_cos)
+TEST_ULP (_ZGVsMxvl8l8_sincospi_sin, 2.59)
+TEST_ULP (_ZGVsMxvl8l8_sincospi_cos, 2.66)
+#  define SV_SINCOSPI_INTERVAL(lo, hi, n)                                     \
+    TEST_SYM_INTERVAL (_ZGVsMxvl8l8_sincospi_sin, lo, hi, n)                  \
+    TEST_SYM_INTERVAL (_ZGVsMxvl8l8_sincospi_cos, lo, hi, n)
+SV_SINCOSPI_INTERVAL (0, 0x1p-63, 10000)
+SV_SINCOSPI_INTERVAL (0x1p-63, 0.5, 50000)
+SV_SINCOSPI_INTERVAL (0.5, 0x1p53, 50000)
+SV_SINCOSPI_INTERVAL (0x1p53, inf, 10000)
+#endif
+CLOSE_SVE_ATTR
diff --git a/math/aarch64/sve/sincospif.c b/math/aarch64/sve/sincospif.c
new file mode 100644
index 00000000000000..20476f9346e916
--- /dev/null
+++ b/math/aarch64/sve/sincospif.c
@@ -0,0 +1,46 @@
+/*
+ * Single-precision SVE sincospi(x, *y, *z) function.
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "test_defs.h"
+#include "mathlib.h"
+#include "sv_sincospif_common.h"
+
+/* Single-precision vector function allowing calculation of both sinpi and
+   cospi in one function call, using shared argument reduction and polynomials.
+   Worst-case error for sin is 3.04 ULP:
+   _ZGVsMxvl4l4_sincospif_sin(0x1.b51b8p-2) got 0x1.f28b5ep-1 want
+   0x1.f28b58p-1.
+   Worst-case error for cos is 3.18 ULP:
+   _ZGVsMxvl4l4_sincospif_cos(0x1.d341a8p-5) got 0x1.f7cd56p-1 want
+   0x1.f7cd5p-1.  */
+void
+_ZGVsMxvl4l4_sincospif (svfloat32_t x, float *out_sin, float *out_cos,
+			svbool_t pg)
+{
+  const struct sv_sincospif_data *d = ptr_barrier (&sv_sincospif_data);
+  /* sc is a two-vector tuple: element 0 is stored as sin, element 1 as cos.  */
+  svfloat32x2_t sc = sv_sincospif_inline (pg, x, d);
+  /* Stores are predicated on pg: inactive lanes are not written.  */
+  svst1 (pg, out_sin, svget2 (sc, 0));
+  svst1 (pg, out_cos, svget2 (sc, 1));
+}
+
+#if WANT_TRIGPI_TESTS
+TEST_DISABLE_FENV (_ZGVsMxvl4l4_sincospif_sin)
+TEST_DISABLE_FENV (_ZGVsMxvl4l4_sincospif_cos)
+TEST_ULP (_ZGVsMxvl4l4_sincospif_sin, 2.54)
+TEST_ULP (_ZGVsMxvl4l4_sincospif_cos, 2.68)
+#  define SV_SINCOSPIF_INTERVAL(lo, hi, n)                                    \
+    TEST_SYM_INTERVAL (_ZGVsMxvl4l4_sincospif_sin, lo, hi, n)                 \
+    TEST_SYM_INTERVAL (_ZGVsMxvl4l4_sincospif_cos, lo, hi, n)
+SV_SINCOSPIF_INTERVAL (0, 0x1p-31, 10000)
+SV_SINCOSPIF_INTERVAL (0x1p-31, 0.5, 50000)
+SV_SINCOSPIF_INTERVAL (0.5, 0x1p31, 50000)
+SV_SINCOSPIF_INTERVAL (0x1p31, inf, 10000)
+#endif
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_sinf_1u9.c b/math/aarch64/sve/sinf.c
similarity index 89%
rename from pl/math/sv_sinf_1u9.c
rename to math/aarch64/sve/sinf.c
index 675d7b2480f764..62127194d60f3f 100644
--- a/pl/math/sv_sinf_1u9.c
+++ b/math/aarch64/sve/sinf.c
@@ -1,13 +1,13 @@
 /*
  * Single-precision SVE sin(x) function.
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -87,7 +87,9 @@ svfloat32_t SV_NAME_F1 (sin) (svfloat32_t x, const svbool_t pg)
   return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign));
 }
 
-PL_SIG (SV, F, 1, sin, -3.1, 3.1)
-PL_TEST_ULP (SV_NAME_F1 (sin), 1.40)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sin), 0, 0x1p23, 1000000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sin), 0x1p23, inf, 10000)
+TEST_SIG (SV, F, 1, sin, -3.1, 3.1)
+TEST_ULP (SV_NAME_F1 (sin), 1.40)
+TEST_DISABLE_FENV (SV_NAME_F1 (sin))
+TEST_SYM_INTERVAL (SV_NAME_F1 (sin), 0, 0x1p23, 1000000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (sin), 0x1p23, inf, 10000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_sinh_3u.c b/math/aarch64/sve/sinh.c
similarity index 88%
rename from pl/math/sv_sinh_3u.c
rename to math/aarch64/sve/sinh.c
index a01e19caecdab0..8a35c1c38525ce 100644
--- a/pl/math/sv_sinh_3u.c
+++ b/math/aarch64/sve/sinh.c
@@ -1,14 +1,14 @@
 /*
  * Double-precision SVE sinh(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "sv_poly_f64.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -96,8 +96,10 @@ svfloat64_t SV_NAME_D1 (sinh) (svfloat64_t x, svbool_t pg)
   return svmul_x (pg, t, halfsign);
 }
 
-PL_SIG (SV, D, 1, sinh, -10.0, 10.0)
-PL_TEST_ULP (SV_NAME_D1 (sinh), 2.08)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sinh), 0, 0x1p-26, 1000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sinh), 0x1p-26, 0x1p9, 500000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sinh), 0x1p9, inf, 1000)
+TEST_SIG (SV, D, 1, sinh, -10.0, 10.0)
+TEST_ULP (SV_NAME_D1 (sinh), 2.08)
+TEST_DISABLE_FENV (SV_NAME_D1 (sinh))
+TEST_SYM_INTERVAL (SV_NAME_D1 (sinh), 0, 0x1p-26, 1000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (sinh), 0x1p-26, 0x1p9, 500000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (sinh), 0x1p9, inf, 1000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_sinhf_2u3.c b/math/aarch64/sve/sinhf.c
similarity index 78%
rename from pl/math/sv_sinhf_2u3.c
rename to math/aarch64/sve/sinhf.c
index e34ecf378ad3bc..82b7ee4427806e 100644
--- a/pl/math/sv_sinhf_2u3.c
+++ b/math/aarch64/sve/sinhf.c
@@ -1,14 +1,13 @@
 /*
  * Single-precision SVE sinh(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
+#include "test_sig.h"
+#include "test_defs.h"
 #include "sv_expm1f_inline.h"
 
 static const struct data
@@ -54,11 +53,13 @@ svfloat32_t SV_NAME_F1 (sinh) (svfloat32_t x, const svbool_t pg)
   if (unlikely (svptest_any (pg, special)))
     return special_case (x, svmul_x (pg, t, halfsign), special);
 
-  return svmul_x (pg, t, halfsign);
+  return svmul_x (svptrue_b32 (), t, halfsign);
 }
 
-PL_SIG (SV, F, 1, sinh, -10.0, 10.0)
-PL_TEST_ULP (SV_NAME_F1 (sinh), 1.76)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sinh), 0, 0x1.6a09e8p-32, 1000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sinh), 0x1.6a09e8p-32, 0x42b0c0a7, 100000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sinh), 0x42b0c0a7, inf, 1000)
+TEST_SIG (SV, F, 1, sinh, -10.0, 10.0)
+TEST_ULP (SV_NAME_F1 (sinh), 1.76)
+TEST_DISABLE_FENV (SV_NAME_F1 (sinh))
+TEST_SYM_INTERVAL (SV_NAME_F1 (sinh), 0, 0x1.6a09e8p-32, 1000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (sinh), 0x1.6a09e8p-32, 0x42b0c0a7, 100000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (sinh), 0x42b0c0a7, inf, 1000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_sinpi_3u1.c b/math/aarch64/sve/sinpi.c
similarity index 66%
rename from pl/math/sv_sinpi_3u1.c
rename to math/aarch64/sve/sinpi.c
index c9f23da1b19b54..8fad3678b17294 100644
--- a/pl/math/sv_sinpi_3u1.c
+++ b/math/aarch64/sve/sinpi.c
@@ -1,19 +1,19 @@
 /*
  * Double-precision SVE sinpi(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#include "mathlib.h"
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_sve_f64.h"
+#include "mathlib.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "sv_poly_f64.h"
 
 static const struct data
 {
-  double poly[10];
+  double poly[10], range_val;
 } data = {
   /* Polynomial coefficients generated using Remez algorithm,
      see sinpi.sollya for details.  */
@@ -21,6 +21,7 @@ static const struct data
 	    -0x1.32d2cce62dc33p-1, 0x1.507834891188ep-4, -0x1.e30750a28c88ep-8,
 	    0x1.e8f48308acda4p-12, -0x1.6fc0032b3c29fp-16,
 	    0x1.af86ae521260bp-21, -0x1.012a9870eeb7dp-25 },
+  .range_val = 0x1p63,
 };
 
 /* A fast SVE implementation of sinpi.
@@ -37,8 +38,9 @@ svfloat64_t SV_NAME_D1 (sinpi) (svfloat64_t x, const svbool_t pg)
   svfloat64_t r = svsub_x (pg, x, n);
 
   /* Result should be negated based on if n is odd or not.  */
-  svuint64_t intn = svreinterpret_u64 (svcvt_s64_x (pg, n));
-  svuint64_t sign = svlsl_z (pg, intn, 63);
+  svbool_t cmp = svaclt (pg, x, d->range_val);
+  svuint64_t intn = svreinterpret_u64 (svcvt_s64_z (pg, n));
+  svuint64_t sign = svlsl_z (cmp, intn, 63);
 
   /* y = sin(r).  */
   svfloat64_t r2 = svmul_x (pg, r, r);
@@ -49,9 +51,12 @@ svfloat64_t SV_NAME_D1 (sinpi) (svfloat64_t x, const svbool_t pg)
   return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign));
 }
 
-PL_SIG (SV, D, 1, sinpi, -0.9, 0.9)
-PL_TEST_ULP (SV_NAME_D1 (sinpi), 2.61)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sinpi), 0, 0x1p-63, 5000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sinpi), 0x1p-63, 0.5, 10000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sinpi), 0.5, 0x1p51, 10000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sinpi), 0x1p51, inf, 10000)
+#if WANT_TRIGPI_TESTS
+TEST_ULP (SV_NAME_D1 (sinpi), 2.61)
+TEST_DISABLE_FENV (SV_NAME_D1 (sinpi))
+TEST_SYM_INTERVAL (SV_NAME_D1 (sinpi), 0, 0x1p-63, 5000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (sinpi), 0x1p-63, 0.5, 10000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (sinpi), 0.5, 0x1p51, 10000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (sinpi), 0x1p51, inf, 10000)
+#endif
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_sinpif_2u5.c b/math/aarch64/sve/sinpif.c
similarity index 61%
rename from pl/math/sv_sinpif_2u5.c
rename to math/aarch64/sve/sinpif.c
index ac3f924bed682c..b91768a29cb61c 100644
--- a/pl/math/sv_sinpif_2u5.c
+++ b/math/aarch64/sve/sinpif.c
@@ -1,23 +1,24 @@
 /*
  * Single-precision SVE sinpi(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#include "mathlib.h"
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_sve_f32.h"
+#include "mathlib.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "sv_poly_f32.h"
 
 static const struct data
 {
-  float poly[6];
+  float poly[6], range_val;
 } data = {
   /* Taylor series coefficents for sin(pi * x).  */
   .poly = { 0x1.921fb6p1f, -0x1.4abbcep2f, 0x1.466bc6p1f, -0x1.32d2ccp-1f,
 	    0x1.50783p-4f, -0x1.e30750p-8f },
+  .range_val = 0x1p31,
 };
 
 /* A fast SVE implementation of sinpif.
@@ -34,8 +35,9 @@ svfloat32_t SV_NAME_F1 (sinpi) (svfloat32_t x, const svbool_t pg)
   svfloat32_t r = svsub_x (pg, x, n);
 
   /* Result should be negated based on if n is odd or not.  */
-  svuint32_t intn = svreinterpret_u32 (svcvt_s32_x (pg, n));
-  svuint32_t sign = svlsl_z (pg, intn, 31);
+  svbool_t cmp = svaclt (pg, x, d->range_val);
+  svuint32_t intn = svreinterpret_u32 (svcvt_s32_z (pg, n));
+  svuint32_t sign = svlsl_z (cmp, intn, 31);
 
   /* y = sin(r).  */
   svfloat32_t r2 = svmul_x (pg, r, r);
@@ -45,9 +47,12 @@ svfloat32_t SV_NAME_F1 (sinpi) (svfloat32_t x, const svbool_t pg)
   return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign));
 }
 
-PL_SIG (SV, F, 1, sinpi, -0.9, 0.9)
-PL_TEST_ULP (SV_NAME_F1 (sinpi), 1.99)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sinpi), 0, 0x1p-31, 5000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sinpi), 0x1p-31, 0.5, 10000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sinpi), 0.5, 0x1p22f, 10000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sinpi), 0x1p22f, inf, 10000)
+#if WANT_TRIGPI_TESTS
+TEST_ULP (SV_NAME_F1 (sinpi), 1.99)
+TEST_DISABLE_FENV (SV_NAME_F1 (sinpi))
+TEST_SYM_INTERVAL (SV_NAME_F1 (sinpi), 0, 0x1p-31, 5000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (sinpi), 0x1p-31, 0.5, 10000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (sinpi), 0.5, 0x1p22f, 10000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (sinpi), 0x1p22f, inf, 10000)
+#endif
+CLOSE_SVE_ATTR
diff --git a/math/aarch64/sve/sv_expf_inline.h b/math/aarch64/sve/sv_expf_inline.h
new file mode 100644
index 00000000000000..6054e65bb202d8
--- /dev/null
+++ b/math/aarch64/sve/sv_expf_inline.h
@@ -0,0 +1,66 @@
+/*
+ * SVE helper for single-precision routines which calculate exp(x) and do
+ * not need special-case handling
+ *
+ * Copyright (c) 2023-2025, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef MATH_SV_EXPF_INLINE_H
+#define MATH_SV_EXPF_INLINE_H
+
+#include "sv_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+
+struct sv_expf_data
+{
+  float c1, c3, inv_ln2;
+  float ln2_lo, c0, c2, c4;
+  float ln2_hi, shift;
+};
+
+/* Coefficients copied from the polynomial in AdvSIMD variant, reversed for
+   compatibility with polynomial helpers. Shift is 1.5*2^17 + 127.  */
+#define SV_EXPF_DATA                                                          \
+  {                                                                           \
+    /* Coefficients copied from the polynomial in AdvSIMD variant.  */        \
+    .c0 = 0x1.ffffecp-1f, .c1 = 0x1.fffdb6p-2f, .c2 = 0x1.555e66p-3f,         \
+    .c3 = 0x1.573e2ep-5f, .c4 = 0x1.0e4020p-7f, .inv_ln2 = 0x1.715476p+0f,    \
+    .ln2_hi = 0x1.62e4p-1f, .ln2_lo = 0x1.7f7d1cp-20f,                        \
+    .shift = 0x1.803f8p17f,                                                   \
+  }
+
+#define C(i) sv_f32 (d->poly[i])
+
+static inline svfloat32_t
+expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d)
+{
+  /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
+     x = ln2*n + r, with r in [-ln2/2, ln2/2].  */
+  /* lane_consts = { ln2_lo, c0, c2, c4 }, adjacent fields of sv_expf_data.  */
+  svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->ln2_lo);
+
+  /* n = round(x/(ln2/N)).  NOTE(review): N is not defined in this header.  */
+  svfloat32_t z = svmad_x (pg, sv_f32 (d->inv_ln2), x, d->shift);
+  svfloat32_t n = svsub_x (pg, z, d->shift);
+
+  /* r = x - n*ln2/N.  */
+  svfloat32_t r = svmsb_x (pg, sv_f32 (d->ln2_hi), n, x);
+  r = svmls_lane (r, n, lane_consts, 0);
+
+  /* scale = 2^(n/N).  */
+  svfloat32_t scale = svexpa (svreinterpret_u32 (z));
+
+  /* poly(r) = exp(r) - 1 ~= C0 r + C1 r^2 + C2 r^3 + C3 r^4 + C4 r^5.  */
+  svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, lane_consts, 2);
+  svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, lane_consts, 3);
+  svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
+  svfloat32_t p14 = svmla_x (pg, p12, p34, r2);
+  svfloat32_t p0 = svmul_lane (r, lane_consts, 1);
+  svfloat32_t poly = svmla_x (pg, p0, r2, p14);
+
+  return svmla_x (pg, scale, scale, poly);
+}
+
+#endif // MATH_SV_EXPF_INLINE_H
diff --git a/pl/math/sv_expm1f_inline.h b/math/aarch64/sve/sv_expm1f_inline.h
similarity index 65%
rename from pl/math/sv_expm1f_inline.h
rename to math/aarch64/sve/sv_expm1f_inline.h
index a6e2050ff4a640..35892f519690eb 100644
--- a/pl/math/sv_expm1f_inline.h
+++ b/math/aarch64/sve/sv_expm1f_inline.h
@@ -2,12 +2,12 @@
  * SVE helper for single-precision routines which calculate exp(x) - 1 and do
  * not need special-case handling
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#ifndef PL_MATH_SV_EXPM1F_INLINE_H
-#define PL_MATH_SV_EXPM1F_INLINE_H
+#ifndef MATH_SV_EXPM1F_INLINE_H
+#define MATH_SV_EXPM1F_INLINE_H
 
 #include "sv_math.h"
 
@@ -16,21 +16,18 @@ struct sv_expm1f_data
   /* These 4 are grouped together so they can be loaded as one quadword, then
    used with _lane forms of svmla/svmls.  */
   float32_t c2, c4, ln2_hi, ln2_lo;
-  float32_t c0, c1, c3, inv_ln2, shift;
+  float c0, inv_ln2, c1, c3, special_bound;
 };
 
 /* Coefficients generated using fpminimax.  */
 #define SV_EXPM1F_DATA                                                        \
   {                                                                           \
-    .c0 = 0x1.fffffep-2, .c1 = 0x1.5554aep-3, .c2 = 0x1.555736p-5,            \
-    .c3 = 0x1.12287cp-7, .c4 = 0x1.6b55a2p-10,                                \
+    .c0 = 0x1.fffffep-2, .c1 = 0x1.5554aep-3, .inv_ln2 = 0x1.715476p+0f,      \
+    .c2 = 0x1.555736p-5, .c3 = 0x1.12287cp-7,                                 \
                                                                               \
-    .shift = 0x1.8p23f, .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f,    \
-    .ln2_lo = 0x1.7f7d1cp-20f,                                                \
+    .c4 = 0x1.6b55a2p-10, .ln2_lo = 0x1.7f7d1cp-20f, .ln2_hi = 0x1.62e4p-1f,  \
   }
 
-#define C(i) sv_f32 (d->c##i)
-
 static inline svfloat32_t
 expm1f_inline (svfloat32_t x, svbool_t pg, const struct sv_expm1f_data *d)
 {
@@ -44,9 +41,8 @@ expm1f_inline (svfloat32_t x, svbool_t pg, const struct sv_expm1f_data *d)
      and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
      exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
      where 2^i is exact because i is an integer.  */
-  svfloat32_t j = svmla_x (pg, sv_f32 (d->shift), x, d->inv_ln2);
-  j = svsub_x (pg, j, d->shift);
-  svint32_t i = svcvt_s32_x (pg, j);
+  svfloat32_t j = svmul_x (svptrue_b32 (), x, d->inv_ln2);
+  j = svrinta_x (pg, j);
 
   svfloat32_t f = svmls_lane (x, j, lane_constants, 2);
   f = svmls_lane (f, j, lane_constants, 3);
@@ -56,18 +52,18 @@ expm1f_inline (svfloat32_t x, svbool_t pg, const struct sv_expm1f_data *d)
 	 x + ax^2 + bx^3 + cx^4 ....
      So we calculate the polynomial P(f) = a + bf + cf^2 + ...
      and assemble the approximation expm1(f) ~= f + f^2 * P(f).  */
-  svfloat32_t p12 = svmla_lane (C (1), f, lane_constants, 0);
-  svfloat32_t p34 = svmla_lane (C (3), f, lane_constants, 1);
-  svfloat32_t f2 = svmul_x (pg, f, f);
+  svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), f, lane_constants, 0);
+  svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), f, lane_constants, 1);
+  svfloat32_t f2 = svmul_x (svptrue_b32 (), f, f);
   svfloat32_t p = svmla_x (pg, p12, f2, p34);
-  p = svmla_x (pg, C (0), f, p);
+  p = svmla_x (pg, sv_f32 (d->c0), f, p);
   p = svmla_x (pg, f, f2, p);
 
   /* Assemble the result.
      expm1(x) ~= 2^i * (p + 1) - 1
      Let t = 2^i.  */
-  svfloat32_t t = svscale_x (pg, sv_f32 (1), i);
-  return svmla_x (pg, svsub_x (pg, t, 1), p, t);
+  svfloat32_t t = svscale_x (pg, sv_f32 (1.0f), svcvt_s32_x (pg, j));
+  return svmla_x (pg, svsub_x (pg, t, 1.0f), p, t);
 }
 
-#endif // PL_MATH_SV_EXPM1F_INLINE_H
\ No newline at end of file
+#endif // MATH_SV_EXPM1F_INLINE_H
diff --git a/pl/math/sv_log1p_inline.h b/math/aarch64/sve/sv_log1p_inline.h
similarity index 90%
rename from pl/math/sv_log1p_inline.h
rename to math/aarch64/sve/sv_log1p_inline.h
index 983f8e1b04134b..86a5bb1456f688 100644
--- a/pl/math/sv_log1p_inline.h
+++ b/math/aarch64/sve/sv_log1p_inline.h
@@ -2,14 +2,14 @@
  * Helper for SVE double-precision routines which calculate log(1 + x) and do
  * not need special-case handling
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
-#ifndef PL_MATH_SV_LOG1P_INLINE_H
-#define PL_MATH_SV_LOG1P_INLINE_H
+#ifndef MATH_SV_LOG1P_INLINE_H
+#define MATH_SV_LOG1P_INLINE_H
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
+#include "sv_poly_f64.h"
 
 static const struct sv_log1p_data
 {
@@ -67,8 +67,8 @@ sv_log1p_inline (svfloat64_t x, const svbool_t pg)
   svfloat64_t cm;
 
 #ifndef WANT_SV_LOG1P_K0_SHORTCUT
-#error                                                                         \
-  "Cannot use sv_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0"
+# error                                                                       \
+      "Cannot use sv_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0"
 #elif WANT_SV_LOG1P_K0_SHORTCUT
   /* Shortcut if k is 0 - set correction term to 0 and f to x. The result is
      that the approximation is solely the polynomial.  */
@@ -93,4 +93,4 @@ sv_log1p_inline (svfloat64_t x, const svbool_t pg)
 
   return svmla_x (pg, svadd_x (pg, ylo, yhi), f2, p);
 }
-#endif // PL_MATH_SV_LOG1P_INLINE_H
+#endif // MATH_SV_LOG1P_INLINE_H
diff --git a/math/aarch64/sve/sv_log1pf_inline.h b/math/aarch64/sve/sv_log1pf_inline.h
new file mode 100644
index 00000000000000..238079c61a5b03
--- /dev/null
+++ b/math/aarch64/sve/sv_log1pf_inline.h
@@ -0,0 +1,83 @@
+/*
+ * Helper for SVE routines which calculate log(1 + x) and do not
+ * need special-case handling
+ *
+ * Copyright (c) 2023-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef MATH_SV_LOG1PF_INLINE_H
+#define MATH_SV_LOG1PF_INLINE_H
+
+#define SignExponentMask 0xff800000
+
+static const struct sv_log1pf_data
+{
+  float c0, c2, c4, c6;
+  float c1, c3, c5, c7;
+  float ln2, exp_bias, quarter;
+  uint32_t four, three_quarters;
+} sv_log1pf_data = {
+  /* Do not store first term of polynomial, which is -0.5, as
+     this can be fmov-ed directly instead of including it in
+     the main load-and-mla polynomial schedule.  */
+  .c0 = 0x1.5555aap-2f,		.c1 = -0x1.000038p-2f, .c2 = 0x1.99675cp-3f,
+  .c3 = -0x1.54ef78p-3f,	.c4 = 0x1.28a1f4p-3f,  .c5 = -0x1.0da91p-3f,
+  .c6 = 0x1.abcb6p-4f,		.c7 = -0x1.6f0d5ep-5f, .ln2 = 0x1.62e43p-1f,
+  .exp_bias = 0x1p-23f,		.quarter = 0x1p-2f,    .four = 0x40800000,
+  .three_quarters = 0x3f400000,
+};
+
+static inline svfloat32_t
+sv_log1pf_inline (svfloat32_t x, svbool_t pg)
+{
+  const struct sv_log1pf_data *d = ptr_barrier (&sv_log1pf_data);
+
+  /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
+     is in [-0.25, 0.5]):
+     log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
+     In the code, `m` holds x + 1 and `m_scale` is the polynomial argument.
+     We approximate log1p(m) with a polynomial, then scale by
+     k*log(2). Instead of doing this directly, we use an intermediate
+     scale factor s = 4*k*log(2) to ensure the scale is representable
+     as a normalised fp32 number.  */
+  svfloat32_t m = svadd_x (pg, x, 1);
+
+  /* Choose k to scale x to the range [-1/4, 1/2].  */
+  svint32_t k
+      = svand_x (pg, svsub_x (pg, svreinterpret_s32 (m), d->three_quarters),
+		 sv_s32 (SignExponentMask));
+
+  /* Scale x by exponent manipulation.  */
+  svfloat32_t m_scale = svreinterpret_f32 (
+      svsub_x (pg, svreinterpret_u32 (x), svreinterpret_u32 (k)));
+
+  /* Scale up so the scale factor is a normalised fp32 number, and scale m down
+     accordingly.  fconst lanes 0..2 = ln2, exp_bias, quarter.  */
+  svfloat32_t s = svreinterpret_f32 (svsubr_x (pg, k, d->four));
+  svfloat32_t fconst = svld1rq_f32 (svptrue_b32 (), &d->ln2);
+  m_scale = svadd_x (pg, m_scale, svmla_lane_f32 (sv_f32 (-1), s, fconst, 2));
+
+  /* Evaluate polynomial on reduced interval.  */
+  svfloat32_t ms2 = svmul_x (svptrue_b32 (), m_scale, m_scale);
+  /* c1357 lanes 0..3 = c1, c3, c5, c7, adjacent fields of sv_log1pf_data.  */
+  svfloat32_t c1357 = svld1rq_f32 (svptrue_b32 (), &d->c1);
+  svfloat32_t p01 = svmla_lane_f32 (sv_f32 (d->c0), m_scale, c1357, 0);
+  svfloat32_t p23 = svmla_lane_f32 (sv_f32 (d->c2), m_scale, c1357, 1);
+  svfloat32_t p45 = svmla_lane_f32 (sv_f32 (d->c4), m_scale, c1357, 2);
+  svfloat32_t p67 = svmla_lane_f32 (sv_f32 (d->c6), m_scale, c1357, 3);
+
+  svfloat32_t p = svmla_x (pg, p45, p67, ms2);
+  p = svmla_x (pg, p23, p, ms2);
+  p = svmla_x (pg, p01, p, ms2);
+
+  p = svmad_x (pg, m_scale, p, -0.5);
+  p = svmla_x (pg, m_scale, m_scale, svmul_x (pg, m_scale, p));
+
+  /* The scale factor to be applied back at the end - by multiplying float(k)
+   by 2^-23 we get the unbiased exponent of k.  */
+  svfloat32_t scale_back = svmul_lane_f32 (svcvt_f32_x (pg, k), fconst, 1);
+  return svmla_lane_f32 (p, scale_back, fconst, 0);
+}
+
+#endif // MATH_SV_LOG1PF_INLINE_H
diff --git a/math/aarch64/sve/sv_log_inline.h b/math/aarch64/sve/sv_log_inline.h
new file mode 100644
index 00000000000000..a1b169a0b72794
--- /dev/null
+++ b/math/aarch64/sve/sv_log_inline.h
@@ -0,0 +1,83 @@
+/*
+ * Double-precision vector log(x) function - inline version
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "math_config.h"
+
+#ifndef SV_LOG_INLINE_POLY_ORDER
+#  error Cannot use inline log helper without specifying poly order (options are 4 or 5)
+#endif
+
+#if SV_LOG_INLINE_POLY_ORDER == 4
+#  define POLY                                                                \
+    {                                                                         \
+      -0x1.ffffffffcbad3p-2, 0x1.555555578ed68p-2, -0x1.0000d3a1e7055p-2,     \
+	  0x1.999392d02a63ep-3                                                \
+    }
+#elif SV_LOG_INLINE_POLY_ORDER == 5
+#  define POLY                                                                \
+    {                                                                         \
+      -0x1.ffffffffffff7p-2, 0x1.55555555170d4p-2, -0x1.0000000399c27p-2,     \
+	  0x1.999b2e90e94cap-3, -0x1.554e550bd501ep-3                         \
+    }
+#else
+#  error Can only choose order 4 or 5 for log poly
+#endif
+
+struct sv_log_inline_data
+{
+  double poly[SV_LOG_INLINE_POLY_ORDER];
+  double ln2;
+  uint64_t off, sign_exp_mask;
+};
+
+#define SV_LOG_CONSTANTS                                                      \
+  {                                                                           \
+    .poly = POLY, .ln2 = 0x1.62e42fefa39efp-1,                                \
+    .sign_exp_mask = 0xfff0000000000000, .off = 0x3fe6900900000000            \
+  }
+
+#define P(i) sv_f64 (d->poly[i])
+#define N (1 << V_LOG_TABLE_BITS)
+
+static inline svfloat64_t
+sv_log_inline (svbool_t pg, svfloat64_t x, const struct sv_log_inline_data *d)
+{
+  svuint64_t ix = svreinterpret_u64 (x);
+
+  /* x = 2^k z; where z is in range [Off,2*Off) and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  svuint64_t tmp = svsub_x (pg, ix, d->off);
+  /* Calculate table index = (tmp >> (52 - V_LOG_TABLE_BITS)) % N.
+     The actual value of i is double this due to table layout.  */
+  svuint64_t i
+      = svand_x (pg, svlsr_x (pg, tmp, (51 - V_LOG_TABLE_BITS)), (N - 1) << 1);
+  svint64_t k
+      = svasr_x (pg, svreinterpret_s64 (tmp), 52); /* Arithmetic shift.  */
+  svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52));
+  svfloat64_t z = svreinterpret_f64 (iz);
+
+  /* Lookup in 2 global lists (length N).  */
+  svfloat64_t invc = svld1_gather_index (pg, &__v_log_data.table[0].invc, i);
+  svfloat64_t logc = svld1_gather_index (pg, &__v_log_data.table[0].logc, i);
+
+  /* log(x) = log1p(z/c-1) + log(c) + k*Ln2.  */
+  svfloat64_t r = svmad_x (pg, invc, z, -1);
+  svfloat64_t kd = svcvt_f64_x (pg, k);
+  /* hi = r + log(c) + k*Ln2.  */
+  svfloat64_t hi = svmla_x (pg, svadd_x (pg, logc, r), kd, __v_log_data.ln2);
+  /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi (A4 for order 5 only).  */
+  svfloat64_t r2 = svmul_x (pg, r, r);
+  svfloat64_t y = svmla_x (pg, P (2), r, P (3));
+  svfloat64_t p = svmla_x (pg, P (0), r, P (1));
+#if SV_LOG_INLINE_POLY_ORDER == 5
+  y = svmla_x (pg, y, r2, P (4)); /* y += r2 * A4.  */
+#endif
+  y = svmla_x (pg, p, r2, y);
+  return svmla_x (pg, hi, r2, y);
+}
diff --git a/pl/math/sv_math.h b/math/aarch64/sve/sv_math.h
similarity index 72%
rename from pl/math/sv_math.h
rename to math/aarch64/sve/sv_math.h
index f67fe91803babf..db688a89303270 100644
--- a/pl/math/sv_math.h
+++ b/math/aarch64/sve/sv_math.h
@@ -1,24 +1,38 @@
 /*
  * Wrapper functions for SVE ACLE.
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #ifndef SV_MATH_H
 #define SV_MATH_H
 
-#ifndef WANT_VMATH
-/* Enable the build of vector math code.  */
-# define WANT_VMATH 1
+/* Enable SVE in this translation unit. Note, because this is 'pushed' in
+   clang, any file including sv_math.h will have to pop it back off again by
+   ending the source file with CLOSE_SVE_ATTR. It is important that sv_math.h
+   is included first so that all functions have the target attribute.  */
+#ifdef __clang__
+# pragma clang attribute push(__attribute__((target("sve"))),                \
+			       apply_to = any(function))
+# define CLOSE_SVE_ATTR _Pragma("clang attribute pop")
+#else
+# pragma GCC target("+sve")
+# define CLOSE_SVE_ATTR
 #endif
 
-#if WANT_VMATH
+#include <arm_sve.h>
+#include <stdbool.h>
 
-# include <arm_sve.h>
-# include <stdbool.h>
+#include "math_config.h"
 
-# include "math_config.h"
+#define SV_NAME_F1(fun) _ZGVsMxv_##fun##f
+#define SV_NAME_D1(fun) _ZGVsMxv_##fun
+#define SV_NAME_F2(fun) _ZGVsMxvv_##fun##f
+#define SV_NAME_D2(fun) _ZGVsMxvv_##fun
+#define SV_NAME_F1_L1(fun) _ZGVsMxvl4_##fun##f
+#define SV_NAME_D1_L1(fun) _ZGVsMxvl8_##fun
+#define SV_NAME_F1_L2(fun) _ZGVsMxvl4l4_##fun##f
 
 /* Double precision.  */
 static inline svint64_t
@@ -129,5 +143,3 @@ sv_call2_f32 (float (*f) (float, float), svfloat32_t x1, svfloat32_t x2,
   return y;
 }
 #endif
-
-#endif
diff --git a/pl/math/poly_sve_f32.h b/math/aarch64/sve/sv_poly_f32.h
similarity index 78%
rename from pl/math/poly_sve_f32.h
rename to math/aarch64/sve/sv_poly_f32.h
index a97e2ced027aeb..2d73014a4b450f 100644
--- a/pl/math/poly_sve_f32.h
+++ b/math/aarch64/sve/sv_poly_f32.h
@@ -2,12 +2,12 @@
  * Helpers for evaluating polynomials on single-precision SVE input, using
  * various schemes.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#ifndef PL_MATH_POLY_SVE_F32_H
-#define PL_MATH_POLY_SVE_F32_H
+#ifndef MATH_POLY_SVE_F32_H
+#define MATH_POLY_SVE_F32_H
 
 #include <arm_sve.h>
 
@@ -17,7 +17,7 @@
 #define STYPE float
 #define VWRAP(f) sv_##f##_f32_x
 #define DUP svdup_f32
-#include "poly_sve_generic.h"
+#include "sv_poly_generic.h"
 #undef DUP
 #undef VWRAP
 #undef STYPE
diff --git a/pl/math/poly_sve_f64.h b/math/aarch64/sve/sv_poly_f64.h
similarity index 78%
rename from pl/math/poly_sve_f64.h
rename to math/aarch64/sve/sv_poly_f64.h
index 5fb14b3c1700b9..f92be9bf8e9c86 100644
--- a/pl/math/poly_sve_f64.h
+++ b/math/aarch64/sve/sv_poly_f64.h
@@ -2,12 +2,12 @@
  * Helpers for evaluating polynomials on double-precision SVE input, using
  * various schemes.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#ifndef PL_MATH_POLY_SVE_F64_H
-#define PL_MATH_POLY_SVE_F64_H
+#ifndef MATH_POLY_SVE_F64_H
+#define MATH_POLY_SVE_F64_H
 
 #include <arm_sve.h>
 
@@ -17,7 +17,7 @@
 #define STYPE double
 #define VWRAP(f) sv_##f##_f64_x
 #define DUP svdup_f64
-#include "poly_sve_generic.h"
+#include "sv_poly_generic.h"
 #undef DUP
 #undef VWRAP
 #undef STYPE
diff --git a/pl/math/poly_sve_generic.h b/math/aarch64/sve/sv_poly_generic.h
similarity index 91%
rename from pl/math/poly_sve_generic.h
rename to math/aarch64/sve/sv_poly_generic.h
index b568e4cddff38a..a1fc59baa8d3ba 100644
--- a/pl/math/poly_sve_generic.h
+++ b/math/aarch64/sve/sv_poly_generic.h
@@ -2,7 +2,7 @@
  * Helpers for evaluating polynomials with various schemes - specific to SVE
  * but precision-agnostic.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -299,3 +299,33 @@ static inline VTYPE VWRAP (pw_horner_18) (svbool_t pg, VTYPE x, VTYPE x2,
   VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
   return svmla_x (pg, p01, x2, p2_18);
 }
+
+static inline VTYPE VWRAP (lw_pw_horner_5) (svbool_t pg, VTYPE x, VTYPE x2,
+					    const STYPE *poly_even,
+					    const STYPE *poly_odd)
+{
+  /* All-true predicate: ld1rq zeroes quadword lanes that pg disables.  */
+  VTYPE c13 = svld1rq (svptrue_b8 (), poly_odd);
+  VTYPE p01 = svmla_lane (DUP (poly_even[0]), x, c13, 0);
+  VTYPE p23 = svmla_lane (DUP (poly_even[1]), x, c13, 1);
+  VTYPE p45 = svmla_x (pg, DUP (poly_even[2]), x, poly_odd[2]);
+
+  VTYPE p;
+  p = svmla_x (pg, p23, x2, p45);
+  p = svmla_x (pg, p01, x2, p);
+  return p;
+}
+static inline VTYPE VWRAP (lw_pw_horner_9) (svbool_t pg, VTYPE x, VTYPE x2,
+					    const STYPE *poly_even,
+					    const STYPE *poly_odd)
+{
+  /* All-true predicate: ld1rq zeroes quadword lanes that pg disables.  */
+  VTYPE c13 = svld1rq (svptrue_b8 (), poly_odd);
+  VTYPE p49 = VWRAP (lw_pw_horner_5) (pg, x, x2, poly_even + 2, poly_odd + 2);
+  VTYPE p23 = svmla_lane (DUP (poly_even[1]), x, c13, 1);
+
+  VTYPE p29 = svmla_x (pg, p23, x2, p49);
+  VTYPE p01 = svmla_lane (DUP (poly_even[0]), x, c13, 0);
+
+  return svmla_x (pg, p01, x2, p29);
+}
diff --git a/pl/math/sv_sincos_common.h b/math/aarch64/sve/sv_sincos_common.h
similarity index 97%
rename from pl/math/sv_sincos_common.h
rename to math/aarch64/sve/sv_sincos_common.h
index f7b58deb90bdfe..2a537da157b04b 100644
--- a/pl/math/sv_sincos_common.h
+++ b/math/aarch64/sve/sv_sincos_common.h
@@ -1,12 +1,12 @@
 /*
  * Core approximation for double-precision vector sincos
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
+#include "sv_poly_f64.h"
 
 static const struct sv_sincos_data
 {
diff --git a/pl/math/sv_sincosf_common.h b/math/aarch64/sve/sv_sincosf_common.h
similarity index 98%
rename from pl/math/sv_sincosf_common.h
rename to math/aarch64/sve/sv_sincosf_common.h
index 714e996443b3d1..bda89ed2468074 100644
--- a/pl/math/sv_sincosf_common.h
+++ b/math/aarch64/sve/sv_sincosf_common.h
@@ -1,7 +1,7 @@
 /*
  * Core approximation for single-precision vector sincos
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
diff --git a/math/aarch64/sve/sv_sincospi_common.h b/math/aarch64/sve/sv_sincospi_common.h
new file mode 100644
index 00000000000000..672ebbc8e855f4
--- /dev/null
+++ b/math/aarch64/sve/sv_sincospi_common.h
@@ -0,0 +1,76 @@
+/*
+ * Core approximation for double-precision SVE sincospi
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "sv_poly_f64.h"
+
+static const struct sv_sincospi_data
+{
+  double c0, c2, c4, c6, c8;
+  double c1, c3, c5, c7, c9;
+  double range_val;
+} sv_sincospi_data = {
+  /* Polynomial coefficients generated using Remez algorithm,
+     see sinpi.sollya for details.  */
+  .c0 = 0x1.921fb54442d184p1,
+  .c1 = -0x1.4abbce625be53p2,
+  .c2 = 0x1.466bc6775ab16p1,
+  .c3 = -0x1.32d2cce62dc33p-1,
+  .c4 = 0x1.507834891188ep-4,
+  .c5 = -0x1.e30750a28c88ep-8,
+  .c6 = 0x1.e8f48308acda4p-12,
+  .c7 = -0x1.6fc0032b3c29fp-16,
+  .c8 = 0x1.af86ae521260bp-21,
+  .c9 = -0x1.012a9870eeb7dp-25,
+  /* Exclusive upper bound for a signed integer.  */
+  .range_val = 0x1p63
+};
+
+/* Double-precision vector function allowing calculation of both sinpi and
+   cospi in one function call, using shared argument reduction and polynomials.
+    Worst-case error for sin is 3.09 ULP:
+    _ZGVsMxvl8l8_sincospi_sin(0x1.7a41deb4b21e1p+14) got 0x1.fd54d0b327cf1p-1
+						    want 0x1.fd54d0b327cf4p-1.
+   Worst-case error for cos is 3.16 ULP:
+    _ZGVsMxvl8l8_sincospi_cos(-0x1.11e3c7e284adep-5) got 0x1.fd2da484ff3ffp-1
+						    want 0x1.fd2da484ff402p-1.
+ */
+static inline svfloat64x2_t
+sv_sincospi_inline (svbool_t pg, svfloat64_t x,
+		    const struct sv_sincospi_data *d)
+{
+  const svbool_t pt = svptrue_b64 ();
+
+  /* sr = x - rx, where rx = x rounded to nearest (svrinta: ties away).  */
+  /* pt hints unpredicated instruction.  */
+  svfloat64_t rx = svrinta_x (pg, x);
+  svfloat64_t sr = svsub_x (pt, x, rx);
+
+  /* cospi(x) = sinpi(0.5 - abs(r)) for r in -1/2 .. 1/2 (r is sr here).  */
+  svfloat64_t cr = svsubr_x (pg, svabs_x (pg, sr), 0.5);
+
+  /* Pairwise Horner approximation for y = sin(r * pi).  */
+  /* pt hints unpredicated instruction.  */
+  svfloat64_t sr2 = svmul_x (pt, sr, sr);
+  svfloat64_t cr2 = svmul_x (pt, cr, cr);
+  svfloat64_t sr4 = svmul_x (pt, sr2, sr2);
+  svfloat64_t cr4 = svmul_x (pt, cr2, cr2);
+
+  /* If rx is odd, the sign of the result should be inverted for sinpi and
+     re-introduced for cospi. cmp filters rxs that saturate to max sint.  */
+  svbool_t cmp = svaclt (pg, x, d->range_val);
+  svuint64_t odd = svlsl_x (pt, svreinterpret_u64 (svcvt_s64_z (pg, rx)), 63);
+  sr = svreinterpret_f64 (sveor_x (pt, svreinterpret_u64 (sr), odd));
+  cr = svreinterpret_f64 (sveor_m (cmp, svreinterpret_u64 (cr), odd));
+
+  svfloat64_t sinpix = svmul_x (
+      pt, sv_lw_pw_horner_9_f64_x (pg, sr2, sr4, &(d->c0), &(d->c1)), sr);
+  svfloat64_t cospix = svmul_x (
+      pt, sv_lw_pw_horner_9_f64_x (pg, cr2, cr4, &(d->c0), &(d->c1)), cr);
+
+  return svcreate2 (sinpix, cospix);
+}
diff --git a/math/aarch64/sve/sv_sincospif_common.h b/math/aarch64/sve/sv_sincospif_common.h
new file mode 100644
index 00000000000000..4b9101de74ed9d
--- /dev/null
+++ b/math/aarch64/sve/sv_sincospif_common.h
@@ -0,0 +1,82 @@
+/*
+ * Helper for single-precision SVE sincospi
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "sv_poly_f32.h"
+
+const static struct sv_sincospif_data
+{
+  float c0, c2, c4;
+  float c1, c3, c5;
+  float range_val;
+} sv_sincospif_data = {
+  /* Taylor series coefficients for sin(pi * x).  */
+  .c0 = 0x1.921fb6p1f,
+  .c1 = -0x1.4abbcep2f,
+  .c2 = 0x1.466bc6p1f,
+  .c3 = -0x1.32d2ccp-1f,
+  .c4 = 0x1.50783p-4f,
+  .c5 = -0x1.e30750p-8f,
+  /* Exclusive upper bound for a signed integer.  */
+  .range_val = 0x1p31f,
+};
+
+/* Single-precision vector function allowing calculation of both sinpi and
+   cospi in one function call, using shared argument reduction and polynomials.
+   Worst-case error for sin is 3.04 ULP:
+   _ZGVsMxvl4l4_sincospif_sin(0x1.b51b8p-2) got 0x1.f28b5ep-1 want
+   0x1.f28b58p-1.
+   Worst-case error for cos is 3.18 ULP:
+   _ZGVsMxvl4l4_sincospif_cos(0x1.d341a8p-5) got 0x1.f7cd56p-1 want
+   0x1.f7cd5p-1.  */
+static inline svfloat32x2_t
+sv_sincospif_inline (svbool_t pg, svfloat32_t x,
+		     const struct sv_sincospif_data *d)
+{
+  const svbool_t pt = svptrue_b32 ();
+
+  /* r = x - rint(x).  */
+  svfloat32_t rx = svrinta_x (pg, x);
+  svfloat32_t sr = svsub_x (pt, x, rx);
+
+  /* cospi(x) = sinpi(0.5 - abs(r)) for values -1/2 .. 1/2.  */
+  svfloat32_t cr = svsubr_x (pt, svabs_x (pg, sr), 0.5f);
+
+  /* Pairwise Horner approximation for y = sin(r * pi).  */
+  svfloat32_t sr2 = svmul_x (pt, sr, sr);
+  svfloat32_t sr4 = svmul_x (pt, sr2, sr2);
+  svfloat32_t cr2 = svmul_x (pt, cr, cr);
+  svfloat32_t cr4 = svmul_x (pt, cr2, cr2);
+
+  /* If rint(x) is odd, the sign of the result should be inverted for sinpi and
+     re-introduced for cospi. cmp filters rxs that saturate to max sint.  */
+  svbool_t cmp = svaclt (pg, x, d->range_val);
+  svuint32_t odd = svlsl_x (pt, svreinterpret_u32 (svcvt_s32_z (pg, rx)), 31);
+  sr = svreinterpret_f32 (sveor_x (pt, svreinterpret_u32 (sr), odd));
+  cr = svreinterpret_f32 (sveor_m (cmp, svreinterpret_u32 (cr), odd));
+
+  svfloat32_t c135 = svld1rq_f32 (svptrue_b32 (), &d->c1);
+
+  svfloat32_t sp01 = svmla_lane (sv_f32 (d->c0), sr2, c135, 0);
+  svfloat32_t sp23 = svmla_lane (sv_f32 (d->c2), sr2, c135, 1);
+  svfloat32_t sp45 = svmla_lane (sv_f32 (d->c4), sr2, c135, 2);
+
+  svfloat32_t cp01 = svmla_lane (sv_f32 (d->c0), cr2, c135, 0);
+  svfloat32_t cp23 = svmla_lane (sv_f32 (d->c2), cr2, c135, 1);
+  svfloat32_t cp45 = svmla_lane (sv_f32 (d->c4), cr2, c135, 2);
+
+  svfloat32_t sp = svmla_x (pg, sp23, sr4, sp45);
+  svfloat32_t cp = svmla_x (pg, cp23, cr4, cp45);
+
+  sp = svmla_x (pg, sp01, sr4, sp);
+  cp = svmla_x (pg, cp01, cr4, cp);
+
+  svfloat32_t sinpix = svmul_x (pt, sp, sr);
+  svfloat32_t cospix = svmul_x (pt, cp, cr);
+
+  return svcreate2 (sinpix, cospix);
+}
diff --git a/math/aarch64/sve/tan.c b/math/aarch64/sve/tan.c
new file mode 100644
index 00000000000000..1dfc5c422d5e67
--- /dev/null
+++ b/math/aarch64/sve/tan.c
@@ -0,0 +1,131 @@
+/*
+ * Double-precision SVE tan(x) function.
+ *
+ * Copyright (c) 2023-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+
+static const struct data
+{
+  double c2, c4, c6, c8;
+  double poly_1357[4];
+  double c0, inv_half_pi;
+  double half_pi_hi, half_pi_lo, range_val;
+} data = {
+  /* Polynomial generated with FPMinimax.  */
+  .c2 = 0x1.ba1ba1bb46414p-5,
+  .c4 = 0x1.226e5e5ecdfa3p-7,
+  .c6 = 0x1.7ea75d05b583ep-10,
+  .c8 = 0x1.4e4fd14147622p-12,
+  .poly_1357 = { 0x1.1111111110a63p-3, 0x1.664f47e5b5445p-6,
+		 0x1.d6c7ddbf87047p-9, 0x1.289f22964a03cp-11 },
+  .c0 = 0x1.5555555555556p-2,
+  .inv_half_pi = 0x1.45f306dc9c883p-1,
+  .half_pi_hi = 0x1.921fb54442d18p0,
+  .half_pi_lo = 0x1.1a62633145c07p-54,
+  .range_val = 0x1p23,
+};
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t p, svfloat64_t q, svbool_t pg,
+	      svbool_t special)
+{
+  svbool_t use_recip = svcmpeq (
+      pg, svand_x (pg, svreinterpret_u64 (svcvt_s64_x (pg, q)), 1), 0);
+
+  svfloat64_t n = svmad_x (pg, p, p, -1);
+  svfloat64_t d = svmul_x (svptrue_b64 (), p, 2);
+  svfloat64_t swap = n;
+  n = svneg_m (n, use_recip, d);
+  d = svsel (use_recip, swap, d);
+  svfloat64_t y = svdiv_x (svnot_z (pg, special), n, d);
+  return sv_call_f64 (tan, x, y, special);
+}
+
+/* Vector approximation for double-precision tan.
+   Maximum measured error is 3.48 ULP:
+   _ZGVsMxv_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37
+				      want -0x1.f6ccd8ecf7deap+37.  */
+svfloat64_t SV_NAME_D1 (tan) (svfloat64_t x, svbool_t pg)
+{
+  const struct data *dat = ptr_barrier (&data);
+  svfloat64_t half_pi_c0 = svld1rq (svptrue_b64 (), &dat->c0);
+  /* q = nearest integer to 2 * x / pi (lane 1 holds inv_half_pi).  */
+  svfloat64_t q = svmul_lane (x, half_pi_c0, 1);
+  q = svrinta_x (pg, q);
+
+  /* Use q to reduce x to r in [-pi/4, pi/4], by:
+     r = x - q * pi/2, in extended precision.  */
+  svfloat64_t r = x;
+  svfloat64_t half_pi = svld1rq (svptrue_b64 (), &dat->half_pi_hi);
+  r = svmls_lane (r, q, half_pi, 0);
+  r = svmls_lane (r, q, half_pi, 1);
+  /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle
+     formula.  */
+  r = svmul_x (svptrue_b64 (), r, 0.5);
+
+  /* Approximate tan(r) using order 8 polynomial.
+     tan(x) is odd, so polynomial has the form:
+     tan(x) ~= x + C0 * x^3 + C1 * x^5 + C2 * x^7 + ...
+     Hence we first approximate P(r) = C1 + C2 * r^2 + C3 * r^4 + ...
+     Then compute the approximation by:
+     tan(r) ~= r + r^3 * (C0 + r^2 * P(r)).  */
+
+  svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+  svfloat64_t r4 = svmul_x (svptrue_b64 (), r2, r2);
+  svfloat64_t r8 = svmul_x (svptrue_b64 (), r4, r4);
+  /* Use offset version coeff array by 1 to evaluate from C1 onwards.  */
+  svfloat64_t C_24 = svld1rq (svptrue_b64 (), &dat->c2);
+  svfloat64_t C_68 = svld1rq (svptrue_b64 (), &dat->c6);
+
+  /* Estrin-style evaluation of P in r2: pair terms, then combine by r4/r8.  */
+  svfloat64_t p01 = svmla_lane (sv_f64 (dat->poly_1357[0]), r2, C_24, 0);
+  svfloat64_t p23 = svmla_lane_f64 (sv_f64 (dat->poly_1357[1]), r2, C_24, 1);
+  svfloat64_t p03 = svmla_x (pg, p01, p23, r4);
+
+  svfloat64_t p45 = svmla_lane (sv_f64 (dat->poly_1357[2]), r2, C_68, 0);
+  svfloat64_t p67 = svmla_lane (sv_f64 (dat->poly_1357[3]), r2, C_68, 1);
+  svfloat64_t p47 = svmla_x (pg, p45, p67, r4);
+
+  svfloat64_t p = svmla_x (pg, p03, p47, r8);
+
+  svfloat64_t z = svmul_x (svptrue_b64 (), p, r);
+  z = svmul_x (svptrue_b64 (), r2, z);
+  z = svmla_lane (z, r, half_pi_c0, 0);
+  p = svmla_x (pg, r, r2, z);
+
+  /* Recombination uses double-angle formula:
+     tan(2x) = 2 * tan(x) / (1 - (tan(x))^2)
+     and reciprocity around pi/2:
+     tan(x) = 1 / (tan(pi/2 - x))
+     to assemble result using change-of-sign and conditional selection of
+     numerator/denominator dependent on odd/even-ness of q (quadrant).  */
+
+  /* Invert condition to catch NaNs and Infs as well as large values.  */
+  svbool_t special = svnot_z (pg, svaclt (pg, x, dat->range_val));
+
+  if (unlikely (svptest_any (pg, special)))
+    {
+      return special_case (x, p, q, pg, special);
+    }
+  svbool_t use_recip = svcmpeq (
+      pg, svand_x (pg, svreinterpret_u64 (svcvt_s64_x (pg, q)), 1), 0);
+
+  svfloat64_t n = svmad_x (pg, p, p, -1);
+  svfloat64_t d = svmul_x (svptrue_b64 (), p, 2);
+  svfloat64_t swap = n;
+  n = svneg_m (n, use_recip, d);
+  d = svsel (use_recip, swap, d);
+  return svdiv_x (pg, n, d);
+}
+
+TEST_SIG (SV, D, 1, tan, -3.1, 3.1)
+TEST_ULP (SV_NAME_D1 (tan), 2.99)
+TEST_DISABLE_FENV (SV_NAME_D1 (tan))
+TEST_SYM_INTERVAL (SV_NAME_D1 (tan), 0, 0x1p23, 500000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (tan), 0x1p23, inf, 5000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_tanf_3u5.c b/math/aarch64/sve/tanf.c
similarity index 79%
rename from pl/math/sv_tanf_3u5.c
rename to math/aarch64/sve/tanf.c
index 6b8cd1e64b446a..d34fc2fc1a4e61 100644
--- a/pl/math/sv_tanf_3u5.c
+++ b/math/aarch64/sve/tanf.c
@@ -1,13 +1,13 @@
 /*
  * Single-precision vector tan(x) function.
  *
- * Copyright (c) 2020-2023, Arm Limited.
+ * Copyright (c) 2020-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -50,21 +50,16 @@ svfloat32_t SV_NAME_F1 (tan) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
-  /* Determine whether input is too large to perform fast regression.  */
-  svbool_t cmp = svacge (pg, x, d->range_val);
-
   svfloat32_t odd_coeffs = svld1rq (svptrue_b32 (), &d->c1);
   svfloat32_t pi_vals = svld1rq (svptrue_b32 (), &d->pio2_1);
 
   /* n = rint(x/(pi/2)).  */
-  svfloat32_t q = svmla_lane (sv_f32 (d->shift), x, pi_vals, 3);
-  svfloat32_t n = svsub_x (pg, q, d->shift);
+  svfloat32_t n = svrintn_x (pg, svmul_lane (x, pi_vals, 3));
   /* n is already a signed integer, simply convert it.  */
   svint32_t in = svcvt_s32_x (pg, n);
   /* Determine if x lives in an interval, where |tan(x)| grows to infinity.  */
   svint32_t alt = svand_x (pg, in, 1);
   svbool_t pred_alt = svcmpne (pg, alt, 0);
-
   /* r = x - n * (pi/2)  (range reduction into 0 .. pi/4).  */
   svfloat32_t r;
   r = svmls_lane (x, n, pi_vals, 0);
@@ -83,7 +78,7 @@ svfloat32_t SV_NAME_F1 (tan) (svfloat32_t x, const svbool_t pg)
 
   /* Evaluate polynomial approximation of tangent on [-pi/4, pi/4],
      using Estrin on z^2.  */
-  svfloat32_t z2 = svmul_x (pg, z, z);
+  svfloat32_t z2 = svmul_x (svptrue_b32 (), r, r);
   svfloat32_t p01 = svmla_lane (sv_f32 (d->c0), z2, odd_coeffs, 0);
   svfloat32_t p23 = svmla_lane (sv_f32 (d->c2), z2, odd_coeffs, 1);
   svfloat32_t p45 = svmla_lane (sv_f32 (d->c4), z2, odd_coeffs, 2);
@@ -96,24 +91,27 @@ svfloat32_t SV_NAME_F1 (tan) (svfloat32_t x, const svbool_t pg)
 
   svfloat32_t y = svmla_x (pg, z, p, svmul_x (pg, z, z2));
 
-  /* Transform result back, if necessary.  */
-  svfloat32_t inv_y = svdivr_x (pg, y, 1.0f);
-
   /* No need to pass pg to specialcase here since cmp is a strict subset,
      guaranteed by the cmpge above.  */
+
+  /* Determine whether input is too large to perform fast regression.  */
+  svbool_t cmp = svacge (pg, x, d->range_val);
   if (unlikely (svptest_any (pg, cmp)))
-    return special_case (x, svsel (pred_alt, inv_y, y), cmp);
+    return special_case (x, svsel (pred_alt, svdivr_x (pg, y, 1.0f), y), cmp);
 
+  svfloat32_t inv_y = svdivr_x (pg, y, 1.0f);
   return svsel (pred_alt, inv_y, y);
 }
 
-PL_SIG (SV, F, 1, tan, -3.1, 3.1)
-PL_TEST_ULP (SV_NAME_F1 (tan), 2.96)
-PL_TEST_INTERVAL (SV_NAME_F1 (tan), -0.0, -0x1p126, 100)
-PL_TEST_INTERVAL (SV_NAME_F1 (tan), 0x1p-149, 0x1p-126, 4000)
-PL_TEST_INTERVAL (SV_NAME_F1 (tan), 0x1p-126, 0x1p-23, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (tan), 0x1p-23, 0.7, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (tan), 0.7, 1.5, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (tan), 1.5, 100, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (tan), 100, 0x1p17, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (tan), 0x1p17, inf, 50000)
+TEST_SIG (SV, F, 1, tan, -3.1, 3.1)
+TEST_ULP (SV_NAME_F1 (tan), 2.96)
+TEST_DISABLE_FENV (SV_NAME_F1 (tan))
+TEST_INTERVAL (SV_NAME_F1 (tan), -0.0, -0x1p126, 100)
+TEST_INTERVAL (SV_NAME_F1 (tan), 0x1p-149, 0x1p-126, 4000)
+TEST_INTERVAL (SV_NAME_F1 (tan), 0x1p-126, 0x1p-23, 50000)
+TEST_INTERVAL (SV_NAME_F1 (tan), 0x1p-23, 0.7, 50000)
+TEST_INTERVAL (SV_NAME_F1 (tan), 0.7, 1.5, 50000)
+TEST_INTERVAL (SV_NAME_F1 (tan), 1.5, 100, 50000)
+TEST_INTERVAL (SV_NAME_F1 (tan), 100, 0x1p17, 50000)
+TEST_INTERVAL (SV_NAME_F1 (tan), 0x1p17, inf, 50000)
+CLOSE_SVE_ATTR
diff --git a/pl/math/sv_tanh_3u.c b/math/aarch64/sve/tanh.c
similarity index 86%
rename from pl/math/sv_tanh_3u.c
rename to math/aarch64/sve/tanh.c
index f54139f1ddbcc5..41f64cb4b2c74e 100644
--- a/pl/math/sv_tanh_3u.c
+++ b/math/aarch64/sve/tanh.c
@@ -1,14 +1,14 @@
 /*
  * Double-precision SVE tanh(x) function.
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
+#include "sv_poly_f64.h"
 #include "mathlib.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 static const struct data
 {
@@ -89,8 +89,10 @@ svfloat64_t SV_NAME_D1 (tanh) (svfloat64_t x, svbool_t pg)
   return svdiv_x (pg, q, qp2);
 }
 
-PL_SIG (SV, D, 1, tanh, -10.0, 10.0)
-PL_TEST_ULP (SV_NAME_D1 (tanh), 2.27)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (tanh), 0, 0x1p-27, 5000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (tanh), 0x1p-27, 0x1.241bf835f9d5fp+4, 50000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (tanh), 0x1.241bf835f9d5fp+4, inf, 1000)
+TEST_SIG (SV, D, 1, tanh, -10.0, 10.0)
+TEST_ULP (SV_NAME_D1 (tanh), 2.27)
+TEST_DISABLE_FENV (SV_NAME_D1 (tanh))
+TEST_SYM_INTERVAL (SV_NAME_D1 (tanh), 0, 0x1p-27, 5000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (tanh), 0x1p-27, 0x1.241bf835f9d5fp+4, 50000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (tanh), 0x1.241bf835f9d5fp+4, inf, 1000)
+CLOSE_SVE_ATTR
diff --git a/math/aarch64/sve/tanhf.c b/math/aarch64/sve/tanhf.c
new file mode 100644
index 00000000000000..9007e7badb0df3
--- /dev/null
+++ b/math/aarch64/sve/tanhf.c
@@ -0,0 +1,68 @@
+/*
+ * Single-precision SVE tanh(x) function.
+ *
+ * Copyright (c) 2023-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "sv_expm1f_inline.h"
+
+/* Largest value of x for which tanhf(x) rounds to 1 (or -1 for negative).  */
+#define BoringBound 0x1.205966p+3f
+
+static const struct data
+{
+  struct sv_expm1f_data expm1f_consts;
+  uint32_t onef, special_bound;
+  float boring_bound;
+} data = {
+  .expm1f_consts = SV_EXPM1F_DATA,
+  .onef = 0x3f800000,
+  .special_bound = 0x7f800000,
+  .boring_bound = BoringBound,
+};
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svbool_t pg, svbool_t is_boring,
+	      svfloat32_t boring, svfloat32_t q, svbool_t special)
+{
+  svfloat32_t y
+      = svsel_f32 (is_boring, boring, svdiv_x (pg, q, svadd_x (pg, q, 2.0)));
+  return sv_call_f32 (tanhf, x, y, special);
+}
+
+/* Approximation for single-precision SVE tanh(x), using a simplified
+   version of expm1f. The maximum error is 2.57 ULP:
+   _ZGVsMxv_tanhf (0x1.fc1832p-5) got 0x1.fb71a4p-5
+				 want 0x1.fb71aap-5.  */
+svfloat32_t SV_NAME_F1 (tanh) (svfloat32_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  svfloat32_t ax = svabs_x (pg, x);
+  svuint32_t iax = svreinterpret_u32 (ax);
+  svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), iax);
+  svfloat32_t boring = svreinterpret_f32 (svorr_x (pg, sign, d->onef));
+  svbool_t special = svcmpgt (pg, iax, d->special_bound);
+  svbool_t is_boring = svacgt (pg, x, d->boring_bound);
+
+  /* tanh(x) = (e^2x - 1) / (e^2x + 1).  */
+  svfloat32_t q = expm1f_inline (svmul_x (svptrue_b32 (), x, 2.0), pg,
+				 &d->expm1f_consts);
+
+  if (unlikely (svptest_any (pg, special)))
+    return special_case (x, pg, is_boring, boring, q, special);
+  svfloat32_t y = svdiv_x (pg, q, svadd_x (pg, q, 2.0));
+  return svsel_f32 (is_boring, boring, y);
+}
+
+TEST_SIG (SV, F, 1, tanh, -10.0, 10.0)
+TEST_ULP (SV_NAME_F1 (tanh), 2.07)
+TEST_DISABLE_FENV (SV_NAME_F1 (tanh))
+TEST_SYM_INTERVAL (SV_NAME_F1 (tanh), 0, 0x1p-23, 1000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (tanh), 0x1p-23, BoringBound, 100000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (tanh), BoringBound, inf, 100)
+CLOSE_SVE_ATTR
diff --git a/math/aarch64/sve/tanpi.c b/math/aarch64/sve/tanpi.c
new file mode 100644
index 00000000000000..d9e7d2487d533c
--- /dev/null
+++ b/math/aarch64/sve/tanpi.c
@@ -0,0 +1,89 @@
+/*
+ * Double-precision vector tanpi(x) function.
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "test_sig.h"
+#include "test_defs.h"
+
+const static struct v_tanpi_data
+{
+  double c0, c2, c4, c6, c8, c10, c12;
+  double c1, c3, c5, c7, c9, c11, c13, c14;
+} tanpi_data = {
+  /* Coefficients for tan(pi * x) computed with fpminimax
+     on [ 0x1p-1022 0x1p-2 ]
+     approx rel error: 0x1.7eap-55
+     approx abs error: 0x1.7eap-55.  */
+  .c0 = 0x1.921fb54442d18p1, /* pi.  */
+  .c1 = 0x1.4abbce625be52p3,	.c2 = 0x1.466bc6775b0f9p5,
+  .c3 = 0x1.45fff9b426f5ep7,	.c4 = 0x1.45f4730dbca5cp9,
+  .c5 = 0x1.45f3265994f85p11,	.c6 = 0x1.45f4234b330cap13,
+  .c7 = 0x1.45dca11be79ebp15,	.c8 = 0x1.47283fc5eea69p17,
+  .c9 = 0x1.3a6d958cdefaep19,	.c10 = 0x1.927896baee627p21,
+  .c11 = -0x1.89333f6acd922p19, .c12 = 0x1.5d4e912bb8456p27,
+  .c13 = -0x1.a854d53ab6874p29, .c14 = 0x1.1b76de7681424p32,
+};
+
+/* Approximation for double-precision vector tanpi(x)
+   The maximum error is 3.06 ULP:
+   _ZGVsMxv_tanpi(0x1.0a4a07dfcca3ep-1) got -0x1.fa30112702c98p+3
+				       want -0x1.fa30112702c95p+3.  */
+svfloat64_t SV_NAME_D1 (tanpi) (svfloat64_t x, const svbool_t pg)
+{
+  const struct v_tanpi_data *d = ptr_barrier (&tanpi_data);
+
+  svfloat64_t n = svrintn_x (pg, x);
+
+  /* inf produces nan that propagates.  */
+  svfloat64_t xr = svsub_x (pg, x, n);
+  svfloat64_t ar = svabd_x (pg, x, n);
+  svbool_t flip = svcmpgt (pg, ar, 0.25);
+  svfloat64_t r = svsel (flip, svsubr_x (pg, ar, 0.5), ar);
+
+  /* Order-14 pairwise Horner.  */
+  svfloat64_t r2 = svmul_x (pg, r, r);
+  svfloat64_t r4 = svmul_x (pg, r2, r2);
+  /* Load with an all-true predicate: ld1rq zeroes lanes that pg disables.  */
+  svfloat64_t c_1_3 = svld1rq (svptrue_b64 (), &d->c1);
+  svfloat64_t c_5_7 = svld1rq (svptrue_b64 (), &d->c5);
+  svfloat64_t c_9_11 = svld1rq (svptrue_b64 (), &d->c9);
+  svfloat64_t c_13_14 = svld1rq (svptrue_b64 (), &d->c13);
+  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r2, c_1_3, 0);
+  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r2, c_1_3, 1);
+  svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), r2, c_5_7, 0);
+  svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), r2, c_5_7, 1);
+  svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), r2, c_9_11, 0);
+  svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), r2, c_9_11, 1);
+  svfloat64_t p1213 = svmla_lane (sv_f64 (d->c12), r2, c_13_14, 0);
+
+  svfloat64_t p = svmla_lane (p1213, r4, c_13_14, 1);
+  p = svmad_x (pg, p, r4, p1011);
+  p = svmad_x (pg, p, r4, p89);
+  p = svmad_x (pg, p, r4, p67);
+  p = svmad_x (pg, p, r4, p45);
+  p = svmad_x (pg, p, r4, p23);
+  p = svmad_x (pg, p, r4, p01);
+  p = svmul_x (pg, r, p);
+
+  svfloat64_t p_recip = svdivr_x (pg, p, 1.0);
+  svfloat64_t y = svsel (flip, p_recip, p);
+
+  svuint64_t sign
+      = sveor_x (pg, svreinterpret_u64 (xr), svreinterpret_u64 (ar));
+  return svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (y), sign));
+}
+
+#if WANT_TRIGPI_TESTS
+TEST_DISABLE_FENV (SV_NAME_D1 (tanpi))
+TEST_ULP (SV_NAME_D1 (tanpi), 2.57)
+TEST_SYM_INTERVAL (SV_NAME_D1 (tanpi), 0, 0x1p-31, 50000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (tanpi), 0x1p-31, 0.5, 50000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (tanpi), 0.5, 1.0, 200000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (tanpi), 1.0, 0x1p23, 50000)
+TEST_SYM_INTERVAL (SV_NAME_D1 (tanpi), 0x1p23, inf, 50000)
+#endif
+CLOSE_SVE_ATTR
diff --git a/math/aarch64/sve/tanpif.c b/math/aarch64/sve/tanpif.c
new file mode 100644
index 00000000000000..2ba968a799fe06
--- /dev/null
+++ b/math/aarch64/sve/tanpif.c
@@ -0,0 +1,68 @@
+/*
+ * Single-precision vector tanpif(x) function.
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "test_defs.h"
+#include "test_sig.h"
+
+const static struct v_tanpif_data
+{
+  float c0, c2, c4, c6;
+  float c1, c3, c5, c7;
+} tanpif_data = {
+  /* Coefficients for tan(pi * x).  */
+  .c0 = 0x1.921fb4p1f,	.c1 = 0x1.4abbcep3f,  .c2 = 0x1.466b8p5f,
+  .c3 = 0x1.461c72p7f,	.c4 = 0x1.42e9d4p9f,  .c5 = 0x1.69e2c4p11f,
+  .c6 = 0x1.e85558p11f, .c7 = 0x1.a52e08p16f,
+};
+
+/* Approximation for single-precision vector tanpif(x)
+   The maximum error is 3.34 ULP:
+   _ZGVsMxv_tanpif(0x1.d6c09ap-2) got 0x1.f70aacp+2
+				 want 0x1.f70aa6p+2.  */
+svfloat32_t SV_NAME_F1 (tanpi) (svfloat32_t x, const svbool_t pg)
+{
+  const struct v_tanpif_data *d = ptr_barrier (&tanpif_data);
+  /* All-true predicate: ld1rq zeroes quadword lanes that pg disables.  */
+  svfloat32_t odd_coeffs = svld1rq (svptrue_b32 (), &d->c1);
+  svfloat32_t n = svrintn_x (pg, x);
+  /* inf produces nan that propagates.  */
+  svfloat32_t xr = svsub_x (pg, x, n);
+  svfloat32_t ar = svabd_x (pg, x, n);
+  svbool_t flip = svcmpgt (pg, ar, 0.25f);
+  svfloat32_t r = svsel (flip, svsub_x (pg, sv_f32 (0.5f), ar), ar);
+
+  svfloat32_t r2 = svmul_x (pg, r, r);
+  svfloat32_t r4 = svmul_x (pg, r2, r2);
+
+  /* Order-7 Pairwise Horner.  */
+  svfloat32_t p01 = svmla_lane (sv_f32 (d->c0), r2, odd_coeffs, 0);
+  svfloat32_t p23 = svmla_lane (sv_f32 (d->c2), r2, odd_coeffs, 1);
+  svfloat32_t p45 = svmla_lane (sv_f32 (d->c4), r2, odd_coeffs, 2);
+  svfloat32_t p67 = svmla_lane (sv_f32 (d->c6), r2, odd_coeffs, 3);
+  svfloat32_t p = svmad_x (pg, p67, r4, p45);
+  p = svmad_x (pg, p, r4, p23);
+  p = svmad_x (pg, p, r4, p01);
+  svfloat32_t poly = svmul_x (pg, r, p);
+
+  svfloat32_t poly_recip = svdiv_x (pg, sv_f32 (1.0), poly);
+  svfloat32_t y = svsel (flip, poly_recip, poly);
+
+  svuint32_t sign
+      = sveor_x (pg, svreinterpret_u32 (xr), svreinterpret_u32 (ar));
+  return svreinterpret_f32 (svorr_x (pg, svreinterpret_u32 (y), sign));
+}
+
+#if WANT_TRIGPI_TESTS
+TEST_DISABLE_FENV (SV_NAME_F1 (tanpi))
+TEST_ULP (SV_NAME_F1 (tanpi), 2.84)
+TEST_SYM_INTERVAL (SV_NAME_F1 (tanpi), 0, 0x1p-31, 50000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (tanpi), 0x1p-31, 0.5, 100000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (tanpi), 0.5, 0x1p23f, 100000)
+TEST_SYM_INTERVAL (SV_NAME_F1 (tanpi), 0x1p23f, inf, 100000)
+#endif
+CLOSE_SVE_ATTR
diff --git a/math/aarch64/tanpi_2u5.c b/math/aarch64/tanpi_2u5.c
new file mode 100644
index 00000000000000..154b9faf454d6c
--- /dev/null
+++ b/math/aarch64/tanpi_2u5.c
@@ -0,0 +1,158 @@
+/*
+ * Double-precision scalar tanpi(x) function.
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "mathlib.h"
+#include "math_config.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "poly_scalar_f64.h"
+
+#define SIGN_MASK 0x8000000000000000
+
+const static struct tanpi_data
+{
+  double tan_poly[14], cot_poly[9], pi, invpi;
+} tanpi_data = {
+  /* Coefficients for tan(pi * x), evaluated with pw_horner_13_f64.  */
+  .tan_poly = {
+    0x1.4abbce625be52p3,
+    0x1.466bc6775b0f9p5,
+    0x1.45fff9b426f5ep7,
+    0x1.45f4730dbca5cp9,
+    0x1.45f3265994f85p11,
+    0x1.45f4234b330cap13,
+    0x1.45dca11be79ebp15,
+    0x1.47283fc5eea69p17,
+    0x1.3a6d958cdefaep19,
+    0x1.927896baee627p21,
+    -0x1.89333f6acd922p19,
+    0x1.5d4e912bb8456p27,
+    -0x1.a854d53ab6874p29,
+    0x1.1b76de7681424p32,
+  },
+  /* Coefficients for cot(pi * x), evaluated with pw_horner_8_f64.  */
+  .cot_poly = {
+    -0x1.0c152382d7366p0,
+    -0x1.60c8539c1d316p-1,
+    -0x1.4b9a2f3516354p-1,
+    -0x1.47474060b6ba8p-1,
+    -0x1.464633ad9dcb1p-1,
+    -0x1.45ff229d7edd6p-1,
+    -0x1.46d8dbf492923p-1,
+    -0x1.3873892311c6bp-1,
+    -0x1.b2f3d0ff96d73p-1,
+  },
+  .pi = 0x1.921fb54442d18p1,
+  .invpi = 0x1.45f306dc9c883p-2,
+};
+
+/* Double-precision scalar tanpi(x) implementation.
+   Maximum error 2.19 ULP:
+   tanpi(0x1.68847e177a855p-2) got 0x1.fe9a0ff9bb9d7p+0
+			      want 0x1.fe9a0ff9bb9d5p+0.  */
+double
+arm_math_tanpi (double x)
+{
+  uint64_t xabs_12 = asuint64 (x) >> 52 & 0x7ff;
+  /* xabs_12 is the 11-bit biased exponent of x.
+     |x| >= 0x1p53: x is an even integer, inf or nan.  */
+  if (unlikely (xabs_12 >= 0x434))
+    {
+      /* tanpi(+/-inf) and tanpi(+/-nan) = nan.  */
+      if (unlikely (xabs_12 == 0x7ff))
+	{
+	  return __math_invalid (x);
+	}
+      /* Even integer: return zero carrying the sign of x.  */
+      uint64_t x_sign = asuint64 (x) & SIGN_MASK;
+      return asdouble (x_sign);
+    }
+
+  const struct tanpi_data *d = ptr_barrier (&tanpi_data);
+
+  double rounded = round (x);
+  if (unlikely (rounded == x))
+    {
+      /* If x == 0, return with sign.  */
+      if (x == 0)
+	{
+	  return x;
+	}
+      /* Otherwise, return zero with alternating sign.  */
+      int64_t m = (int64_t) rounded;
+      if (x < 0)
+	{
+	  return m & 1 ? 0.0 : -0.0;
+	}
+      else
+	{
+	  return m & 1 ? -0.0 : 0.0;
+	}
+    }
+
+  double x_reduced = x - rounded;
+  double abs_x_reduced = 0.5 - fabs (x_reduced);
+
+  /* Prevent underflow exceptions.  |x| < 0x1p-63.  */
+  if (unlikely (xabs_12 < 0x3c0))
+    {
+      return d->pi * x;
+    }
+
+  double result, offset, scale;
+
+  /* Test  0.25 < abs_x < 0.5 independent from abs_x_reduced.  */
+  double x2 = x + x;
+  int64_t rounded_x2 = (int64_t) round (x2);
+  if (rounded_x2 & 1)
+    {
+      double r_x = abs_x_reduced;
+
+      double r_x2 = r_x * r_x;
+      double r_x4 = r_x2 * r_x2;
+
+      uint64_t sign = asuint64 (x_reduced) & SIGN_MASK;
+      r_x = asdouble (asuint64 (r_x) ^ sign);
+      /* Calculate sign for half-fractional inf values.  Bit 1 is masked as
+	 unsigned: shifting a signed 2 into the sign bit is undefined.  */
+      uint64_t is_finite = asuint64 (abs_x_reduced);
+      uint64_t is_odd = ((uint64_t) rounded_x2 & 2) << 62;
+      uint64_t is_neg = rounded_x2 & SIGN_MASK;
+      uint64_t keep_sign = is_finite | (is_odd ^ is_neg);
+      offset = d->invpi / (keep_sign ? r_x : -r_x);
+      scale = r_x;
+
+      result = pw_horner_8_f64 (r_x2, r_x4, d->cot_poly);
+    }
+  else
+    {
+      double r_x2 = x_reduced * x_reduced;
+      double r_x4 = r_x2 * r_x2;
+
+      offset = d->pi * x_reduced;
+      scale = x_reduced * r_x2;
+
+      result = pw_horner_13_f64 (r_x2, r_x4, d->tan_poly);
+    }
+
+  return fma (scale, result, offset);
+}
+
+#if WANT_EXPERIMENTAL_MATH /* Also provide the unprefixed name tanpi.  */
+double
+tanpi (double x)
+{
+  return arm_math_tanpi (x);
+}
+#endif /* WANT_EXPERIMENTAL_MATH  */
+
+#if WANT_TRIGPI_TESTS
+TEST_ULP (arm_math_tanpi, 1.69)
+TEST_SYM_INTERVAL (arm_math_tanpi, 0, 0x1p-63, 50000)
+TEST_SYM_INTERVAL (arm_math_tanpi, 0x1p-63, 0.5, 100000)
+TEST_SYM_INTERVAL (arm_math_tanpi, 0.5, 0x1p53, 100000)
+TEST_SYM_INTERVAL (arm_math_tanpi, 0x1p53, inf, 100000)
+#endif
diff --git a/math/aarch64/tanpif_3u1.c b/math/aarch64/tanpif_3u1.c
new file mode 100644
index 00000000000000..8cd66594c290e6
--- /dev/null
+++ b/math/aarch64/tanpif_3u1.c
@@ -0,0 +1,145 @@
+/*
+ * Single-precision scalar tanpi(x) function.
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "mathlib.h"
+#include "math_config.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "poly_scalar_f32.h"
+
+const static struct tanpif_data
+{
+  float tan_poly[6], cot_poly[4], pi, invpi;
+} tanpif_data = {
+  /* Coefficients for tan(pi * x), evaluated with pw_horner_5_f32.  */
+  .tan_poly = {
+    0x1.4abbc8p3,
+    0x1.467284p5,
+    0x1.44cf12p7,
+    0x1.596b5p9,
+    0x1.753858p10,
+    0x1.76ff52p14,
+  },
+  /* Coefficients for cot(pi * x), evaluated with pairwise_poly_3_f32.  */
+  .cot_poly = {
+    -0x1.0c1522p0,
+    -0x1.60ce32p-1,
+    -0x1.49cd42p-1,
+    -0x1.73f786p-1,
+  },
+  .pi = 0x1.921fb6p1f,
+  .invpi = 0x1.45f308p-2f,
+};
+
+/* Single-precision scalar tanpi(x) implementation.
+   Maximum error 2.56 ULP:
+   tanpif(0x1.4bf948p-1) got -0x1.fcc9ep+0
+			want -0x1.fcc9e6p+0.  */
+float
+arm_math_tanpif (float x)
+{
+  uint32_t xabs_12 = asuint (x) >> 20 & 0x7f8;
+  /* xabs_12 keeps only the 8 biased-exponent bits of x, in bits 3-10.
+     |x| >= 0x1p24f: x is an even integer, inf or nan.  */
+  if (unlikely (xabs_12 >= 0x4b1))
+    {
+      /* tanpif(+/-inf) and tanpif(+/-nan) = nan.  */
+      if (unlikely (xabs_12 == 0x7f8))
+	{
+	  return __math_invalidf (x);
+	}
+      /* Even integer: return zero carrying the sign of x.  */
+      uint32_t x_sign = asuint (x) & 0x80000000;
+      return asfloat (x_sign);
+    }
+
+  const struct tanpif_data *d = ptr_barrier (&tanpif_data);
+
+  /* Prevent underflow exceptions.  |x| < 0x1p-31.  */
+  if (unlikely (xabs_12 < 0x300))
+    {
+      return d->pi * x;
+    }
+
+  float rounded = roundf (x);
+  if (unlikely (rounded == x))
+    {
+      /* If x == 0, return with sign.  */
+      if (x == 0)
+	{
+	  return x;
+	}
+      /* Otherwise, return zero with alternating sign.  */
+      int32_t m = (int32_t) rounded;
+      if (x < 0)
+	{
+	  return m & 1 ? 0.0f : -0.0f;
+	}
+      else
+	{
+	  return m & 1 ? -0.0f : 0.0f;
+	}
+    }
+
+  float x_reduced = x - rounded;
+  float abs_x_reduced = 0.5f - asfloat (asuint (x_reduced) & 0x7fffffff);
+
+  float result, offset, scale;
+
+  /* Test  0.25 < abs_x < 0.5 independent from abs_x_reduced.  */
+  float x2 = x + x;
+  int32_t rounded_x2 = (int32_t) roundf (x2);
+  if (rounded_x2 & 1)
+    {
+      float r_x = abs_x_reduced;
+
+      float r_x2 = r_x * r_x;
+      float r_x4 = r_x2 * r_x2;
+
+      uint32_t sign = asuint (x_reduced) & 0x80000000;
+      r_x = asfloat (asuint (r_x) ^ sign);
+      /* Calculate sign for half-fractional inf values.  Bit 1 is masked as
+	 unsigned: shifting a signed 2 into the sign bit is undefined.  */
+      uint32_t is_finite = asuint (abs_x_reduced);
+      uint32_t is_odd = ((uint32_t) rounded_x2 & 2) << 30;
+      uint32_t is_neg = rounded_x2 & 0x80000000;
+      uint32_t keep_sign = is_finite | (is_odd ^ is_neg);
+      offset = d->invpi / (keep_sign ? r_x : -r_x);
+      scale = r_x;
+
+      result = pairwise_poly_3_f32 (r_x2, r_x4, d->cot_poly);
+    }
+  else
+    {
+      float r_x = x_reduced;
+
+      float r_x2 = r_x * r_x;
+      float r_x4 = r_x2 * r_x2;
+
+      offset = d->pi * r_x;
+      scale = r_x * r_x2;
+
+      result = pw_horner_5_f32 (r_x2, r_x4, d->tan_poly);
+    }
+
+  return fmaf (scale, result, offset);
+}
+
+#if WANT_EXPERIMENTAL_MATH /* Also provide the unprefixed name tanpif.  */
+float
+tanpif (float x)
+{
+  return arm_math_tanpif (x);
+}
+#endif /* WANT_EXPERIMENTAL_MATH  */
+
+#if WANT_TRIGPI_TESTS
+TEST_ULP (arm_math_tanpif, 2.57)
+TEST_SYM_INTERVAL (arm_math_tanpif, 0, 0x1p-31f, 50000)
+TEST_SYM_INTERVAL (arm_math_tanpif, 0x1p-31f, 0.5, 100000)
+TEST_SYM_INTERVAL (arm_math_tanpif, 0.5, 0x1p23f, 100000)
+TEST_SYM_INTERVAL (arm_math_tanpif, 0x1p23f, inf, 100000)
+#endif
diff --git a/pl/math/erf_data.c b/math/aarch64/v_erf_data.c
similarity index 99%
rename from pl/math/erf_data.c
rename to math/aarch64/v_erf_data.c
index 138e03578e77cf..5400d6b8d0e300 100644
--- a/pl/math/erf_data.c
+++ b/math/aarch64/v_erf_data.c
@@ -1,20 +1,20 @@
 /*
  * Data for approximation of erf.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "math_config.h"
 
-/* Lookup table used in erf.
+/* Lookup table used in vector erf.
    For each possible rounded input r (multiples of 1/128), between
    r = 0.0 and r = 6.0 (769 values):
-   - the first entry __erff_data.tab.erf contains the values of erf(r),
-   - the second entry __erff_data.tab.scale contains the values of
+   - the first entry __v_erf_data.tab.erf contains the values of erf(r),
+   - the second entry __v_erf_data.tab.scale contains the values of
    2/sqrt(pi)*exp(-r^2). Note that indices 0 and 1 are never hit by the
    algorithm, since lookup is performed only for x >= 1/64-1/512.  */
-const struct erf_data __erf_data = {
+const struct v_erf_data __v_erf_data = {
   .tab = { { 0x0.0000000000000p+0, 0x1.20dd750429b6dp+0 },
 	   { 0x1.20dbf3deb1340p-7, 0x1.20d8f1975c85dp+0 },
 	   { 0x1.20d77083f17a0p-6, 0x1.20cb67bd452c7p+0 },
diff --git a/pl/math/erfc_data.c b/math/aarch64/v_erfc_data.c
similarity index 99%
rename from pl/math/erfc_data.c
rename to math/aarch64/v_erfc_data.c
index 40f72a4d6d5b63..6acd96f74be5f0 100644
--- a/pl/math/erfc_data.c
+++ b/math/aarch64/v_erfc_data.c
@@ -1,20 +1,20 @@
 /*
  * Data used in double-precision erfc(x) function.
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "math_config.h"
 
-/* Lookup table used in erfc.
+/* Lookup table used in vector erfc.
    For each possible rounded input r (multiples of 1/128), between
    r = 0.0 and r = ~27.0 (3488 values):
-   - the first entry __erfc_data.tab.erfc contains the values of erfc(r),
-   - the second entry __erfc_data.tab.scale contains the values of
+   - the first entry __v_erfc_data.tab.erfc contains the values of erfc(r),
+   - the second entry __v_erfc_data.tab.scale contains the values of
    2/sqrt(pi)*exp(-r^2). Both values may go into subnormal range, therefore
    they are scaled by a large enough value 2^128 (fits in 8bit).  */
-const struct erfc_data __erfc_data = {
+const struct v_erfc_data __v_erfc_data = {
   .tab = { { 0x1p128, 0x1.20dd750429b6dp128 },
 	   { 0x1.fb7c9030853b3p127, 0x1.20d8f1975c85dp128 },
 	   { 0x1.f6f9447be0743p127, 0x1.20cb67bd452c7p128 },
diff --git a/pl/math/erfcf_data.c b/math/aarch64/v_erfcf_data.c
similarity index 98%
rename from pl/math/erfcf_data.c
rename to math/aarch64/v_erfcf_data.c
index a54e11973819df..9f992b4887fb16 100644
--- a/pl/math/erfcf_data.c
+++ b/math/aarch64/v_erfcf_data.c
@@ -1,20 +1,20 @@
 /*
  * Data used in single-precision erfc(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "math_config.h"
 
-/* Lookup table used in erfcf.
+/* Lookup table used in vector erfcf.
    For each possible rounded input r (multiples of 1/64), between
    r = 0.0 and r = 10.0625 (645 values):
-   - the first entry __erfcf_data.tab.erfc contains the values of erfc(r),
-   - the second entry __erfcf_data.tab.scale contains the values of
+   - the first entry __v_erfcf_data.tab.erfc contains the values of erfc(r),
+   - the second entry __v_erfcf_data.tab.scale contains the values of
    2/sqrt(pi)*exp(-r^2). Both values may go into subnormal range, therefore
    they are scaled by a large enough value 2^47 (fits in 8 bits).  */
-const struct erfcf_data __erfcf_data = {
+const struct v_erfcf_data __v_erfcf_data = {
   .tab = { { 0x1p47, 0x1.20dd76p47 },
 	   { 0x1.f6f944p46, 0x1.20cb68p47 },
 	   { 0x1.edf3aap46, 0x1.209546p47 },
diff --git a/pl/math/erff_data.c b/math/aarch64/v_erff_data.c
similarity index 98%
rename from pl/math/erff_data.c
rename to math/aarch64/v_erff_data.c
index 84c0d2e9546316..8d11d8b6c10bb6 100644
--- a/pl/math/erff_data.c
+++ b/math/aarch64/v_erff_data.c
@@ -1,20 +1,20 @@
 /*
  * Data for approximation of erff.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "math_config.h"
 
-/* Lookup table used in erff.
+/* Lookup table used in vector erff.
    For each possible rounded input r (multiples of 1/128), between
    r = 0.0 and r = 4.0 (513 values):
-   - the first entry __erff_data.tab.erf contains the values of erf(r),
-   - the second entry __erff_data.tab.scale contains the values of
+   - the first entry __v_erff_data.tab.erf contains the values of erf(r),
+   - the second entry __v_erff_data.tab.scale contains the values of
    2/sqrt(pi)*exp(-r^2). Note that indices 0 and 1 are never hit by the
    algorithm, since lookup is performed only for x >= 1/64-1/512.  */
-const struct erff_data __erff_data = {
+const struct v_erff_data __v_erff_data = {
   .tab = { { 0x0.000000p+0, 0x1.20dd76p+0 },
 	   { 0x1.20dbf4p-7, 0x1.20d8f2p+0 },
 	   { 0x1.20d770p-6, 0x1.20cb68p+0 },
diff --git a/math/aarch64/v_exp2f_1u.c b/math/aarch64/v_exp2f_1u.c
deleted file mode 100644
index ba6b02fbb4bcbd..00000000000000
--- a/math/aarch64/v_exp2f_1u.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Single-precision vector 2^x function.
- *
- * Copyright (c) 2019-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "mathlib.h"
-#include "v_math.h"
-
-static const float Poly[] = {
-  /*  maxerr: 0.878 ulp.  */
-  0x1.416b5ep-13f, 0x1.5f082ep-10f, 0x1.3b2dep-7f, 0x1.c6af7cp-5f, 0x1.ebfbdcp-3f, 0x1.62e43p-1f
-};
-#define C0 v_f32 (Poly[0])
-#define C1 v_f32 (Poly[1])
-#define C2 v_f32 (Poly[2])
-#define C3 v_f32 (Poly[3])
-#define C4 v_f32 (Poly[4])
-#define C5 v_f32 (Poly[5])
-
-#define Shift v_f32 (0x1.8p23f)
-#define InvLn2 v_f32 (0x1.715476p+0f)
-#define Ln2hi v_f32 (0x1.62e4p-1f)
-#define Ln2lo v_f32 (0x1.7f7d1cp-20f)
-
-static float32x4_t VPCS_ATTR NOINLINE
-specialcase (float32x4_t poly, float32x4_t n, uint32x4_t e, float32x4_t absn)
-{
-  /* 2^n may overflow, break it up into s1*s2.  */
-  uint32x4_t b = (n <= v_f32 (0.0f)) & v_u32 (0x83000000);
-  float32x4_t s1 = vreinterpretq_f32_u32 (v_u32 (0x7f000000) + b);
-  float32x4_t s2 = vreinterpretq_f32_u32 (e - b);
-  uint32x4_t cmp = absn > v_f32 (192.0f);
-  float32x4_t r1 = s1 * s1;
-  float32x4_t r0 = poly * s1 * s2;
-  return vreinterpretq_f32_u32 ((cmp & vreinterpretq_u32_f32 (r1))
-				| (~cmp & vreinterpretq_u32_f32 (r0)));
-}
-
-float32x4_t VPCS_ATTR
-_ZGVnN4v_exp2f_1u (float32x4_t x)
-{
-  float32x4_t n, r, scale, poly, absn;
-  uint32x4_t cmp, e;
-
-  /* exp2(x) = 2^n * poly(r), with poly(r) in [1/sqrt(2),sqrt(2)]
-     x = n + r, with r in [-1/2, 1/2].  */
-#if 0
-  float32x4_t z;
-  z = x + Shift;
-  n = z - Shift;
-  r = x - n;
-  e = vreinterpretq_u32_f32 (z) << 23;
-#else
-  n = vrndaq_f32 (x);
-  r = x - n;
-  e = vreinterpretq_u32_s32 (vcvtaq_s32_f32 (x)) << 23;
-#endif
-  scale = vreinterpretq_f32_u32 (e + v_u32 (0x3f800000));
-  absn = vabsq_f32 (n);
-  cmp = absn > v_f32 (126.0f);
-  poly = vfmaq_f32 (C1, C0, r);
-  poly = vfmaq_f32 (C2, poly, r);
-  poly = vfmaq_f32 (C3, poly, r);
-  poly = vfmaq_f32 (C4, poly, r);
-  poly = vfmaq_f32 (C5, poly, r);
-  poly = vfmaq_f32 (v_f32 (1.0f), poly, r);
-  if (unlikely (v_any_u32 (cmp)))
-    return specialcase (poly, n, e, absn);
-  return scale * poly;
-}
diff --git a/math/aarch64/v_exp_data.c b/math/aarch64/v_exp_data.c
index 45f0848cac5b5b..59db77ac58ccc8 100644
--- a/math/aarch64/v_exp_data.c
+++ b/math/aarch64/v_exp_data.c
@@ -1,17 +1,14 @@
 /*
- * Lookup table for double-precision e^x vector function.
+ * Scale values for vector exp and exp2
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#include "v_math.h"
+#include "math_config.h"
 
-# define N (1 << V_EXP_TABLE_BITS)
-
-/* 2^(j/N), j=0..N.  */
+/* 2^(j/N), j=0..N, N=2^7=128.  */
 const uint64_t __v_exp_data[] = {
-# if N == 128
   0x3ff0000000000000, 0x3feff63da9fb3335, 0x3fefec9a3e778061,
   0x3fefe315e86e7f85, 0x3fefd9b0d3158574, 0x3fefd06b29ddf6de,
   0x3fefc74518759bc8, 0x3fefbe3ecac6f383, 0x3fefb5586cf9890f,
@@ -55,92 +52,4 @@ const uint64_t __v_exp_data[] = {
   0x3fefa4afa2a490da, 0x3fefaf482d8e67f1, 0x3fefba1bee615a27,
   0x3fefc52b376bba97, 0x3fefd0765b6e4540, 0x3fefdbfdad9cbe14,
   0x3fefe7c1819e90d8, 0x3feff3c22b8f71f1,
-# elif N == 256
-  0x3ff0000000000000, 0x3feffb1afa5abcbf, 0x3feff63da9fb3335,
-  0x3feff168143b0281, 0x3fefec9a3e778061, 0x3fefe7d42e11bbcc,
-  0x3fefe315e86e7f85, 0x3fefde5f72f654b1, 0x3fefd9b0d3158574,
-  0x3fefd50a0e3c1f89, 0x3fefd06b29ddf6de, 0x3fefcbd42b72a836,
-  0x3fefc74518759bc8, 0x3fefc2bdf66607e0, 0x3fefbe3ecac6f383,
-  0x3fefb9c79b1f3919, 0x3fefb5586cf9890f, 0x3fefb0f145e46c85,
-  0x3fefac922b7247f7, 0x3fefa83b23395dec, 0x3fefa3ec32d3d1a2,
-  0x3fef9fa55fdfa9c5, 0x3fef9b66affed31b, 0x3fef973028d7233e,
-  0x3fef9301d0125b51, 0x3fef8edbab5e2ab6, 0x3fef8abdc06c31cc,
-  0x3fef86a814f204ab, 0x3fef829aaea92de0, 0x3fef7e95934f312e,
-  0x3fef7a98c8a58e51, 0x3fef76a45471c3c2, 0x3fef72b83c7d517b,
-  0x3fef6ed48695bbc0, 0x3fef6af9388c8dea, 0x3fef672658375d2f,
-  0x3fef635beb6fcb75, 0x3fef5f99f8138a1c, 0x3fef5be084045cd4,
-  0x3fef582f95281c6b, 0x3fef54873168b9aa, 0x3fef50e75eb44027,
-  0x3fef4d5022fcd91d, 0x3fef49c18438ce4d, 0x3fef463b88628cd6,
-  0x3fef42be3578a819, 0x3fef3f49917ddc96, 0x3fef3bdda27912d1,
-  0x3fef387a6e756238, 0x3fef351ffb82140a, 0x3fef31ce4fb2a63f,
-  0x3fef2e85711ece75, 0x3fef2b4565e27cdd, 0x3fef280e341ddf29,
-  0x3fef24dfe1f56381, 0x3fef21ba7591bb70, 0x3fef1e9df51fdee1,
-  0x3fef1b8a66d10f13, 0x3fef187fd0dad990, 0x3fef157e39771b2f,
-  0x3fef1285a6e4030b, 0x3fef0f961f641589, 0x3fef0cafa93e2f56,
-  0x3fef09d24abd886b, 0x3fef06fe0a31b715, 0x3fef0432edeeb2fd,
-  0x3fef0170fc4cd831, 0x3feefeb83ba8ea32, 0x3feefc08b26416ff,
-  0x3feef96266e3fa2d, 0x3feef6c55f929ff1, 0x3feef431a2de883b,
-  0x3feef1a7373aa9cb, 0x3feeef26231e754a, 0x3feeecae6d05d866,
-  0x3feeea401b7140ef, 0x3feee7db34e59ff7, 0x3feee57fbfec6cf4,
-  0x3feee32dc313a8e5, 0x3feee0e544ede173, 0x3feedea64c123422,
-  0x3feedc70df1c5175, 0x3feeda4504ac801c, 0x3feed822c367a024,
-  0x3feed60a21f72e2a, 0x3feed3fb2709468a, 0x3feed1f5d950a897,
-  0x3feecffa3f84b9d4, 0x3feece086061892d, 0x3feecc2042a7d232,
-  0x3feeca41ed1d0057, 0x3feec86d668b3237, 0x3feec6a2b5c13cd0,
-  0x3feec4e1e192aed2, 0x3feec32af0d7d3de, 0x3feec17dea6db7d7,
-  0x3feebfdad5362a27, 0x3feebe41b817c114, 0x3feebcb299fddd0d,
-  0x3feebb2d81d8abff, 0x3feeb9b2769d2ca7, 0x3feeb8417f4531ee,
-  0x3feeb6daa2cf6642, 0x3feeb57de83f4eef, 0x3feeb42b569d4f82,
-  0x3feeb2e2f4f6ad27, 0x3feeb1a4ca5d920f, 0x3feeb070dde910d2,
-  0x3feeaf4736b527da, 0x3feeae27dbe2c4cf, 0x3feead12d497c7fd,
-  0x3feeac0827ff07cc, 0x3feeab07dd485429, 0x3feeaa11fba87a03,
-  0x3feea9268a5946b7, 0x3feea84590998b93, 0x3feea76f15ad2148,
-  0x3feea6a320dceb71, 0x3feea5e1b976dc09, 0x3feea52ae6cdf6f4,
-  0x3feea47eb03a5585, 0x3feea3dd1d1929fd, 0x3feea34634ccc320,
-  0x3feea2b9febc8fb7, 0x3feea23882552225, 0x3feea1c1c70833f6,
-  0x3feea155d44ca973, 0x3feea0f4b19e9538, 0x3feea09e667f3bcd,
-  0x3feea052fa75173e, 0x3feea012750bdabf, 0x3fee9fdcddd47645,
-  0x3fee9fb23c651a2f, 0x3fee9f9298593ae5, 0x3fee9f7df9519484,
-  0x3fee9f7466f42e87, 0x3fee9f75e8ec5f74, 0x3fee9f8286ead08a,
-  0x3fee9f9a48a58174, 0x3fee9fbd35d7cbfd, 0x3fee9feb564267c9,
-  0x3feea024b1ab6e09, 0x3feea0694fde5d3f, 0x3feea0b938ac1cf6,
-  0x3feea11473eb0187, 0x3feea17b0976cfdb, 0x3feea1ed0130c132,
-  0x3feea26a62ff86f0, 0x3feea2f336cf4e62, 0x3feea3878491c491,
-  0x3feea427543e1a12, 0x3feea4d2add106d9, 0x3feea589994cce13,
-  0x3feea64c1eb941f7, 0x3feea71a4623c7ad, 0x3feea7f4179f5b21,
-  0x3feea8d99b4492ed, 0x3feea9cad931a436, 0x3feeaac7d98a6699,
-  0x3feeabd0a478580f, 0x3feeace5422aa0db, 0x3feeae05bad61778,
-  0x3feeaf3216b5448c, 0x3feeb06a5e0866d9, 0x3feeb1ae99157736,
-  0x3feeb2fed0282c8a, 0x3feeb45b0b91ffc6, 0x3feeb5c353aa2fe2,
-  0x3feeb737b0cdc5e5, 0x3feeb8b82b5f98e5, 0x3feeba44cbc8520f,
-  0x3feebbdd9a7670b3, 0x3feebd829fde4e50, 0x3feebf33e47a22a2,
-  0x3feec0f170ca07ba, 0x3feec2bb4d53fe0d, 0x3feec49182a3f090,
-  0x3feec674194bb8d5, 0x3feec86319e32323, 0x3feeca5e8d07f29e,
-  0x3feecc667b5de565, 0x3feece7aed8eb8bb, 0x3feed09bec4a2d33,
-  0x3feed2c980460ad8, 0x3feed503b23e255d, 0x3feed74a8af46052,
-  0x3feed99e1330b358, 0x3feedbfe53c12e59, 0x3feede6b5579fdbf,
-  0x3feee0e521356eba, 0x3feee36bbfd3f37a, 0x3feee5ff3a3c2774,
-  0x3feee89f995ad3ad, 0x3feeeb4ce622f2ff, 0x3feeee07298db666,
-  0x3feef0ce6c9a8952, 0x3feef3a2b84f15fb, 0x3feef68415b749b1,
-  0x3feef9728de5593a, 0x3feefc6e29f1c52a, 0x3feeff76f2fb5e47,
-  0x3fef028cf22749e4, 0x3fef05b030a1064a, 0x3fef08e0b79a6f1f,
-  0x3fef0c1e904bc1d2, 0x3fef0f69c3f3a207, 0x3fef12c25bd71e09,
-  0x3fef16286141b33d, 0x3fef199bdd85529c, 0x3fef1d1cd9fa652c,
-  0x3fef20ab5fffd07a, 0x3fef244778fafb22, 0x3fef27f12e57d14b,
-  0x3fef2ba88988c933, 0x3fef2f6d9406e7b5, 0x3fef33405751c4db,
-  0x3fef3720dcef9069, 0x3fef3b0f2e6d1675, 0x3fef3f0b555dc3fa,
-  0x3fef43155b5bab74, 0x3fef472d4a07897c, 0x3fef4b532b08c968,
-  0x3fef4f87080d89f2, 0x3fef53c8eacaa1d6, 0x3fef5818dcfba487,
-  0x3fef5c76e862e6d3, 0x3fef60e316c98398, 0x3fef655d71ff6075,
-  0x3fef69e603db3285, 0x3fef6e7cd63a8315, 0x3fef7321f301b460,
-  0x3fef77d5641c0658, 0x3fef7c97337b9b5f, 0x3fef81676b197d17,
-  0x3fef864614f5a129, 0x3fef8b333b16ee12, 0x3fef902ee78b3ff6,
-  0x3fef953924676d76, 0x3fef9a51fbc74c83, 0x3fef9f7977cdb740,
-  0x3fefa4afa2a490da, 0x3fefa9f4867cca6e, 0x3fefaf482d8e67f1,
-  0x3fefb4aaa2188510, 0x3fefba1bee615a27, 0x3fefbf9c1cb6412a,
-  0x3fefc52b376bba97, 0x3fefcac948dd7274, 0x3fefd0765b6e4540,
-  0x3fefd632798844f8, 0x3fefdbfdad9cbe14, 0x3fefe1d802243c89,
-  0x3fefe7c1819e90d8, 0x3fefedba3692d514, 0x3feff3c22b8f71f1,
-  0x3feff9d96b2a23d9,
-# endif
 };
diff --git a/pl/math/v_exp_tail_data.c b/math/aarch64/v_exp_tail_data.c
similarity index 98%
rename from pl/math/v_exp_tail_data.c
rename to math/aarch64/v_exp_tail_data.c
index 989dd41d949a59..5cc58a40b6b7d1 100644
--- a/pl/math/v_exp_tail_data.c
+++ b/math/aarch64/v_exp_tail_data.c
@@ -1,13 +1,13 @@
 /*
  * Lookup table for double-precision e^x vector function.
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "math_config.h"
 
-/* 2^(j/N), j=0..N, N=2^8=256. Copied from math/v_exp_data.c.  */
+/* 2^(j/N), j=0..N, N=2^8=256.  */
 const uint64_t __v_exp_tail_data[] = {
   0x3ff0000000000000, 0x3feffb1afa5abcbf, 0x3feff63da9fb3335,
   0x3feff168143b0281, 0x3fefec9a3e778061, 0x3fefe7d42e11bbcc,
diff --git a/math/aarch64/v_expf_1u.c b/math/aarch64/v_expf_1u.c
deleted file mode 100644
index 43d03fa34efab4..00000000000000
--- a/math/aarch64/v_expf_1u.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Single-precision vector e^x function.
- *
- * Copyright (c) 2019-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "mathlib.h"
-#include "v_math.h"
-
-static const float Poly[] = {
-  /*  maxerr: 0.36565 +0.5 ulp.  */
-  0x1.6a6000p-10f,
-  0x1.12718ep-7f,
-  0x1.555af0p-5f,
-  0x1.555430p-3f,
-  0x1.fffff4p-2f,
-};
-#define C0 v_f32 (Poly[0])
-#define C1 v_f32 (Poly[1])
-#define C2 v_f32 (Poly[2])
-#define C3 v_f32 (Poly[3])
-#define C4 v_f32 (Poly[4])
-
-#define Shift v_f32 (0x1.8p23f)
-#define InvLn2 v_f32 (0x1.715476p+0f)
-#define Ln2hi v_f32 (0x1.62e4p-1f)
-#define Ln2lo v_f32 (0x1.7f7d1cp-20f)
-
-static float32x4_t VPCS_ATTR NOINLINE
-specialcase (float32x4_t poly, float32x4_t n, uint32x4_t e, float32x4_t absn)
-{
-  /* 2^n may overflow, break it up into s1*s2.  */
-  uint32x4_t b = (n <= v_f32 (0.0f)) & v_u32 (0x83000000);
-  float32x4_t s1 = vreinterpretq_f32_u32 (v_u32 (0x7f000000) + b);
-  float32x4_t s2 = vreinterpretq_f32_u32 (e - b);
-  uint32x4_t cmp = absn > v_f32 (192.0f);
-  float32x4_t r1 = s1 * s1;
-  float32x4_t r0 = poly * s1 * s2;
-  return vreinterpretq_f32_u32 ((cmp & vreinterpretq_u32_f32 (r1))
-				| (~cmp & vreinterpretq_u32_f32 (r0)));
-}
-
-float32x4_t VPCS_ATTR
-_ZGVnN4v_expf_1u (float32x4_t x)
-{
-  float32x4_t n, r, scale, poly, absn, z;
-  uint32x4_t cmp, e;
-
-  /* exp(x) = 2^n * poly(r), with poly(r) in [1/sqrt(2),sqrt(2)]
-     x = ln2*n + r, with r in [-ln2/2, ln2/2].  */
-#if 1
-  z = vfmaq_f32 (Shift, x, InvLn2);
-  n = z - Shift;
-  r = vfmaq_f32 (x, n, -Ln2hi);
-  r = vfmaq_f32 (r, n, -Ln2lo);
-  e = vreinterpretq_u32_f32 (z) << 23;
-#else
-  z = x * InvLn2;
-  n = vrndaq_f32 (z);
-  r = vfmaq_f32 (x, n, -Ln2hi);
-  r = vfmaq_f32 (r, n, -Ln2lo);
-  e = vreinterpretq_u32_s32 (vcvtaq_s32_f32 (z)) << 23;
-#endif
-  scale = vreinterpretq_f32_u32 (e + v_u32 (0x3f800000));
-  absn = vabsq_f32 (n);
-  cmp = absn > v_f32 (126.0f);
-  poly = vfmaq_f32 (C1, C0, r);
-  poly = vfmaq_f32 (C2, poly, r);
-  poly = vfmaq_f32 (C3, poly, r);
-  poly = vfmaq_f32 (C4, poly, r);
-  poly = vfmaq_f32 (v_f32 (1.0f), poly, r);
-  poly = vfmaq_f32 (v_f32 (1.0f), poly, r);
-  if (unlikely (v_any_u32 (cmp)))
-    return specialcase (poly, n, e, absn);
-  return scale * poly;
-}
diff --git a/math/aarch64/v_log.c b/math/aarch64/v_log.c
deleted file mode 100644
index 1d1c1fa62c0423..00000000000000
--- a/math/aarch64/v_log.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Double-precision vector log(x) function.
- *
- * Copyright (c) 2019-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "mathlib.h"
-#include "v_math.h"
-
-static const struct data
-{
-  uint64x2_t min_norm;
-  uint32x4_t special_bound;
-  float64x2_t poly[5];
-  float64x2_t ln2;
-  uint64x2_t sign_exp_mask;
-} data = {
-  /* Worst-case error: 1.17 + 0.5 ulp.
-     Rel error: 0x1.6272e588p-56 in [ -0x1.fc1p-9 0x1.009p-8 ].  */
-  .poly = { V2 (-0x1.ffffffffffff7p-2), V2 (0x1.55555555170d4p-2),
-	    V2 (-0x1.0000000399c27p-2), V2 (0x1.999b2e90e94cap-3),
-	    V2 (-0x1.554e550bd501ep-3) },
-  .ln2 = V2 (0x1.62e42fefa39efp-1),
-  .min_norm = V2 (0x0010000000000000),
-  .special_bound = V4 (0x7fe00000), /* asuint64(inf) - min_norm.  */
-  .sign_exp_mask = V2 (0xfff0000000000000)
-};
-
-#define A(i) d->poly[i]
-#define N (1 << V_LOG_TABLE_BITS)
-#define IndexMask (N - 1)
-#define Off v_u64 (0x3fe6900900000000)
-
-struct entry
-{
-  float64x2_t invc;
-  float64x2_t logc;
-};
-
-static inline struct entry
-lookup (uint64x2_t i)
-{
-  /* Since N is a power of 2, n % N = n & (N - 1).  */
-  struct entry e;
-  uint64_t i0 = (i[0] >> (52 - V_LOG_TABLE_BITS)) & IndexMask;
-  uint64_t i1 = (i[1] >> (52 - V_LOG_TABLE_BITS)) & IndexMask;
-  float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc);
-  float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc);
-  e.invc = vuzp1q_f64 (e0, e1);
-  e.logc = vuzp2q_f64 (e0, e1);
-  return e;
-}
-
-static float64x2_t VPCS_ATTR NOINLINE
-special_case (float64x2_t x, float64x2_t y, float64x2_t hi, float64x2_t r2,
-	      uint32x2_t cmp)
-{
-  return v_call_f64 (log, x, vfmaq_f64 (hi, y, r2), vmovl_u32 (cmp));
-}
-
-float64x2_t VPCS_ATTR V_NAME_D1 (log) (float64x2_t x)
-{
-  const struct data *d = ptr_barrier (&data);
-  float64x2_t z, r, r2, p, y, kd, hi;
-  uint64x2_t ix, iz, tmp;
-  uint32x2_t cmp;
-  int64x2_t k;
-  struct entry e;
-
-  ix = vreinterpretq_u64_f64 (x);
-  cmp = vcge_u32 (vsubhn_u64 (ix, d->min_norm),
-		  vget_low_u32 (d->special_bound));
-
-  /* x = 2^k z; where z is in range [Off,2*Off) and exact.
-     The range is split into N subintervals.
-     The ith subinterval contains z and c is near its center.  */
-  tmp = vsubq_u64 (ix, Off);
-  k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); /* arithmetic shift.  */
-  iz = vsubq_u64 (ix, vandq_u64 (tmp, d->sign_exp_mask));
-  z = vreinterpretq_f64_u64 (iz);
-  e = lookup (tmp);
-
-  /* log(x) = log1p(z/c-1) + log(c) + k*Ln2.  */
-  r = vfmaq_f64 (v_f64 (-1.0), z, e.invc);
-  kd = vcvtq_f64_s64 (k);
-
-  /* hi = r + log(c) + k*Ln2.  */
-  hi = vfmaq_f64 (vaddq_f64 (e.logc, r), kd, d->ln2);
-  /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi.  */
-  r2 = vmulq_f64 (r, r);
-  y = vfmaq_f64 (A (2), A (3), r);
-  p = vfmaq_f64 (A (0), A (1), r);
-  y = vfmaq_f64 (y, A (4), r2);
-  y = vfmaq_f64 (p, y, r2);
-
-  if (unlikely (v_any_u32h (cmp)))
-    return special_case (x, y, hi, r2, cmp);
-  return vfmaq_f64 (hi, y, r2);
-}
diff --git a/pl/math/v_log10_data.c b/math/aarch64/v_log10_data.c
similarity index 99%
rename from pl/math/v_log10_data.c
rename to math/aarch64/v_log10_data.c
index d9a624dab9ce11..bae2685822f6d0 100644
--- a/pl/math/v_log10_data.c
+++ b/math/aarch64/v_log10_data.c
@@ -1,7 +1,7 @@
 /*
  * Lookup table for double-precision log10(x) vector function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
diff --git a/pl/math/v_log2_data.c b/math/aarch64/v_log2_data.c
similarity index 99%
rename from pl/math/v_log2_data.c
rename to math/aarch64/v_log2_data.c
index 50697daff925ae..fad91d654da8e0 100644
--- a/pl/math/v_log2_data.c
+++ b/math/aarch64/v_log2_data.c
@@ -1,7 +1,7 @@
 /*
  * Coefficients and table entries for vector log2
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
diff --git a/math/aarch64/v_log_data.c b/math/aarch64/v_log_data.c
index 82351bb14766f2..4f0e6e16738112 100644
--- a/math/aarch64/v_log_data.c
+++ b/math/aarch64/v_log_data.c
@@ -1,30 +1,35 @@
 /*
  * Lookup table for double-precision log(x) vector function.
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#include "v_math.h"
-
-#define N (1 << V_LOG_TABLE_BITS)
+#include "math_config.h"
 
 const struct v_log_data __v_log_data = {
+  /* Worst-case error: 1.17 + 0.5 ulp.
+     Rel error: 0x1.6272e588p-56 in [ -0x1.fc1p-9 0x1.009p-8 ].  */
+  .poly = { -0x1.ffffffffffff7p-2, 0x1.55555555170d4p-2, -0x1.0000000399c27p-2,
+	    0x1.999b2e90e94cap-3, -0x1.554e550bd501ep-3 },
+  .ln2 = 0x1.62e42fefa39efp-1,
   /* Algorithm:
 
 	x = 2^k z
 	log(x) = k ln2 + log(c) + poly(z/c - 1)
 
-  where z is in [a;2a) which is split into N subintervals (a=0x1.69009p-1,
-  N=128) and log(c) and 1/c for the ith subinterval comes from lookup tables:
+     where z is in [a;2a) which is split into N subintervals (a=0x1.69009p-1,
+     N=128) and log(c) and 1/c for the ith subinterval comes from two lookup
+     tables:
 
 	table[i].invc = 1/c
 	table[i].logc = (double)log(c)
 
-  where c is near the center of the subinterval and is chosen by trying several
-  floating point invc candidates around 1/center and selecting one for which
-  the error in (double)log(c) is minimized (< 0x1p-74), except the subinterval
-  that contains 1 and the previous one got tweaked to avoid cancellation.  */
+     where c is near the center of the subinterval and is chosen by trying
+     several floating point invc candidates around 1/center and selecting one
+     for which the error in (double)log(c) is minimized (< 0x1p-74), except the
+     subinterval that contains 1 and the previous one got tweaked to avoid
+     cancellation.  */
   .table = { { 0x1.6a133d0dec120p+0, -0x1.62fe995eb963ap-2 },
 	     { 0x1.6815f2f3e42edp+0, -0x1.5d5a48dad6b67p-2 },
 	     { 0x1.661e39be1ac9ep+0, -0x1.57bde257d2769p-2 },
diff --git a/math/aarch64/v_logf.c b/math/aarch64/v_logf.c
deleted file mode 100644
index 66ebbbcd2b5a84..00000000000000
--- a/math/aarch64/v_logf.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Single-precision vector log function.
- *
- * Copyright (c) 2019-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "mathlib.h"
-#include "v_math.h"
-
-static const struct data
-{
-  uint32x4_t min_norm;
-  uint16x8_t special_bound;
-  float32x4_t poly[7];
-  float32x4_t ln2, tiny_bound;
-  uint32x4_t off, mantissa_mask;
-} data = {
-  /* 3.34 ulp error.  */
-  .poly = { V4 (-0x1.3e737cp-3f), V4 (0x1.5a9aa2p-3f), V4 (-0x1.4f9934p-3f),
-	    V4 (0x1.961348p-3f), V4 (-0x1.00187cp-2f), V4 (0x1.555d7cp-2f),
-	    V4 (-0x1.ffffc8p-2f) },
-  .ln2 = V4 (0x1.62e43p-1f),
-  .tiny_bound = V4 (0x1p-126),
-  .min_norm = V4 (0x00800000),
-  .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm.  */
-  .off = V4 (0x3f2aaaab),	/* 0.666667.  */
-  .mantissa_mask = V4 (0x007fffff)
-};
-
-#define P(i) d->poly[7 - i]
-
-static float32x4_t VPCS_ATTR NOINLINE
-special_case (float32x4_t x, float32x4_t y, float32x4_t r2, float32x4_t p,
-	      uint16x4_t cmp)
-{
-  /* Fall back to scalar code.  */
-  return v_call_f32 (logf, x, vfmaq_f32 (p, y, r2), vmovl_u16 (cmp));
-}
-
-float32x4_t VPCS_ATTR V_NAME_F1 (log) (float32x4_t x)
-{
-  const struct data *d = ptr_barrier (&data);
-  float32x4_t n, p, q, r, r2, y;
-  uint32x4_t u;
-  uint16x4_t cmp;
-
-  u = vreinterpretq_u32_f32 (x);
-  cmp = vcge_u16 (vsubhn_u32 (u, d->min_norm),
-		  vget_low_u16 (d->special_bound));
-
-  /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
-  u = vsubq_u32 (u, d->off);
-  n = vcvtq_f32_s32 (
-      vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend.  */
-  u = vandq_u32 (u, d->mantissa_mask);
-  u = vaddq_u32 (u, d->off);
-  r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f));
-
-  /* y = log(1+r) + n*ln2.  */
-  r2 = vmulq_f32 (r, r);
-  /* n*ln2 + r + r2*(P1 + r*P2 + r2*(P3 + r*P4 + r2*(P5 + r*P6 + r2*P7))).  */
-  p = vfmaq_f32 (P (5), P (6), r);
-  q = vfmaq_f32 (P (3), P (4), r);
-  y = vfmaq_f32 (P (1), P (2), r);
-  p = vfmaq_f32 (p, P (7), r2);
-  q = vfmaq_f32 (q, p, r2);
-  y = vfmaq_f32 (y, q, r2);
-  p = vfmaq_f32 (r, d->ln2, n);
-
-  if (unlikely (v_any_u16h (cmp)))
-    return special_case (x, y, r2, p, cmp);
-  return vfmaq_f32 (p, y, r2);
-}
diff --git a/math/aarch64/v_math.h b/math/aarch64/v_math.h
deleted file mode 100644
index 1dc9916c6fb076..00000000000000
--- a/math/aarch64/v_math.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Vector math abstractions.
- *
- * Copyright (c) 2019-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#ifndef _V_MATH_H
-#define _V_MATH_H
-
-#if !__aarch64__
-# error "Cannot build without AArch64"
-#endif
-
-#define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))
-
-#define V_NAME_F1(fun) _ZGVnN4v_##fun##f
-#define V_NAME_D1(fun) _ZGVnN2v_##fun
-#define V_NAME_F2(fun) _ZGVnN4vv_##fun##f
-#define V_NAME_D2(fun) _ZGVnN2vv_##fun
-
-#include <stdint.h>
-#include "../math_config.h"
-#include <arm_neon.h>
-
-/* Shorthand helpers for declaring constants.  */
-#  define V2(X) { X, X }
-#  define V4(X) { X, X, X, X }
-#  define V8(X) { X, X, X, X, X, X, X, X }
-
-static inline int
-v_any_u16h (uint16x4_t x)
-{
-  return vget_lane_u64 (vreinterpret_u64_u16 (x), 0) != 0;
-}
-
-static inline int
-v_lanes32 (void)
-{
-  return 4;
-}
-
-static inline float32x4_t
-v_f32 (float x)
-{
-  return (float32x4_t) V4 (x);
-}
-static inline uint32x4_t
-v_u32 (uint32_t x)
-{
-  return (uint32x4_t) V4 (x);
-}
-/* true if any elements of a v_cond result is non-zero.  */
-static inline int
-v_any_u32 (uint32x4_t x)
-{
-  /* assume elements in x are either 0 or -1u.  */
-  return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0;
-}
-static inline int
-v_any_u32h (uint32x2_t x)
-{
-  return vget_lane_u64 (vreinterpret_u64_u32 (x), 0) != 0;
-}
-static inline float32x4_t
-v_lookup_f32 (const float *tab, uint32x4_t idx)
-{
-  return (float32x4_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]};
-}
-static inline uint32x4_t
-v_lookup_u32 (const uint32_t *tab, uint32x4_t idx)
-{
-  return (uint32x4_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]};
-}
-static inline float32x4_t
-v_call_f32 (float (*f) (float), float32x4_t x, float32x4_t y, uint32x4_t p)
-{
-  return (float32x4_t){p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1],
-		       p[2] ? f (x[2]) : y[2], p[3] ? f (x[3]) : y[3]};
-}
-static inline float32x4_t
-v_call2_f32 (float (*f) (float, float), float32x4_t x1, float32x4_t x2,
-	     float32x4_t y, uint32x4_t p)
-{
-  return (float32x4_t){p[0] ? f (x1[0], x2[0]) : y[0],
-		       p[1] ? f (x1[1], x2[1]) : y[1],
-		       p[2] ? f (x1[2], x2[2]) : y[2],
-		       p[3] ? f (x1[3], x2[3]) : y[3]};
-}
-
-static inline int
-v_lanes64 (void)
-{
-  return 2;
-}
-static inline float64x2_t
-v_f64 (double x)
-{
-  return (float64x2_t) V2 (x);
-}
-static inline uint64x2_t
-v_u64 (uint64_t x)
-{
-  return (uint64x2_t) V2 (x);
-}
-/* true if any elements of a v_cond result is non-zero.  */
-static inline int
-v_any_u64 (uint64x2_t x)
-{
-  /* assume elements in x are either 0 or -1u.  */
-  return vpaddd_u64 (x) != 0;
-}
-static inline float64x2_t
-v_lookup_f64 (const double *tab, uint64x2_t idx)
-{
-  return (float64x2_t){tab[idx[0]], tab[idx[1]]};
-}
-static inline uint64x2_t
-v_lookup_u64 (const uint64_t *tab, uint64x2_t idx)
-{
-  return (uint64x2_t){tab[idx[0]], tab[idx[1]]};
-}
-static inline float64x2_t
-v_call_f64 (double (*f) (double), float64x2_t x, float64x2_t y, uint64x2_t p)
-{
-  double p1 = p[1];
-  double x1 = x[1];
-  if (likely (p[0]))
-    y[0] = f (x[0]);
-  if (likely (p1))
-    y[1] = f (x1);
-  return y;
-}
-
-#endif
diff --git a/math/aarch64/v_pow.c b/math/aarch64/v_pow.c
deleted file mode 100644
index 734f1663a283d4..00000000000000
--- a/math/aarch64/v_pow.c
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Double-precision vector pow function.
- *
- * Copyright (c) 2020-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "mathlib.h"
-#include "v_math.h"
-
-float64x2_t VPCS_ATTR V_NAME_D2 (pow) (float64x2_t x, float64x2_t y)
-{
-  float64x2_t z;
-  for (int lane = 0; lane < v_lanes64 (); lane++)
-    {
-      double sx = x[lane];
-      double sy = y[lane];
-      double sz = pow (sx, sy);
-      z[lane] = sz;
-    }
-  return z;
-}
diff --git a/pl/math/v_pow_exp_data.c b/math/aarch64/v_pow_exp_data.c
similarity index 99%
rename from pl/math/v_pow_exp_data.c
rename to math/aarch64/v_pow_exp_data.c
index 5d921ef648a48a..db615ce94bd7c8 100644
--- a/pl/math/v_pow_exp_data.c
+++ b/math/aarch64/v_pow_exp_data.c
@@ -1,7 +1,7 @@
 /*
  * Shared data between exp, exp2 and pow.
  *
- * Copyright (c) 2018-2023, Arm Limited.
+ * Copyright (c) 2018-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
diff --git a/pl/math/v_pow_log_data.c b/math/aarch64/v_pow_log_data.c
similarity index 99%
rename from pl/math/v_pow_log_data.c
rename to math/aarch64/v_pow_log_data.c
index 036faa5c97c1d2..7df277f74e4f4c 100644
--- a/pl/math/v_pow_log_data.c
+++ b/math/aarch64/v_pow_log_data.c
@@ -1,7 +1,7 @@
 /*
  * Data for the log part of pow.
  *
- * Copyright (c) 2018-2023, Arm Limited.
+ * Copyright (c) 2018-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
diff --git a/math/aarch64/v_powf.c b/math/aarch64/v_powf.c
deleted file mode 100644
index 3a4163ab05582b..00000000000000
--- a/math/aarch64/v_powf.c
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Single-precision vector powf function.
- *
- * Copyright (c) 2019-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "v_math.h"
-
-#define Min v_u32 (0x00800000)
-#define Max v_u32 (0x7f800000)
-#define Thresh v_u32 (0x7f000000) /* Max - Min.  */
-#define MantissaMask v_u32 (0x007fffff)
-
-#define A data.log2_poly
-#define C data.exp2f_poly
-
-/* 2.6 ulp ~ 0.5 + 2^24 (128*Ln2*relerr_log2 + relerr_exp2).  */
-#define Off v_u32 (0x3f35d000)
-
-#define V_POWF_LOG2_TABLE_BITS 5
-#define V_EXP2F_TABLE_BITS 5
-#define Log2IdxMask v_u32 ((1 << V_POWF_LOG2_TABLE_BITS) - 1)
-#define Scale ((double) (1 << V_EXP2F_TABLE_BITS))
-
-static const struct
-{
-  struct
-  {
-    double invc, logc;
-  } log2_tab[1 << V_POWF_LOG2_TABLE_BITS];
-  double log2_poly[4];
-  uint64_t exp2f_tab[1 << V_EXP2F_TABLE_BITS];
-  double exp2f_poly[3];
-} data = {
-  .log2_tab = {{0x1.6489890582816p+0, -0x1.e960f97b22702p-2 * Scale},
-	       {0x1.5cf19b35e3472p+0, -0x1.c993406cd4db6p-2 * Scale},
-	       {0x1.55aac0e956d65p+0, -0x1.aa711d9a7d0f3p-2 * Scale},
-	       {0x1.4eb0022977e01p+0, -0x1.8bf37bacdce9bp-2 * Scale},
-	       {0x1.47fcccda1dd1fp+0, -0x1.6e13b3519946ep-2 * Scale},
-	       {0x1.418ceabab68c1p+0, -0x1.50cb8281e4089p-2 * Scale},
-	       {0x1.3b5c788f1edb3p+0, -0x1.341504a237e2bp-2 * Scale},
-	       {0x1.3567de48e9c9ap+0, -0x1.17eaab624ffbbp-2 * Scale},
-	       {0x1.2fabc80fd19bap+0, -0x1.f88e708f8c853p-3 * Scale},
-	       {0x1.2a25200ce536bp+0, -0x1.c24b6da113914p-3 * Scale},
-	       {0x1.24d108e0152e3p+0, -0x1.8d02ee397cb1dp-3 * Scale},
-	       {0x1.1facd8ab2fbe1p+0, -0x1.58ac1223408b3p-3 * Scale},
-	       {0x1.1ab614a03efdfp+0, -0x1.253e6fd190e89p-3 * Scale},
-	       {0x1.15ea6d03af9ffp+0, -0x1.e5641882c12ffp-4 * Scale},
-	       {0x1.1147b994bb776p+0, -0x1.81fea712926f7p-4 * Scale},
-	       {0x1.0ccbf650593aap+0, -0x1.203e240de64a3p-4 * Scale},
-	       {0x1.0875408477302p+0, -0x1.8029b86a78281p-5 * Scale},
-	       {0x1.0441d42a93328p+0, -0x1.85d713190fb9p-6 * Scale},
-	       {0x1p+0, 0x0p+0 * Scale},
-	       {0x1.f1d006c855e86p-1, 0x1.4c1cc07312997p-5 * Scale},
-	       {0x1.e28c3341aa301p-1, 0x1.5e1848ccec948p-4 * Scale},
-	       {0x1.d4bdf9aa64747p-1, 0x1.04cfcb7f1196fp-3 * Scale},
-	       {0x1.c7b45a24e5803p-1, 0x1.582813d463c21p-3 * Scale},
-	       {0x1.bb5f5eb2ed60ap-1, 0x1.a936fa68760ccp-3 * Scale},
-	       {0x1.afb0bff8fe6b4p-1, 0x1.f81bc31d6cc4ep-3 * Scale},
-	       {0x1.a49badf7ab1f5p-1, 0x1.2279a09fae6b1p-2 * Scale},
-	       {0x1.9a14a111fc4c9p-1, 0x1.47ec0b6df5526p-2 * Scale},
-	       {0x1.901131f5b2fdcp-1, 0x1.6c71762280f1p-2 * Scale},
-	       {0x1.8687f73f6d865p-1, 0x1.90155070798dap-2 * Scale},
-	       {0x1.7d7067eb77986p-1, 0x1.b2e23b1d3068cp-2 * Scale},
-	       {0x1.74c2c1cf97b65p-1, 0x1.d4e21b0daa86ap-2 * Scale},
-	       {0x1.6c77f37cff2a1p-1, 0x1.f61e2a2f67f3fp-2 * Scale},},
-  .log2_poly = { /* rel err: 1.5 * 2^-30.  */
-		-0x1.6ff5daa3b3d7cp-2 * Scale, 0x1.ec81d03c01aebp-2 * Scale,
-		-0x1.71547bb43f101p-1 * Scale, 0x1.7154764a815cbp0 * Scale,},
-  .exp2f_tab = {0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f,
-		0x3fef9301d0125b51, 0x3fef72b83c7d517b, 0x3fef54873168b9aa,
-		0x3fef387a6e756238, 0x3fef1e9df51fdee1, 0x3fef06fe0a31b715,
-		0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
-		0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429,
-		0x3feea47eb03a5585, 0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74,
-		0x3feea11473eb0187, 0x3feea589994cce13, 0x3feeace5422aa0db,
-		0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
-		0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c,
-		0x3fef3720dcef9069, 0x3fef5818dcfba487, 0x3fef7c97337b9b5f,
-		0x3fefa4afa2a490da, 0x3fefd0765b6e4540,},
-  .exp2f_poly = { /* rel err: 1.69 * 2^-34.  */
-		 0x1.c6af84b912394p-5 / Scale / Scale / Scale,
-		 0x1.ebfce50fac4f3p-3 / Scale / Scale,
-		 0x1.62e42ff0c52d6p-1 / Scale}};
-
-static float32x4_t VPCS_ATTR NOINLINE
-special_case (float32x4_t x, float32x4_t y, float32x4_t ret, uint32x4_t cmp)
-{
-  return v_call2_f32 (powf, x, y, ret, cmp);
-}
-
-float32x4_t VPCS_ATTR V_NAME_F2 (pow) (float32x4_t x, float32x4_t y)
-{
-  uint32x4_t u = vreinterpretq_u32_f32 (x);
-  uint32x4_t cmp = vcgeq_u32 (vsubq_u32 (u, Min), Thresh);
-  uint32x4_t tmp = vsubq_u32 (u, Off);
-  uint32x4_t i = vandq_u32 (vshrq_n_u32 (tmp, (23 - V_POWF_LOG2_TABLE_BITS)),
-			    Log2IdxMask);
-  uint32x4_t top = vbicq_u32 (tmp, MantissaMask);
-  uint32x4_t iz = vsubq_u32 (u, top);
-  int32x4_t k = vshrq_n_s32 (vreinterpretq_s32_u32 (top),
-			     23 - V_EXP2F_TABLE_BITS); /* arithmetic shift.  */
-
-  float32x4_t ret;
-  for (int lane = 0; lane < 4; lane++)
-    {
-      /* Use double precision for each lane.  */
-      double invc = data.log2_tab[i[lane]].invc;
-      double logc = data.log2_tab[i[lane]].logc;
-      double z = (double) asfloat (iz[lane]);
-
-      /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k.  */
-      double r = __builtin_fma (z, invc, -1.0);
-      double y0 = logc + (double) k[lane];
-
-      /* Polynomial to approximate log1p(r)/ln2.  */
-      double logx = A[0];
-      logx = r * logx + A[1];
-      logx = r * logx + A[2];
-      logx = r * logx + A[3];
-      logx = r * logx + y0;
-      double ylogx = y[lane] * logx;
-      cmp[lane] = (asuint64 (ylogx) >> 47 & 0xffff)
-			  >= asuint64 (126.0 * (1 << V_EXP2F_TABLE_BITS)) >> 47
-		      ? 1
-		      : cmp[lane];
-
-      /* N*x = k + r with r in [-1/2, 1/2].  */
-      double kd = round (ylogx);
-      uint64_t ki = lround (ylogx);
-      r = ylogx - kd;
-
-      /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1).  */
-      uint64_t t = data.exp2f_tab[ki % (1 << V_EXP2F_TABLE_BITS)];
-      t += ki << (52 - V_EXP2F_TABLE_BITS);
-      double s = asdouble (t);
-      double p = C[0];
-      p = __builtin_fma (p, r, C[1]);
-      p = __builtin_fma (p, r, C[2]);
-      p = __builtin_fma (p, s * r, s);
-
-      ret[lane] = p;
-    }
-  if (unlikely (v_any_u32 (cmp)))
-    return special_case (x, y, ret, cmp);
-  return ret;
-}
diff --git a/pl/math/v_powf_data.c b/math/aarch64/v_powf_data.c
similarity index 98%
rename from pl/math/v_powf_data.c
rename to math/aarch64/v_powf_data.c
index ded211924b8047..5cf1b876941450 100644
--- a/pl/math/v_powf_data.c
+++ b/math/aarch64/v_powf_data.c
@@ -1,7 +1,7 @@
 /*
  * Coefficients for single-precision SVE pow(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
diff --git a/math/cosf.c b/math/cosf.c
index 6293ce8f1b7d6b..a9b1f9da16ed55 100644
--- a/math/cosf.c
+++ b/math/cosf.c
@@ -1,7 +1,7 @@
 /*
  * Single-precision cos function.
  *
- * Copyright (c) 2018-2021, Arm Limited.
+ * Copyright (c) 2018-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -9,6 +9,8 @@
 #include <math.h>
 #include "math_config.h"
 #include "sincosf.h"
+#include "test_defs.h"
+#include "test_sig.h"
 
 /* Fast cosf implementation.  Worst-case ULP is 0.5607, maximum relative
    error is 0.5303 * 2^-23.  A single-step range reduction is used for
@@ -61,3 +63,9 @@ cosf (float y)
   else
     return __math_invalidf (y);
 }
+
+TEST_SIG (S, F, 1, cos, -3.1, 3.1)
+TEST_ULP (cosf, 0.06)
+TEST_ULP_NONNEAREST (cosf, 0.5)
+TEST_INTERVAL (cosf, 0, 0xffff0000, 10000)
+TEST_SYM_INTERVAL (cosf, 0x1p-14, 0x1p54, 50000)
diff --git a/math/erf.c b/math/erf.c
index 5f9f40dda26434..2c93a304346a1f 100644
--- a/math/erf.c
+++ b/math/erf.c
@@ -1,13 +1,15 @@
 /*
  * Double-precision erf(x) function.
  *
- * Copyright (c) 2020, Arm Limited.
+ * Copyright (c) 2020-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "math_config.h"
 #include <math.h>
 #include <stdint.h>
+#include "test_defs.h"
+#include "test_sig.h"
 
 #define TwoOverSqrtPiMinusOne 0x1.06eba8214db69p-3
 #define C 0x1.b0ac16p-1
@@ -242,3 +244,11 @@ erf (double x)
 	return 1.0;
     }
 }
+
+TEST_SIG (S, D, 1, erf, -6.0, 6.0)
+TEST_ULP (erf, 0.51)
+TEST_ULP_NONNEAREST (erf, 0.9)
+TEST_INTERVAL (erf, 0, 0xffff000000000000, 10000)
+TEST_SYM_INTERVAL (erf, 0x1p-1022, 0x1p-26, 40000)
+TEST_SYM_INTERVAL (erf, 0x1p-26, 0x1p3, 40000)
+TEST_INTERVAL (erf, 0, inf, 40000)
diff --git a/math/erff.c b/math/erff.c
index 9fa476dbbab2d7..fd64f40a2d22c9 100644
--- a/math/erff.c
+++ b/math/erff.c
@@ -1,13 +1,15 @@
 /*
  * Single-precision erf(x) function.
  *
- * Copyright (c) 2020, Arm Limited.
+ * Copyright (c) 2020-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include <stdint.h>
 #include <math.h>
 #include "math_config.h"
+#include "test_defs.h"
+#include "test_sig.h"
 
 #define TwoOverSqrtPiMinusOne 0x1.06eba8p-3f
 #define A __erff_data.erff_poly_A
@@ -102,3 +104,11 @@ erff (float x)
     }
   return r;
 }
+
+TEST_SIG (S, F, 1, erf, -6.0, 6.0)
+TEST_ULP (erff, 0.6)
+TEST_ULP_NONNEAREST (erff, 0.9)
+TEST_INTERVAL (erff, 0, 0xffff0000, 10000)
+TEST_SYM_INTERVAL (erff, 0x1p-127, 0x1p-26, 40000)
+TEST_SYM_INTERVAL (erff, 0x1p-26, 0x1p3, 40000)
+TEST_INTERVAL (erff, 0, inf, 40000)
diff --git a/math/exp.c b/math/exp.c
index 1de500c31f3ed0..3b08d44688a803 100644
--- a/math/exp.c
+++ b/math/exp.c
@@ -1,7 +1,7 @@
 /*
  * Double-precision e^x function.
  *
- * Copyright (c) 2018-2019, Arm Limited.
+ * Copyright (c) 2018-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -9,6 +9,8 @@
 #include <math.h>
 #include <stdint.h>
 #include "math_config.h"
+#include "test_defs.h"
+#include "test_sig.h"
 
 #define N (1 << EXP_TABLE_BITS)
 #define InvLn2N __exp_data.invln2N
@@ -77,7 +79,7 @@ top12 (double x)
 /* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
    If hastail is 0 then xtail is assumed to be 0 too.  */
 static inline double
-exp_inline (double x, double xtail, int hastail)
+exp_inline (double x, double xtail)
 {
   uint32_t abstop;
   uint64_t ki, idx, top, sbits;
@@ -125,7 +127,7 @@ exp_inline (double x, double xtail, int hastail)
 #endif
   r = x + kd * NegLn2hiN + kd * NegLn2loN;
   /* The code assumes 2^-200 < |xtail| < 2^-8/N.  */
-  if (hastail)
+  if (!__builtin_constant_p (xtail) || xtail != 0.0)
     r += xtail;
   /* 2^(k/N) ~= scale * (1 + tail).  */
   idx = 2 * (ki % N);
@@ -156,21 +158,20 @@ exp_inline (double x, double xtail, int hastail)
 double
 exp (double x)
 {
-  return exp_inline (x, 0, 0);
+  return exp_inline (x, 0);
 }
 
-/* May be useful for implementing pow where more than double
-   precision input is needed.  */
-double
-__exp_dd (double x, double xtail)
-{
-  return exp_inline (x, xtail, 1);
-}
 #if USE_GLIBC_ABI
 strong_alias (exp, __exp_finite)
 hidden_alias (exp, __ieee754_exp)
-hidden_alias (__exp_dd, __exp1)
 # if LDBL_MANT_DIG == 53
 long double expl (long double x) { return exp (x); }
 # endif
 #endif
+
+TEST_SIG (S, D, 1, exp, -9.9, 9.9)
+TEST_ULP (exp, 0.01)
+TEST_ULP_NONNEAREST (exp, 0.5)
+TEST_INTERVAL (exp, 0, 0xffff000000000000, 10000)
+TEST_SYM_INTERVAL (exp, 0x1p-6, 0x1p6, 400000)
+TEST_SYM_INTERVAL (exp, 633.3, 733.3, 10000)
diff --git a/math/exp10.c b/math/exp10.c
index 0fbec4c694ca83..de8ece42e09e63 100644
--- a/math/exp10.c
+++ b/math/exp10.c
@@ -1,11 +1,13 @@
 /*
  * Double-precision 10^x function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include "math_config.h"
+#include "test_defs.h"
+#include "test_sig.h"
 
 #define N (1 << EXP_TABLE_BITS)
 #define IndexMask (N - 1)
@@ -22,7 +24,7 @@ special_case (uint64_t sbits, double_t tmp, uint64_t ki)
 {
   double_t scale, y;
 
-  if (ki - (1ull << 16) < 0x80000000)
+  if ((ki & 0x80000000) == 0)
     {
       /* The exponent of scale might have overflowed by 1.  */
       sbits -= 1ull << 52;
@@ -84,14 +86,14 @@ exp10 (double x)
   /* Reduce x: z = x * N / log10(2), k = round(z).  */
   double_t z = __exp_data.invlog10_2N * x;
   double_t kd;
-  int64_t ki;
+  uint64_t ki;
 #if TOINT_INTRINSICS
   kd = roundtoint (z);
   ki = converttoint (z);
 #else
   kd = eval_as_double (z + Shift);
+  ki = asuint64 (kd);
   kd -= Shift;
-  ki = kd;
 #endif
 
   /* r = x - k * log10(2), r in [-0.5, 0.5].  */
@@ -127,3 +129,15 @@ exp10 (double x)
   double_t s = asdouble (sbits);
   return eval_as_double (s * y + s);
 }
+
+#if WANT_EXP10_TESTS
+TEST_SIG (S, D, 1, exp10, -9.9, 9.9)
+TEST_ULP (exp10, 0.02)
+TEST_ULP_NONNEAREST (exp10, 0.5)
+TEST_SYM_INTERVAL (exp10, 0, 0x1p-47, 5000)
+TEST_SYM_INTERVAL (exp10, 0x1p47, 1, 50000)
+TEST_INTERVAL (exp10, 1, OFlowBound, 50000)
+TEST_INTERVAL (exp10, -1, UFlowBound, 50000)
+TEST_INTERVAL (exp10, OFlowBound, inf, 5000)
+TEST_INTERVAL (exp10, UFlowBound, -inf, 5000)
+#endif
diff --git a/math/exp2.c b/math/exp2.c
index a1eee44f1f4828..f26ac3cda2ccbe 100644
--- a/math/exp2.c
+++ b/math/exp2.c
@@ -1,7 +1,7 @@
 /*
  * Double-precision 2^x function.
  *
- * Copyright (c) 2018-2019, Arm Limited.
+ * Copyright (c) 2018-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -9,6 +9,8 @@
 #include <math.h>
 #include <stdint.h>
 #include "math_config.h"
+#include "test_defs.h"
+#include "test_sig.h"
 
 #define N (1 << EXP_TABLE_BITS)
 #define Shift __exp_data.exp2_shift
@@ -141,3 +143,10 @@ hidden_alias (exp2, __ieee754_exp2)
 long double exp2l (long double x) { return exp2 (x); }
 # endif
 #endif
+
+TEST_SIG (S, D, 1, exp2, -9.9, 9.9)
+TEST_ULP (exp2, 0.01)
+TEST_ULP_NONNEAREST (exp2, 0.5)
+TEST_INTERVAL (exp2, 0, 0xffff000000000000, 10000)
+TEST_SYM_INTERVAL (exp2, 0x1p-6, 0x1p6, 40000)
+TEST_SYM_INTERVAL (exp2, 633.3, 733.3, 10000)
diff --git a/math/exp2f.c b/math/exp2f.c
index 776c3ddf76636a..3202f41377adce 100644
--- a/math/exp2f.c
+++ b/math/exp2f.c
@@ -1,13 +1,15 @@
 /*
  * Single-precision 2^x function.
  *
- * Copyright (c) 2017-2018, Arm Limited.
+ * Copyright (c) 2017-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include <math.h>
 #include <stdint.h>
 #include "math_config.h"
+#include "test_defs.h"
+#include "test_sig.h"
 
 /*
 EXP2F_TABLE_BITS = 5
@@ -78,3 +80,9 @@ exp2f (float x)
 strong_alias (exp2f, __exp2f_finite)
 hidden_alias (exp2f, __ieee754_exp2f)
 #endif
+
+TEST_SIG (S, F, 1, exp2, -9.9, 9.9)
+TEST_ULP (exp2f, 0.01)
+TEST_ULP_NONNEAREST (exp2f, 0.5)
+TEST_INTERVAL (exp2f, 0, 0xffff0000, 10000)
+TEST_SYM_INTERVAL (exp2f, 0x1p-14, 0x1p8, 50000)
diff --git a/math/expf.c b/math/expf.c
index 08a20d59e49145..6572b99a1e68a7 100644
--- a/math/expf.c
+++ b/math/expf.c
@@ -1,13 +1,15 @@
 /*
  * Single-precision e^x function.
  *
- * Copyright (c) 2017-2019, Arm Limited.
+ * Copyright (c) 2017-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include <math.h>
 #include <stdint.h>
 #include "math_config.h"
+#include "test_defs.h"
+#include "test_sig.h"
 
 /*
 EXP2F_TABLE_BITS = 5
@@ -89,3 +91,9 @@ expf (float x)
 strong_alias (expf, __expf_finite)
 hidden_alias (expf, __ieee754_expf)
 #endif
+
+TEST_SIG (S, F, 1, exp, -9.9, 9.9)
+TEST_ULP (expf, 0.01)
+TEST_ULP_NONNEAREST (expf, 0.5)
+TEST_INTERVAL (expf, 0, 0xffff0000, 10000)
+TEST_SYM_INTERVAL (expf, 0x1p-14, 0x1p8, 500000)
diff --git a/math/include/mathlib.h b/math/include/mathlib.h
index 64cbb9c1f8506e..23d04da99d93f2 100644
--- a/math/include/mathlib.h
+++ b/math/include/mathlib.h
@@ -1,58 +1,268 @@
 /*
  * Public API.
  *
- * Copyright (c) 2015-2023, Arm Limited.
+ * Copyright (c) 2015-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #ifndef _MATHLIB_H
 #define _MATHLIB_H
 
-float expf (float);
-float exp2f (float);
-float logf (float);
-float log2f (float);
-float powf (float, float);
-float sinf (float);
-float cosf (float);
-void sincosf (float, float*, float*);
-
-double exp (double);
-double exp10 (double);
-double exp2 (double);
-double log (double);
-double log2 (double);
-double pow (double, double);
-
 #if __aarch64__
-# if __GNUC__ >= 5
-typedef __Float32x4_t __f32x4_t;
-typedef __Float64x2_t __f64x2_t;
-# elif __clang_major__*100+__clang_minor__ >= 305
-typedef __attribute__((__neon_vector_type__(4))) float __f32x4_t;
-typedef __attribute__((__neon_vector_type__(2))) double __f64x2_t;
-# else
-#  error Unsupported compiler
-# endif
+/* Low-accuracy scalar implementations of C23 routines.  */
+float arm_math_cospif (float);
+double arm_math_cospi (double);
+float arm_math_sinpif (float);
+double arm_math_sinpi (double);
+float arm_math_tanpif (float);
+double arm_math_tanpi (double);
+void arm_math_sincospif (float, float *, float *);
+void arm_math_sincospi (double, double *, double *);
+#endif
+
+/* SIMD declaration for autovectorisation with fast-math enabled. Only GCC is
+   supported, and vector routines are only supported on Linux on AArch64.  */
+#if defined __aarch64__ && __linux__ && defined(__GNUC__)                     \
+    && !defined(__clang__) && defined(__FAST_MATH__)
+#  define DECL_SIMD_aarch64 __attribute__ ((__simd__ ("notinbranch"), const))
+#else
+#  define DECL_SIMD_aarch64
+#endif
+
+#if WANT_EXPERIMENTAL_MATH
+
+float arm_math_erff (float);
+DECL_SIMD_aarch64 float cospif (float);
+DECL_SIMD_aarch64 float erfinvf (float);
+DECL_SIMD_aarch64 float sinpif (float);
+DECL_SIMD_aarch64 float tanpif (float);
+
+double arm_math_erf (double);
+DECL_SIMD_aarch64 double cospi (double);
+DECL_SIMD_aarch64 double erfinv (double);
+DECL_SIMD_aarch64 double sinpi (double);
+DECL_SIMD_aarch64 double tanpi (double);
+
+long double erfinvl (long double);
+
+#endif
 
-# if __GNUC__ >= 9 || __clang_major__ >= 8
-#  undef __vpcs
-#  define __vpcs __attribute__((__aarch64_vector_pcs__))
+/* Note these routines may not be provided by AOR (some are only available with
+   WANT_EXPERIMENTAL_MATH, some are not provided at all). Redeclare them here
+   to add vector annotations.  */
+DECL_SIMD_aarch64 float acosf (float);
+DECL_SIMD_aarch64 float acoshf (float);
+DECL_SIMD_aarch64 float asinf (float);
+DECL_SIMD_aarch64 float asinhf (float);
+DECL_SIMD_aarch64 float atan2f (float, float);
+DECL_SIMD_aarch64 float atanf (float);
+DECL_SIMD_aarch64 float atanhf (float);
+DECL_SIMD_aarch64 float cbrtf (float);
+DECL_SIMD_aarch64 float cosf (float);
+DECL_SIMD_aarch64 float coshf (float);
+DECL_SIMD_aarch64 float erfcf (float);
+DECL_SIMD_aarch64 float erff (float);
+DECL_SIMD_aarch64 float exp10f (float);
+DECL_SIMD_aarch64 float exp2f (float);
+DECL_SIMD_aarch64 float expf (float);
+DECL_SIMD_aarch64 float expm1f (float);
+DECL_SIMD_aarch64 float hypotf (float, float);
+DECL_SIMD_aarch64 float log10f (float);
+DECL_SIMD_aarch64 float log1pf (float);
+DECL_SIMD_aarch64 float log2f (float);
+DECL_SIMD_aarch64 float logf (float);
+DECL_SIMD_aarch64 float powf (float, float);
+DECL_SIMD_aarch64 float sinf (float);
+void sincosf (float, float *, float *);
+DECL_SIMD_aarch64 float sinhf (float);
+DECL_SIMD_aarch64 float tanf (float);
+DECL_SIMD_aarch64 float tanhf (float);
+
+DECL_SIMD_aarch64 double acos (double);
+DECL_SIMD_aarch64 double acosh (double);
+DECL_SIMD_aarch64 double asin (double);
+DECL_SIMD_aarch64 double asinh (double);
+DECL_SIMD_aarch64 double atan2 (double, double);
+DECL_SIMD_aarch64 double atan (double);
+DECL_SIMD_aarch64 double atanh (double);
+DECL_SIMD_aarch64 double cbrt (double);
+DECL_SIMD_aarch64 double cos (double);
+DECL_SIMD_aarch64 double cosh (double);
+DECL_SIMD_aarch64 double erfc (double);
+DECL_SIMD_aarch64 double erf (double);
+DECL_SIMD_aarch64 double exp10 (double);
+DECL_SIMD_aarch64 double exp2 (double);
+DECL_SIMD_aarch64 double exp (double);
+DECL_SIMD_aarch64 double expm1 (double);
+DECL_SIMD_aarch64 double hypot (double, double);
+DECL_SIMD_aarch64 double log10 (double);
+DECL_SIMD_aarch64 double log1p (double);
+DECL_SIMD_aarch64 double log2 (double);
+DECL_SIMD_aarch64 double log (double);
+DECL_SIMD_aarch64 double pow (double, double);
+DECL_SIMD_aarch64 double sin (double);
+DECL_SIMD_aarch64 double sinh (double);
+DECL_SIMD_aarch64 double tan (double);
+DECL_SIMD_aarch64 double tanh (double);
+
+#if __aarch64__ && __linux__
+# include <arm_neon.h>
+# undef __vpcs
+# define __vpcs __attribute__((__aarch64_vector_pcs__))
 
 /* Vector functions following the vector PCS using ABI names.  */
-__vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t);
-__vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
-__vpcs __f32x4_t _ZGVnN4v_expf_1u (__f32x4_t);
-__vpcs __f32x4_t _ZGVnN4v_expf (__f32x4_t);
-__vpcs __f32x4_t _ZGVnN4v_exp2f_1u (__f32x4_t);
-__vpcs __f32x4_t _ZGVnN4v_exp2f (__f32x4_t);
-__vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t);
-__vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t);
-__vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);
-__vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
-__vpcs __f64x2_t _ZGVnN2v_exp (__f64x2_t);
-__vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t);
-__vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t);
+__vpcs float32x4_t _ZGVnN4v_acosf (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_acoshf (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_asinf (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_asinhf (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_atanf (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_atanhf (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_cbrtf (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_cosf (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_coshf (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_cospif (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_erfcf (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_erff (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_exp10f (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_exp2f (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_exp2f_1u (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_expf (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_expf_1u (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_expm1f (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_log10f (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_log1pf (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_log2f (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_logf (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_sinf (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_sinhf (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_sinpif (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_tanf (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_tanhf (float32x4_t);
+__vpcs float32x4_t _ZGVnN4v_tanpif (float32x4_t);
+__vpcs float32x4_t _ZGVnN4vl4_modff (float32x4_t, float *);
+__vpcs float32x4_t _ZGVnN4vv_atan2f (float32x4_t, float32x4_t);
+__vpcs float32x4_t _ZGVnN4vv_hypotf (float32x4_t, float32x4_t);
+__vpcs float32x4_t _ZGVnN4vv_powf (float32x4_t, float32x4_t);
+__vpcs float32x4x2_t _ZGVnN4v_cexpif (float32x4_t);
+__vpcs void _ZGVnN4vl4l4_sincosf (float32x4_t, float *, float *);
+__vpcs void _ZGVnN4vl4l4_sincospif (float32x4_t, float *, float *);
+
+__vpcs float64x2_t _ZGVnN2v_acos (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_acosh (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_asin (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_asinh (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_atan (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_atanh (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_cbrt (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_cos (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_cosh (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_cospi (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_erf (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_erfc (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_exp (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_exp10 (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_exp2 (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_expm1 (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_log (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_log10 (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_log1p (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_log2 (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_sin (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_sinh (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_sinpi (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_tan (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_tanh (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_tanpi (float64x2_t);
+__vpcs float64x2_t _ZGVnN2vl8_modf (float64x2_t, double *);
+__vpcs float64x2_t _ZGVnN2vv_atan2 (float64x2_t, float64x2_t);
+__vpcs float64x2_t _ZGVnN2vv_hypot (float64x2_t, float64x2_t);
+__vpcs float64x2_t _ZGVnN2vv_pow (float64x2_t, float64x2_t);
+__vpcs float64x2x2_t _ZGVnN2v_cexpi (float64x2_t);
+__vpcs void _ZGVnN2vl8l8_sincos (float64x2_t, double *, double *);
+__vpcs void _ZGVnN2vl8l8_sincospi (float64x2_t, double *, double *);
+
+# if WANT_EXPERIMENTAL_MATH
+__vpcs float32x4_t _ZGVnN4v_erfinvf (float32x4_t);
+__vpcs float64x2_t _ZGVnN2v_erfinv (float64x2_t);
+# endif
+
+#  include <arm_sve.h>
+svfloat32_t _ZGVsMxv_acosf (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_acoshf (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_asinf (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_asinhf (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_atanf (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_atanhf (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_cbrtf (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_cosf (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_coshf (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_cospif (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_erfcf (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_erff (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_exp10f (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_exp2f (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_expf (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_expm1f (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_log10f (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_log1pf (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_log2f (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_logf (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_sinf (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_sinhf (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_sinpif (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_tanf (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_tanhf (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_tanpif (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxvl4_modff (svfloat32_t, float *, svbool_t);
+svfloat32_t _ZGVsMxvv_atan2f (svfloat32_t, svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxvv_hypotf (svfloat32_t, svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxvv_powf (svfloat32_t, svfloat32_t, svbool_t);
+svfloat32x2_t _ZGVsMxv_cexpif (svfloat32_t, svbool_t);
+void _ZGVsMxvl4l4_sincosf (svfloat32_t, float *, float *, svbool_t);
+void _ZGVsMxvl4l4_sincospif (svfloat32_t, float *, float *, svbool_t);
+
+svfloat64_t _ZGVsMxv_acos (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_acosh (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_asin (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_asinh (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_atan (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_atanh (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_cbrt (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_cos (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_cosh (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_cospi (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_erf (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_erfc (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_exp (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_exp10 (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_exp2 (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_expm1 (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_log (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_log10 (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_log1p (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_log2 (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_sin (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_sinh (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_sinpi (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_tan (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_tanh (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_tanpi (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxvl8_modf (svfloat64_t, double *, svbool_t);
+svfloat64_t _ZGVsMxvv_atan2 (svfloat64_t, svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxvv_hypot (svfloat64_t, svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxvv_pow (svfloat64_t, svfloat64_t, svbool_t);
+svfloat64x2_t _ZGVsMxv_cexpi (svfloat64_t, svbool_t);
+void _ZGVsMxvl8l8_sincos (svfloat64_t, double *, double *, svbool_t);
+void _ZGVsMxvl8l8_sincospi (svfloat64_t, double *, double *, svbool_t);
+
+#  if WANT_EXPERIMENTAL_MATH
+
+svfloat32_t _ZGVsMxv_erfinvf (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxvv_powi (svfloat32_t, svint32_t, svbool_t);
+
+svfloat64_t _ZGVsMxvv_powk (svfloat64_t, svint64_t, svbool_t);
+svfloat64_t _ZGVsMxv_erfinv (svfloat64_t, svbool_t);
+
 # endif
 #endif
 
diff --git a/math/include/test_defs.h b/math/include/test_defs.h
new file mode 100644
index 00000000000000..2fe66fa6f14c17
--- /dev/null
+++ b/math/include/test_defs.h
@@ -0,0 +1,21 @@
+/*
+ * Helper macros for emitting various details about routines for consumption by
+ * runulp.sh. This version of the file is for inclusion when building routines,
+ * so expansions are empty - see math/test/test_defs for versions used by the
+ * build system.
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception.
+ */
+
+#define TEST_ULP(f, l)
+#define TEST_ULP_NONNEAREST(f, l)
+
+#define TEST_DISABLE_FENV(f)
+#define TEST_DISABLE_FENV_IF_NOT(f, e)
+
+#define TEST_INTERVAL(f, lo, hi, n)
+#define TEST_SYM_INTERVAL(f, lo, hi, n)
+#define TEST_INTERVAL2(f, xlo, xhi, ylo, yhi, n)
+
+#define TEST_CONTROL_VALUE(f, c)
diff --git a/math/include/test_sig.h b/math/include/test_sig.h
new file mode 100644
index 00000000000000..a967829098d6c7
--- /dev/null
+++ b/math/include/test_sig.h
@@ -0,0 +1,47 @@
+/*
+ * Macros for emitting various ulp/bench entries based on function signature
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception.
+ */
+
+#define TEST_DECL_SF1(fun) float fun##f (float);
+#define TEST_DECL_SF2(fun) float fun##f (float, float);
+#define TEST_DECL_SD1(fun) double fun (double);
+#define TEST_DECL_SD2(fun) double fun (double, double);
+
+#define TEST_DECL_VF1(fun)                                                    \
+  float32x4_t VPCS_ATTR V_NAME_F1 (fun##f) (float32x4_t);
+#define TEST_DECL_VF2(fun)                                                    \
+  float32x4_t VPCS_ATTR V_NAME_F2 (fun##f) (float32x4_t, float32x4_t);
+#define TEST_DECL_VD1(fun) VPCS_ATTR float64x2_t V_NAME_D1 (fun) (float64x2_t);
+#define TEST_DECL_VD2(fun)                                                    \
+  VPCS_ATTR float64x2_t V_NAME_D2 (fun) (float64x2_t, float64x2_t);
+
+#define TEST_DECL_SVF1(fun)                                                   \
+  svfloat32_t SV_NAME_F1 (fun) (svfloat32_t, svbool_t);
+#define TEST_DECL_SVF2(fun)                                                   \
+  svfloat32_t SV_NAME_F2 (fun) (svfloat32_t, svfloat32_t, svbool_t);
+#define TEST_DECL_SVD1(fun)                                                   \
+  svfloat64_t SV_NAME_D1 (fun) (svfloat64_t, svbool_t);
+#define TEST_DECL_SVD2(fun)                                                   \
+  svfloat64_t SV_NAME_D2 (fun) (svfloat64_t, svfloat64_t, svbool_t);
+
+/* For building the routines, emit a function prototype from TEST_SIG. This
+   ensures that the correct signature has been chosen (a wrong one will be a
+   compile error). _TEST_SIG is defined differently by various components of
+   the build system to emit entries in the wrappers and entries for mathbench
+   and ulp.  */
+#ifndef _TEST_SIG
+# if defined(EMIT_ULP_FUNCS)
+#  define _TEST_SIG(v, t, a, f, ...) TEST_SIG _Z##v##t##a (f)
+# elif defined(EMIT_ULP_WRAPPERS)
+#  define _TEST_SIG(v, t, a, f, ...) TEST_SIG Z##v##N##t##a##_WRAP (f)
+# elif defined(EMIT_MATHBENCH_FUNCS)
+#  define _TEST_SIG(v, t, a, f, ...) TEST_SIG _Z##v##t##a (f, ##__VA_ARGS__)
+# else
+#  define _TEST_SIG(v, t, a, f, ...) TEST_DECL_##v##t##a (f)
+# endif
+#endif
+
+#define TEST_SIG(...) _TEST_SIG (__VA_ARGS__)
diff --git a/math/log.c b/math/log.c
index 43dfc2a744f060..1d6244c30b79eb 100644
--- a/math/log.c
+++ b/math/log.c
@@ -1,7 +1,7 @@
 /*
  * Double-precision log(x) function.
  *
- * Copyright (c) 2018-2019, Arm Limited.
+ * Copyright (c) 2018-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -9,6 +9,8 @@
 #include <math.h>
 #include <stdint.h>
 #include "math_config.h"
+#include "test_defs.h"
+#include "test_sig.h"
 
 #define T __log_data.tab
 #define T2 __log_data.tab2
@@ -160,3 +162,10 @@ hidden_alias (log, __ieee754_log)
 long double logl (long double x) { return log (x); }
 # endif
 #endif
+
+TEST_SIG (S, D, 1, log, 0.01, 11.1)
+TEST_ULP (log, 0.02)
+TEST_ULP_NONNEAREST (log, 0.5)
+TEST_INTERVAL (log, 0, 0xffff000000000000, 10000)
+TEST_INTERVAL (log, 0x1p-4, 0x1p4, 400000)
+TEST_INTERVAL (log, 0, inf, 400000)
diff --git a/pl/math/log10f.c b/math/log10f.c
similarity index 84%
rename from pl/math/log10f.c
rename to math/log10f.c
index 5c80008e4e57be..f8561d063107d3 100644
--- a/pl/math/log10f.c
+++ b/math/log10f.c
@@ -1,7 +1,7 @@
 /*
  * Single-precision log10 function.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -9,8 +9,8 @@
 #include <stdint.h>
 
 #include "math_config.h"
-#include "pl_sig.h"
-#include "pl_test.h"
+#include "test_sig.h"
+#include "test_defs.h"
 
 /* Data associated to logf:
 
@@ -30,7 +30,8 @@
 /* This naive implementation of log10f mimics that of log
    then simply scales the result by 1/log(10) to switch from base e to
    base 10. Hence, most computations are carried out in double precision.
-   Scaling before rounding to single precision is both faster and more accurate.
+   Scaling before rounding to single precision is both faster and more
+   accurate.
 
    ULP error: 0.797 ulp (nearest rounding.).  */
 float
@@ -88,10 +89,11 @@ log10f (float x)
   return eval_as_float (y);
 }
 
-PL_SIG (S, F, 1, log10, 0.01, 11.1)
-PL_TEST_ULP (log10f, 0.30)
-PL_TEST_INTERVAL (log10f, 0, 0xffff0000, 10000)
-PL_TEST_INTERVAL (log10f, 0x1p-127, 0x1p-26, 50000)
-PL_TEST_INTERVAL (log10f, 0x1p-26, 0x1p3, 50000)
-PL_TEST_INTERVAL (log10f, 0x1p-4, 0x1p4, 50000)
-PL_TEST_INTERVAL (log10f, 0, inf, 50000)
+TEST_SIG (S, F, 1, log10, 0.01, 11.1)
+TEST_ULP (log10f, 0.30)
+TEST_ULP_NONNEAREST (log10f, 0.5)
+TEST_INTERVAL (log10f, 0, 0xffff0000, 10000)
+TEST_INTERVAL (log10f, 0x1p-127, 0x1p-26, 50000)
+TEST_INTERVAL (log10f, 0x1p-26, 0x1p3, 50000)
+TEST_INTERVAL (log10f, 0x1p-4, 0x1p4, 50000)
+TEST_INTERVAL (log10f, 0, inf, 50000)
diff --git a/math/log2.c b/math/log2.c
index 3f9c21b0396263..6462915a24f0c4 100644
--- a/math/log2.c
+++ b/math/log2.c
@@ -1,7 +1,7 @@
 /*
  * Double-precision log2(x) function.
  *
- * Copyright (c) 2018-2019, Arm Limited.
+ * Copyright (c) 2018-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -9,6 +9,8 @@
 #include <math.h>
 #include <stdint.h>
 #include "math_config.h"
+#include "test_defs.h"
+#include "test_sig.h"
 
 #define T __log2_data.tab
 #define T2 __log2_data.tab2
@@ -139,3 +141,10 @@ hidden_alias (log2, __ieee754_log2)
 long double log2l (long double x) { return log2 (x); }
 # endif
 #endif
+
+TEST_SIG (S, D, 1, log2, 0.01, 11.1)
+TEST_ULP (log2, 0.05)
+TEST_ULP_NONNEAREST (log2, 0.5)
+TEST_INTERVAL (log2, 0, 0xffff000000000000, 10000)
+TEST_INTERVAL (log2, 0x1p-4, 0x1p4, 40000)
+TEST_INTERVAL (log2, 0, inf, 40000)
diff --git a/math/log2f.c b/math/log2f.c
index 0a44fa2024f606..7d47379b41cbbb 100644
--- a/math/log2f.c
+++ b/math/log2f.c
@@ -1,13 +1,15 @@
 /*
  * Single-precision log2 function.
  *
- * Copyright (c) 2017-2018, Arm Limited.
+ * Copyright (c) 2017-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include <math.h>
 #include <stdint.h>
 #include "math_config.h"
+#include "test_defs.h"
+#include "test_sig.h"
 
 /*
 LOG2F_TABLE_BITS = 4
@@ -78,3 +80,10 @@ log2f (float x)
 strong_alias (log2f, __log2f_finite)
 hidden_alias (log2f, __ieee754_log2f)
 #endif
+
+TEST_SIG (S, F, 1, log2, 0.01, 11.1)
+TEST_ULP (log2f, 0.26)
+TEST_ULP_NONNEAREST (log2f, 0.5)
+TEST_INTERVAL (log2f, 0, 0xffff0000, 10000)
+TEST_INTERVAL (log2f, 0x1p-4, 0x1p4, 50000)
+TEST_INTERVAL (log2f, 0, inf, 50000)
diff --git a/math/logf.c b/math/logf.c
index 820f74c3e66a70..f2c26deaff19b9 100644
--- a/math/logf.c
+++ b/math/logf.c
@@ -1,13 +1,15 @@
 /*
  * Single-precision log function.
  *
- * Copyright (c) 2017-2023, Arm Limited.
+ * Copyright (c) 2017-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include <math.h>
 #include <stdint.h>
 #include "math_config.h"
+#include "test_defs.h"
+#include "test_sig.h"
 
 /*
 LOGF_TABLE_BITS = 4
@@ -77,3 +79,10 @@ logf (float x)
 strong_alias (logf, __logf_finite)
 hidden_alias (logf, __ieee754_logf)
 #endif
+
+TEST_SIG (S, F, 1, log, 0.01, 11.1)
+TEST_ULP (logf, 0.32)
+TEST_ULP_NONNEAREST (logf, 0.5)
+TEST_INTERVAL (logf, 0, 0xffff0000, 10000)
+TEST_INTERVAL (logf, 0x1p-4, 0x1p4, 500000)
+TEST_INTERVAL (logf, 0, inf, 50000)
diff --git a/math/logf_data.c b/math/logf_data.c
index 04247684755fdf..5c301a90af8e2d 100644
--- a/math/logf_data.c
+++ b/math/logf_data.c
@@ -1,7 +1,7 @@
 /*
  * Data definition for logf.
  *
- * Copyright (c) 2017-2019, Arm Limited.
+ * Copyright (c) 2017-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -27,6 +27,7 @@ const struct logf_data __logf_data = {
   { 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 },
   },
   .ln2 = 0x1.62e42fefa39efp-1,
+  .invln10 = 0x1.bcb7b1526e50ep-2,
   .poly = {
   -0x1.00ea348b88334p-2, 0x1.5575b0be00b6ap-2, -0x1.ffffef20a4123p-2,
   }
diff --git a/math/math_config.h b/math/math_config.h
index faf77b31fc99bd..0fc653f937617e 100644
--- a/math/math_config.h
+++ b/math/math_config.h
@@ -1,7 +1,7 @@
 /*
  * Configuration for math routines.
  *
- * Copyright (c) 2017-2023, Arm Limited.
+ * Copyright (c) 2017-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -13,9 +13,9 @@
 
 #ifndef WANT_ROUNDING
 /* If defined to 1, return correct results for special cases in non-nearest
-   rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f).
-   This may be set to 0 if there is no fenv support or if math functions only
-   get called in round to nearest mode.  */
+   rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than
+   -0.0f). This may be set to 0 if there is no fenv support or if math
+   functions only get called in round to nearest mode.  */
 # define WANT_ROUNDING 1
 #endif
 #ifndef WANT_ERRNO
@@ -117,6 +117,25 @@
 #define __math_check_oflowf arm_math_check_oflowf
 #define __math_check_uflowf arm_math_check_uflowf
 
+#define __exp_data arm_math_exp_data
+#define __asin_poly arm_math_asin_poly
+#define __asinf_poly arm_math_asinf_poly
+#define __asinh_data arm_math_asinh_data
+#define __asinhf_data arm_math_asinhf_data
+#define __atan_poly_data arm_math_atan_poly_data
+#define __atanf_poly_data arm_math_atanf_poly_data
+#define __cbrt_data arm_math_cbrt_data
+#define __cbrtf_data arm_math_cbrtf_data
+#define __erf_data arm_math_erf_data
+#define __expf_data arm_math_expf_data
+#define __expm1_poly arm_math_expm1_poly
+#define __expm1f_poly arm_math_expm1f_poly
+#define __log10_data arm_math_log10_data
+#define __log1p_data arm_math_log1p_data
+#define __log1pf_data arm_math_log1pf_data
+#define __log_data arm_math_log_data
+#define __tanf_poly_data arm_math_tanf_poly_data
+#define __v_log_data arm_math_v_log_data
 #define __sincosf_table arm_math_sincosf_table
 #define __inv_pio4 arm_math_inv_pio4
 #define __exp2f_data arm_math_exp2f_data
@@ -131,6 +150,25 @@
 #define __erf_data arm_math_erf_data
 #define __v_exp_data arm_math_v_exp_data
 #define __v_log_data arm_math_v_log_data
+#define __v_erf_data arm_math_v_erf_data
+#define __v_erfc_data arm_math_v_erfc_data
+#define __v_erfcf_data arm_math_v_erfcf_data
+#define __v_erff_data arm_math_v_erff_data
+#define __v_exp_tail_data arm_math_v_exp_tail_data
+#define __v_log10_data arm_math_v_log10_data
+#define __v_log2_data arm_math_v_log2_data
+#define __v_pow_exp_data arm_math_v_pow_exp_data
+#define __v_pow_log_data arm_math_v_pow_log_data
+#define __v_powf_data arm_math_v_powf_data
+
+/* On some platforms (in particular Windows) INFINITY and HUGE_VAL may be
+   defined in a way that does not produce the expected bit pattern.
+   Therefore we enforce the glibc math.h definition of INFINITY using a
+   builtin that is supported by both gcc and clang.  */
+#if defined (_WIN32) && (defined (__GNUC__) || defined (__clang__))
+# undef INFINITY
+# define INFINITY __builtin_inff()
+#endif
 
 #if HAVE_FAST_ROUND
 /* When set, the roundtoint and converttoint functions are provided with
@@ -365,11 +403,12 @@ extern const struct exp2f_data
   uint64_t tab[1 << EXP2F_TABLE_BITS];
   double shift_scaled;
   double poly[EXP2F_POLY_ORDER];
-  double shift;
   double invln2_scaled;
   double poly_scaled[EXP2F_POLY_ORDER];
+  double shift;
 } __exp2f_data HIDDEN;
 
+/* Data for logf and log10f.  */
 #define LOGF_TABLE_BITS 4
 #define LOGF_POLY_ORDER 4
 extern const struct logf_data
@@ -379,6 +418,7 @@ extern const struct logf_data
     double invc, logc;
   } tab[1 << LOGF_TABLE_BITS];
   double ln2;
+  double invln10;
   double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1.  */
 } __logf_data HIDDEN;
 
@@ -427,17 +467,19 @@ extern const struct powf_log2_data
 extern const struct exp_data
 {
   double invln2N;
-  double invlog10_2N;
-  double shift;
   double negln2hiN;
   double negln2loN;
-  double neglog10_2hiN;
-  double neglog10_2loN;
   double poly[4]; /* Last four coefficients.  */
+  double shift;
+
   double exp2_shift;
   double exp2_poly[EXP2_POLY_ORDER];
+
+  double neglog10_2hiN;
+  double neglog10_2loN;
   double exp10_poly[5];
   uint64_t tab[2*(1 << EXP_TABLE_BITS)];
+  double invlog10_2N;
 } __exp_data HIDDEN;
 
 #define LOG_TABLE_BITS 7
@@ -509,13 +551,214 @@ extern const struct erf_data
 #define V_EXP_TABLE_BITS 7
 extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN;
 
+#define V_LOG_POLY_ORDER 6
 #define V_LOG_TABLE_BITS 7
 extern const struct v_log_data
 {
+  /* Shared data for vector log and log-derived routines (e.g. asinh).  */
+  double poly[V_LOG_POLY_ORDER - 1];
+  double ln2;
   struct
   {
     double invc, logc;
   } table[1 << V_LOG_TABLE_BITS];
 } __v_log_data HIDDEN;
 
+/* Some data for SVE powf's internal exp and log.  */
+#define V_POWF_EXP2_TABLE_BITS 5
+#define V_POWF_EXP2_N (1 << V_POWF_EXP2_TABLE_BITS)
+#define V_POWF_LOG2_TABLE_BITS 5
+#define V_POWF_LOG2_N (1 << V_POWF_LOG2_TABLE_BITS)
+extern const struct v_powf_data
+{
+  double invc[V_POWF_LOG2_N];
+  double logc[V_POWF_LOG2_N];
+  uint64_t scale[V_POWF_EXP2_N];
+} __v_powf_data HIDDEN;
+
+/* Some data for AdvSIMD and SVE pow's internal exp and log.  */
+#define V_POW_EXP_TABLE_BITS 8
+extern const struct v_pow_exp_data
+{
+  double poly[3];
+  double n_over_ln2, ln2_over_n_hi, ln2_over_n_lo, shift;
+  uint64_t sbits[1 << V_POW_EXP_TABLE_BITS];
+} __v_pow_exp_data HIDDEN;
+
+#define V_POW_LOG_TABLE_BITS 7
+extern const struct v_pow_log_data
+{
+  double poly[7]; /* First coefficient is 1.  */
+  double ln2_hi, ln2_lo;
+  double invc[1 << V_POW_LOG_TABLE_BITS];
+  double logc[1 << V_POW_LOG_TABLE_BITS];
+  double logctail[1 << V_POW_LOG_TABLE_BITS];
+} __v_pow_log_data HIDDEN;
+
+#define V_LOG2_TABLE_BITS 7
+extern const struct v_log2_data
+{
+  double poly[5];
+  double invln2;
+  struct
+  {
+    double invc, log2c;
+  } table[1 << V_LOG2_TABLE_BITS];
+} __v_log2_data HIDDEN;
+
+#define V_LOG10_TABLE_BITS 7
+extern const struct v_log10_data
+{
+  double poly[5];
+  double invln10, log10_2;
+  struct
+  {
+    double invc, log10c;
+  } table[1 << V_LOG10_TABLE_BITS];
+} __v_log10_data HIDDEN;
+
+#define V_EXP_TAIL_TABLE_BITS 8
+extern const uint64_t __v_exp_tail_data[1 << V_EXP_TAIL_TABLE_BITS] HIDDEN;
+
+extern const struct v_erff_data
+{
+  struct
+  {
+    float erf, scale;
+  } tab[513];
+} __v_erff_data HIDDEN;
+
+extern const struct v_erfcf_data
+{
+  struct
+  {
+    float erfc, scale;
+  } tab[645];
+} __v_erfcf_data HIDDEN;
+
+extern const struct v_erf_data
+{
+  struct
+  {
+    double erf, scale;
+  } tab[769];
+} __v_erf_data HIDDEN;
+
+extern const struct v_erfc_data
+{
+  struct
+  {
+    double erfc, scale;
+  } tab[3488];
+} __v_erfc_data HIDDEN;
+
+/* Table with 4/PI to 192 bit precision.  */
+extern const uint32_t __inv_pio4[] HIDDEN;
+
+#if WANT_EXPERIMENTAL_MATH
+
+# define LOG1P_NCOEFFS 19
+extern const struct log1p_data
+{
+  double coeffs[LOG1P_NCOEFFS];
+} __log1p_data HIDDEN;
+
+# define LOG1PF_2U5
+# define LOG1PF_NCOEFFS 9
+extern const struct log1pf_data
+{
+  float coeffs[LOG1PF_NCOEFFS];
+} __log1pf_data HIDDEN;
+
+# define ASINF_POLY_ORDER 4
+extern const float __asinf_poly[ASINF_POLY_ORDER + 1] HIDDEN;
+
+# define ASIN_POLY_ORDER 11
+extern const double __asin_poly[ASIN_POLY_ORDER + 1] HIDDEN;
+
+# define ASINHF_NCOEFFS 8
+extern const struct asinhf_data
+{
+  float coeffs[ASINHF_NCOEFFS];
+} __asinhf_data HIDDEN;
+
+# define ASINH_NCOEFFS 18
+extern const struct asinh_data
+{
+  double poly[ASINH_NCOEFFS];
+} __asinh_data HIDDEN;
+
+# define ATAN_POLY_NCOEFFS 20
+extern const struct atan_poly_data
+{
+  double poly[ATAN_POLY_NCOEFFS];
+} __atan_poly_data HIDDEN;
+
+# define ATANF_POLY_NCOEFFS 8
+extern const struct atanf_poly_data
+{
+  float poly[ATANF_POLY_NCOEFFS];
+} __atanf_poly_data HIDDEN;
+
+extern const struct cbrtf_data
+{
+  float poly[4];
+  float table[5];
+} __cbrtf_data HIDDEN;
+
+extern const struct cbrt_data
+{
+  double poly[4];
+  double table[5];
+} __cbrt_data HIDDEN;
+
+# define EXPF_TABLE_BITS 5
+# define EXPF_POLY_ORDER 3
+extern const struct expf_data
+{
+  uint64_t tab[1 << EXPF_TABLE_BITS];
+  double invln2_scaled;
+  double poly_scaled[EXPF_POLY_ORDER];
+} __expf_data HIDDEN;
+
+# define EXPM1F_POLY_ORDER 5
+extern const float __expm1f_poly[EXPM1F_POLY_ORDER] HIDDEN;
+
+# define EXPM1_POLY_ORDER 11
+extern const double __expm1_poly[EXPM1_POLY_ORDER] HIDDEN;
+
+/* Data for low accuracy log10 (with 1/ln(10) included in coefficients).  */
+# define LOG10_TABLE_BITS 7
+# define LOG10_POLY_ORDER 6
+# define LOG10_POLY1_ORDER 12
+extern const struct log10_data
+{
+  double ln2hi;
+  double ln2lo;
+  double invln10;
+  double poly[LOG10_POLY_ORDER - 1]; /* First coefficient is 1/log(10).  */
+  double poly1[LOG10_POLY1_ORDER - 1];
+  struct
+  {
+    double invc, logc;
+  } tab[1 << LOG10_TABLE_BITS];
+#  if !HAVE_FAST_FMA
+  struct
+  {
+    double chi, clo;
+  } tab2[1 << LOG10_TABLE_BITS];
+#  endif
+} __log10_data HIDDEN;
+
+# define TANF_P_POLY_NCOEFFS 6
+/* cotan approach needs order 3 on [0, pi/4] to reach <3.5ulps.  */
+# define TANF_Q_POLY_NCOEFFS 4
+extern const struct tanf_poly_data
+{
+  float poly_tan[TANF_P_POLY_NCOEFFS];
+  float poly_cotan[TANF_Q_POLY_NCOEFFS];
+} __tanf_poly_data HIDDEN;
+
+#endif /* WANT_EXPERIMENTAL_MATH.  */
+
 #endif
diff --git a/pl/math/poly_generic.h b/math/poly_generic.h
similarity index 99%
rename from pl/math/poly_generic.h
rename to math/poly_generic.h
index 3fc25f8762f256..c21b61aad4c3d3 100644
--- a/pl/math/poly_generic.h
+++ b/math/poly_generic.h
@@ -1,7 +1,7 @@
 /*
  * Generic helpers for evaluating polynomials with various schemes.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
diff --git a/pl/math/poly_scalar_f32.h b/math/poly_scalar_f32.h
similarity index 80%
rename from pl/math/poly_scalar_f32.h
rename to math/poly_scalar_f32.h
index a9b1c5544494c8..198e5801938a06 100644
--- a/pl/math/poly_scalar_f32.h
+++ b/math/poly_scalar_f32.h
@@ -2,12 +2,12 @@
  * Helpers for evaluating polynomials on siongle-precision scalar input, using
  * various schemes.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#ifndef PL_MATH_POLY_SCALAR_F32_H
-#define PL_MATH_POLY_SCALAR_F32_H
+#ifndef MATH_POLY_SCALAR_F32_H
+#define MATH_POLY_SCALAR_F32_H
 
 #include <math.h>
 
diff --git a/pl/math/poly_scalar_f64.h b/math/poly_scalar_f64.h
similarity index 80%
rename from pl/math/poly_scalar_f64.h
rename to math/poly_scalar_f64.h
index 207dccee30ad07..6fbebe05d1df0d 100644
--- a/pl/math/poly_scalar_f64.h
+++ b/math/poly_scalar_f64.h
@@ -2,12 +2,12 @@
  * Helpers for evaluating polynomials on double-precision scalar input, using
  * various schemes.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#ifndef PL_MATH_POLY_SCALAR_F64_H
-#define PL_MATH_POLY_SCALAR_F64_H
+#ifndef MATH_POLY_SCALAR_F64_H
+#define MATH_POLY_SCALAR_F64_H
 
 #include <math.h>
 
diff --git a/math/pow.c b/math/pow.c
index af719fe5ab1058..1983bb2bbeba86 100644
--- a/math/pow.c
+++ b/math/pow.c
@@ -1,7 +1,7 @@
 /*
  * Double-precision x^y function.
  *
- * Copyright (c) 2018-2020, Arm Limited.
+ * Copyright (c) 2018-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -9,6 +9,7 @@
 #include <math.h>
 #include <stdint.h>
 #include "math_config.h"
+#include "test_defs.h"
 
 /*
 Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
@@ -378,3 +379,22 @@ hidden_alias (pow, __ieee754_pow)
 long double powl (long double x, long double y) { return pow (x, y); }
 # endif
 #endif
+
+TEST_ULP (pow, 0.05)
+TEST_ULP_NONNEAREST (pow, 0.5)
+TEST_INTERVAL2 (pow, 0.5, 2.0, 0, inf, 20000)
+TEST_INTERVAL2 (pow, -0.5, -2.0, 0, inf, 20000)
+TEST_INTERVAL2 (pow, 0.5, 2.0, -0, -inf, 20000)
+TEST_INTERVAL2 (pow, -0.5, -2.0, -0, -inf, 20000)
+TEST_INTERVAL2 (pow, 0.5, 2.0, 0x1p-10, 0x1p10, 40000)
+TEST_INTERVAL2 (pow, 0.5, 2.0, -0x1p-10, -0x1p10, 40000)
+TEST_INTERVAL2 (pow, 0, inf, 0.5, 2.0, 80000)
+TEST_INTERVAL2 (pow, 0, inf, -0.5, -2.0, 80000)
+TEST_INTERVAL2 (pow, 0x1.fp-1, 0x1.08p0, 0x1p8, 0x1p17, 80000)
+TEST_INTERVAL2 (pow, 0x1.fp-1, 0x1.08p0, -0x1p8, -0x1p17, 80000)
+TEST_INTERVAL2 (pow, 0, 0x1p-1000, 0, 1.0, 50000)
+TEST_INTERVAL2 (pow, 0x1p1000, inf, 0, 1.0, 50000)
+TEST_INTERVAL2 (pow, 0x1.ffffffffffff0p-1, 0x1.0000000000008p0, 0x1p60, 0x1p68,
+		50000)
+TEST_INTERVAL2 (pow, 0x1.ffffffffff000p-1, 0x1p0, 0x1p50, 0x1p52, 50000)
+TEST_INTERVAL2 (pow, -0x1.ffffffffff000p-1, -0x1p0, 0x1p50, 0x1p52, 50000)
diff --git a/math/powf.c b/math/powf.c
index 05c80bb2eb670e..3f3f41ca276aaa 100644
--- a/math/powf.c
+++ b/math/powf.c
@@ -1,13 +1,14 @@
 /*
  * Single-precision pow function.
  *
- * Copyright (c) 2017-2019, Arm Limited.
+ * Copyright (c) 2017-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include <math.h>
 #include <stdint.h>
 #include "math_config.h"
+#include "test_defs.h"
 
 /*
 POWF_LOG2_POLY_ORDER = 5
@@ -219,3 +220,12 @@ powf (float x, float y)
 strong_alias (powf, __powf_finite)
 hidden_alias (powf, __ieee754_powf)
 #endif
+
+TEST_ULP (powf, 0.4)
+TEST_ULP_NONNEAREST (powf, 0.5)
+TEST_INTERVAL2 (powf, 0x1p-1, 0x1p1, 0x1p-7, 0x1p7, 50000)
+TEST_INTERVAL2 (powf, 0x1p-1, 0x1p1, -0x1p-7, -0x1p7, 50000)
+TEST_INTERVAL2 (powf, 0x1p-70, 0x1p70, 0x1p-1, 0x1p1, 50000)
+TEST_INTERVAL2 (powf, 0x1p-70, 0x1p70, -0x1p-1, -0x1p1, 50000)
+TEST_INTERVAL2 (powf, 0x1.ep-1, 0x1.1p0, 0x1p8, 0x1p14, 50000)
+TEST_INTERVAL2 (powf, 0x1.ep-1, 0x1.1p0, -0x1p8, -0x1p14, 50000)
diff --git a/math/sincosf.c b/math/sincosf.c
index 446f21d60faf3a..05a71d78bb1efd 100644
--- a/math/sincosf.c
+++ b/math/sincosf.c
@@ -1,7 +1,7 @@
 /*
  * Single-precision sin/cos function.
  *
- * Copyright (c) 2018-2021, Arm Limited.
+ * Copyright (c) 2018-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -9,6 +9,7 @@
 #include <math.h>
 #include "math_config.h"
 #include "sincosf.h"
+#include "test_defs.h"
 
 /* Fast sincosf implementation.  Worst-case ULP is 0.5607, maximum relative
    error is 0.5303 * 2^-23.  A single-step range reduction is used for
@@ -77,3 +78,12 @@ sincosf (float y, float *sinp, float *cosp)
 #endif
     }
 }
+
+TEST_ULP (sincosf_sinf, 0.06)
+TEST_ULP (sincosf_cosf, 0.06)
+TEST_ULP_NONNEAREST (sincosf_sinf, 0.5)
+TEST_ULP_NONNEAREST (sincosf_cosf, 0.5)
+TEST_INTERVAL (sincosf_sinf, 0, 0xffff0000, 10000)
+TEST_SYM_INTERVAL (sincosf_sinf, 0x1p-14, 0x1p54, 50000)
+TEST_INTERVAL (sincosf_cosf, 0, 0xffff0000, 10000)
+TEST_SYM_INTERVAL (sincosf_cosf, 0x1p-14, 0x1p54, 50000)
diff --git a/math/sincosf.h b/math/sincosf.h
index ec23ed7aeb2615..912def33d29581 100644
--- a/math/sincosf.h
+++ b/math/sincosf.h
@@ -1,7 +1,7 @@
 /*
  * Header for sinf, cosf and sincosf.
  *
- * Copyright (c) 2018-2021, Arm Limited.
+ * Copyright (c) 2018-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -27,9 +27,6 @@ typedef struct
 /* Polynomial data (the cosine polynomial is negated in the 2nd entry).  */
 extern const sincos_t __sincosf_table[2] HIDDEN;
 
-/* Table with 4/PI to 192 bit precision.  */
-extern const uint32_t __inv_pio4[] HIDDEN;
-
 /* Top 12 bits of the float representation with the sign bit cleared.  */
 static inline uint32_t
 abstop12 (float x)
diff --git a/math/sinf.c b/math/sinf.c
index 8dd8ae458794c5..e244e115d32b21 100644
--- a/math/sinf.c
+++ b/math/sinf.c
@@ -1,13 +1,15 @@
 /*
  * Single-precision sin function.
  *
- * Copyright (c) 2018-2021, Arm Limited.
+ * Copyright (c) 2018-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include <math.h>
 #include "math_config.h"
 #include "sincosf.h"
+#include "test_defs.h"
+#include "test_sig.h"
 
 /* Fast sinf implementation.  Worst-case ULP is 0.5607, maximum relative
    error is 0.5303 * 2^-23.  A single-step range reduction is used for
@@ -65,3 +67,9 @@ sinf (float y)
   else
     return __math_invalidf (y);
 }
+
+TEST_SIG (S, F, 1, sin, -3.1, 3.1)
+TEST_ULP (sinf, 0.06)
+TEST_ULP_NONNEAREST (sinf, 0.5)
+TEST_INTERVAL (sinf, 0, 0xffff0000, 10000)
+TEST_SYM_INTERVAL (sinf, 0x1p-14, 0x1p54, 50000)
diff --git a/math/test/mathbench.c b/math/test/mathbench.c
index ed7e89bb7710a0..653c58fbc48477 100644
--- a/math/test/mathbench.c
+++ b/math/test/mathbench.c
@@ -1,10 +1,23 @@
 /*
  * Microbenchmark for math functions.
  *
- * Copyright (c) 2018-2023, Arm Limited.
+ * Copyright (c) 2018-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
+#if WANT_SVE_TESTS
+#  if __aarch64__ && __linux__
+#    ifdef __clang__
+#      pragma clang attribute push(__attribute__((target("sve"))),            \
+				   apply_to = any(function))
+#    else
+#      pragma GCC target("+sve")
+#    endif
+#  else
+#    error "SVE not supported - please disable WANT_SVE_TESTS"
+#  endif
+#endif
+
 #undef _GNU_SOURCE
 #define _GNU_SOURCE 1
 #include <stdint.h>
@@ -29,94 +42,6 @@ static float Af[N];
 static long measurecount = MEASURE;
 static long itercount = ITER;
 
-#ifdef __vpcs
-#include <arm_neon.h>
-typedef float64x2_t v_double;
-
-#define v_double_len() 2
-
-static inline v_double
-v_double_load (const double *p)
-{
-  return (v_double){p[0], p[1]};
-}
-
-static inline v_double
-v_double_dup (double x)
-{
-  return (v_double){x, x};
-}
-
-typedef float32x4_t v_float;
-
-#define v_float_len() 4
-
-static inline v_float
-v_float_load (const float *p)
-{
-  return (v_float){p[0], p[1], p[2], p[3]};
-}
-
-static inline v_float
-v_float_dup (float x)
-{
-  return (v_float){x, x, x, x};
-}
-#else
-/* dummy definitions to make things compile.  */
-typedef double v_double;
-typedef float v_float;
-#define v_double_len(x) 1
-#define v_double_load(x) (x)[0]
-#define v_double_dup(x) (x)
-#define v_float_len(x) 1
-#define v_float_load(x) (x)[0]
-#define v_float_dup(x) (x)
-
-#endif
-
-#if WANT_SVE_MATH
-#include <arm_sve.h>
-typedef svbool_t sv_bool;
-typedef svfloat64_t sv_double;
-
-#define sv_double_len() svcntd()
-
-static inline sv_double
-sv_double_load (const double *p)
-{
-  svbool_t pg = svptrue_b64();
-  return svld1(pg, p);
-}
-
-static inline sv_double
-sv_double_dup (double x)
-{
-  return svdup_n_f64(x);
-}
-
-typedef svfloat32_t sv_float;
-
-#define sv_float_len() svcntw()
-
-static inline sv_float
-sv_float_load (const float *p)
-{
-  svbool_t pg = svptrue_b32();
-  return svld1(pg, p);
-}
-
-static inline sv_float
-sv_float_dup (float x)
-{
-  return svdup_n_f32(x);
-}
-#else
-/* dummy definitions to make things compile.  */
-#define sv_double_len(x) 1
-#define sv_float_len(x) 1
-#endif
-
 static double
 dummy (double x)
 {
@@ -128,28 +53,28 @@ dummyf (float x)
 {
   return x;
 }
-#ifdef __vpcs
-__vpcs static v_double
-__vn_dummy (v_double x)
+#if __aarch64__ && __linux__
+__vpcs static float64x2_t
+__vn_dummy (float64x2_t x)
 {
   return x;
 }
 
-__vpcs static v_float
-__vn_dummyf (v_float x)
+__vpcs static float32x4_t
+__vn_dummyf (float32x4_t x)
 {
   return x;
 }
 #endif
-#if WANT_SVE_MATH
-static sv_double
-__sv_dummy (sv_double x, sv_bool pg)
+#if WANT_SVE_TESTS
+static svfloat64_t
+__sv_dummy (svfloat64_t x, svbool_t pg)
 {
   return x;
 }
 
-static sv_float
-__sv_dummyf (sv_float x, sv_bool pg)
+static svfloat32_t
+__sv_dummyf (svfloat32_t x, svbool_t pg)
 {
   return x;
 }
@@ -169,16 +94,17 @@ static const struct fun
   {
     double (*d) (double);
     float (*f) (float);
-#ifdef __vpcs
-    __vpcs v_double (*vnd) (v_double);
-    __vpcs v_float (*vnf) (v_float);
+#if __aarch64__ && __linux__
+    __vpcs float64x2_t (*vnd) (float64x2_t);
+    __vpcs float32x4_t (*vnf) (float32x4_t);
 #endif
-#if WANT_SVE_MATH
-    sv_double (*svd) (sv_double, sv_bool);
-    sv_float (*svf) (sv_float, sv_bool);
+#if WANT_SVE_TESTS
+    svfloat64_t (*svd) (svfloat64_t, svbool_t);
+    svfloat32_t (*svf) (svfloat32_t, svbool_t);
 #endif
   } fun;
 } funtab[] = {
+// clang-format off
 #define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}},
 #define F(func, lo, hi) {#func, 'f', 0, lo, hi, {.f = func}},
 #define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}},
@@ -187,11 +113,11 @@ static const struct fun
 #define SVF(func, lo, hi) {#func, 'f', 's', lo, hi, {.svf = func}},
 D (dummy, 1.0, 2.0)
 F (dummyf, 1.0, 2.0)
-#ifdef __vpcs
+#if  __aarch64__ && __linux__
 VND (__vn_dummy, 1.0, 2.0)
 VNF (__vn_dummyf, 1.0, 2.0)
 #endif
-#if WANT_SVE_MATH
+#if WANT_SVE_TESTS
 SVD (__sv_dummy, 1.0, 2.0)
 SVF (__sv_dummyf, 1.0, 2.0)
 #endif
@@ -203,6 +129,7 @@ SVF (__sv_dummyf, 1.0, 2.0)
 #undef VND
 #undef SVF
 #undef SVD
+  // clang-format on
 };
 
 static void
@@ -301,75 +228,77 @@ runf_latency (float f (float))
     prev = f (Af[i] + prev * z);
 }
 
-#ifdef __vpcs
+#if  __aarch64__ && __linux__
 static void
-run_vn_thruput (__vpcs v_double f (v_double))
+run_vn_thruput (__vpcs float64x2_t f (float64x2_t))
 {
-  for (int i = 0; i < N; i += v_double_len ())
-    f (v_double_load (A+i));
+  for (int i = 0; i < N; i += 2)
+    f (vld1q_f64 (A + i));
 }
 
 static void
-runf_vn_thruput (__vpcs v_float f (v_float))
+runf_vn_thruput (__vpcs float32x4_t f (float32x4_t))
 {
-  for (int i = 0; i < N; i += v_float_len ())
-    f (v_float_load (Af+i));
+  for (int i = 0; i < N; i += 4)
+    f (vld1q_f32 (Af + i));
 }
 
 static void
-run_vn_latency (__vpcs v_double f (v_double))
+run_vn_latency (__vpcs float64x2_t f (float64x2_t))
 {
   volatile uint64x2_t vsel = (uint64x2_t) { 0, 0 };
   uint64x2_t sel = vsel;
-  v_double prev = v_double_dup (0);
-  for (int i = 0; i < N; i += v_double_len ())
-    prev = f (vbslq_f64 (sel, prev, v_double_load (A+i)));
+  float64x2_t prev = vdupq_n_f64 (0);
+  for (int i = 0; i < N; i += 2)
+    prev = f (vbslq_f64 (sel, prev, vld1q_f64 (A + i)));
 }
 
 static void
-runf_vn_latency (__vpcs v_float f (v_float))
+runf_vn_latency (__vpcs float32x4_t f (float32x4_t))
 {
   volatile uint32x4_t vsel = (uint32x4_t) { 0, 0, 0, 0 };
   uint32x4_t sel = vsel;
-  v_float prev = v_float_dup (0);
-  for (int i = 0; i < N; i += v_float_len ())
-    prev = f (vbslq_f32 (sel, prev, v_float_load (Af+i)));
+  float32x4_t prev = vdupq_n_f32 (0);
+  for (int i = 0; i < N; i += 4)
+    prev = f (vbslq_f32 (sel, prev, vld1q_f32 (Af + i)));
 }
 #endif
 
-#if WANT_SVE_MATH
+#if WANT_SVE_TESTS
 static void
-run_sv_thruput (sv_double f (sv_double, sv_bool))
+run_sv_thruput (svfloat64_t f (svfloat64_t, svbool_t))
 {
-  for (int i = 0; i < N; i += sv_double_len ())
-    f (sv_double_load (A+i), svptrue_b64 ());
+  for (int i = 0; i < N; i += svcntd ())
+    f (svld1_f64 (svptrue_b64 (), A + i), svptrue_b64 ());
 }
 
 static void
-runf_sv_thruput (sv_float f (sv_float, sv_bool))
+runf_sv_thruput (svfloat32_t f (svfloat32_t, svbool_t))
 {
-  for (int i = 0; i < N; i += sv_float_len ())
-    f (sv_float_load (Af+i), svptrue_b32 ());
+  for (int i = 0; i < N; i += svcntw ())
+    f (svld1_f32 (svptrue_b32 (), Af + i), svptrue_b32 ());
 }
 
 static void
-run_sv_latency (sv_double f (sv_double, sv_bool))
+run_sv_latency (svfloat64_t f (svfloat64_t, svbool_t))
 {
-  volatile sv_bool vsel = svptrue_b64 ();
-  sv_bool sel = vsel;
-  sv_double prev = sv_double_dup (0);
-  for (int i = 0; i < N; i += sv_double_len ())
-    prev = f (svsel_f64 (sel, sv_double_load (A+i), prev), svptrue_b64 ());
+  volatile svbool_t vsel = svptrue_b64 ();
+  svbool_t sel = vsel;
+  svfloat64_t prev = svdup_f64 (0);
+  for (int i = 0; i < N; i += svcntd ())
+    prev = f (svsel_f64 (sel, svld1_f64 (svptrue_b64 (), A + i), prev),
+	      svptrue_b64 ());
 }
 
 static void
-runf_sv_latency (sv_float f (sv_float, sv_bool))
+runf_sv_latency (svfloat32_t f (svfloat32_t, svbool_t))
 {
-  volatile sv_bool vsel = svptrue_b32 ();
-  sv_bool sel = vsel;
-  sv_float prev = sv_float_dup (0);
-  for (int i = 0; i < N; i += sv_float_len ())
-    prev = f (svsel_f32 (sel, sv_float_load (Af+i), prev), svptrue_b32 ());
+  volatile svbool_t vsel = svptrue_b32 ();
+  svbool_t sel = vsel;
+  svfloat32_t prev = svdup_f32 (0);
+  for (int i = 0; i < N; i += svcntw ())
+    prev = f (svsel_f32 (sel, svld1_f32 (svptrue_b32 (), Af + i), prev),
+	      svptrue_b32 ());
 }
 #endif
 
@@ -377,7 +306,11 @@ static uint64_t
 tic (void)
 {
   struct timespec ts;
+#if defined(_MSC_VER)
+  if (!timespec_get (&ts, TIME_UTC))
+#else
   if (clock_gettime (CLOCK_REALTIME, &ts))
+#endif
     abort ();
   return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
 }
@@ -405,9 +338,11 @@ bench1 (const struct fun *f, int type, double lo, double hi)
   int vlen = 1;
 
   if (f->vec == 'n')
-    vlen = f->prec == 'd' ? v_double_len() : v_float_len();
+    vlen = f->prec == 'd' ? 2 : 4;
+#if WANT_SVE_TESTS
   else if (f->vec == 's')
-    vlen = f->prec == 'd' ? sv_double_len() : sv_float_len();
+    vlen = f->prec == 'd' ? svcntd () : svcntw ();
+#endif
 
   if (f->prec == 'd' && type == 't' && f->vec == 0)
     TIMEIT (run_thruput, f->fun.d);
@@ -417,7 +352,7 @@ bench1 (const struct fun *f, int type, double lo, double hi)
     TIMEIT (runf_thruput, f->fun.f);
   else if (f->prec == 'f' && type == 'l' && f->vec == 0)
     TIMEIT (runf_latency, f->fun.f);
-#ifdef __vpcs
+#if __aarch64__ && __linux__
   else if (f->prec == 'd' && type == 't' && f->vec == 'n')
     TIMEIT (run_vn_thruput, f->fun.vnd);
   else if (f->prec == 'd' && type == 'l' && f->vec == 'n')
@@ -427,7 +362,7 @@ bench1 (const struct fun *f, int type, double lo, double hi)
   else if (f->prec == 'f' && type == 'l' && f->vec == 'n')
     TIMEIT (runf_vn_latency, f->fun.vnf);
 #endif
-#if WANT_SVE_MATH
+#if WANT_SVE_TESTS
   else if (f->prec == 'd' && type == 't' && f->vec == 's')
     TIMEIT (run_sv_thruput, f->fun.svd);
   else if (f->prec == 'd' && type == 'l' && f->vec == 's')
@@ -640,3 +575,7 @@ main (int argc, char *argv[])
     }
   return 0;
 }
+
+#if __aarch64__ && __linux__ && WANT_SVE_TESTS && defined(__clang__)
+#  pragma clang attribute pop
+#endif
diff --git a/math/test/mathbench_funcs.h b/math/test/mathbench_funcs.h
index 84c4e68650acbb..261ab02f55c3fd 100644
--- a/math/test/mathbench_funcs.h
+++ b/math/test/mathbench_funcs.h
@@ -1,27 +1,13 @@
 /*
  * Function entries for mathbench.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 /* clang-format off */
-D (exp, -9.9, 9.9)
-D (exp, 0.5, 1.0)
-D (exp10, -9.9, 9.9)
-D (exp2, -9.9, 9.9)
-D (log, 0.01, 11.1)
-D (log, 0.999, 1.001)
-D (log2, 0.01, 11.1)
-D (log2, 0.999, 1.001)
 {"pow", 'd', 0, 0.01, 11.1, {.d = xypow}},
 D (xpow, 0.01, 11.1)
 D (ypow, -9.9, 9.9)
-D (erf, -6.0, 6.0)
-
-F (expf, -9.9, 9.9)
-F (exp2f, -9.9, 9.9)
-F (logf, 0.01, 11.1)
-F (log2f, 0.01, 11.1)
 {"powf", 'f', 0, 0.01, 11.1, {.f = xypowf}},
 F (xpowf, 0.01, 11.1)
 F (ypowf, -9.9, 9.9)
@@ -31,32 +17,105 @@ F (ypowf, -9.9, 9.9)
 {"sincosf", 'f', 0, 3.3, 33.3, {.f = sincosf_wrap}},
 {"sincosf", 'f', 0, 100, 1000, {.f = sincosf_wrap}},
 {"sincosf", 'f', 0, 1e6, 1e32, {.f = sincosf_wrap}},
-F (sinf, 0.1, 0.7)
-F (sinf, 0.8, 3.1)
-F (sinf, -3.1, 3.1)
-F (sinf, 3.3, 33.3)
-F (sinf, 100, 1000)
-F (sinf, 1e6, 1e32)
-F (cosf, 0.1, 0.7)
-F (cosf, 0.8, 3.1)
-F (cosf, -3.1, 3.1)
-F (cosf, 3.3, 33.3)
-F (cosf, 100, 1000)
-F (cosf, 1e6, 1e32)
-F (erff, -4.0, 4.0)
-#ifdef __vpcs
-VND (_ZGVnN2v_exp, -9.9, 9.9)
-VND (_ZGVnN2v_log, 0.01, 11.1)
-{"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}},
-VND (_ZGVnN2v_sin, -3.1, 3.1)
-VND (_ZGVnN2v_cos, -3.1, 3.1)
-VNF (_ZGVnN4v_expf, -9.9, 9.9)
+#if WANT_TRIGPI_TESTS
+F (arm_math_cospif, -0.9, 0.9)
+D (arm_math_cospi, -0.9, 0.9)
+F (arm_math_sinpif, -0.9, 0.9)
+D (arm_math_sinpi, -0.9, 0.9)
+F (arm_math_tanpif, -0.9, 0.9)
+D (arm_math_tanpi, -0.9, 0.9)
+{"sincospif", 'f', 0, -0.9, 0.9, {.f = sincospif_wrap}},
+{"sincospi", 'd', 0, -0.9, 0.9, {.d = sincospi_wrap}},
+#endif
+#if WANT_EXPERIMENTAL_MATH
+D (arm_math_erf, -6.0, 6.0)
+F (arm_math_erff, -4.0, 4.0)
+{"atan2f", 'f', 0, -10.0, 10.0, {.f = atan2f_wrap}},
+{"atan2",  'd', 0, -10.0, 10.0, {.d = atan2_wrap}},
+{"powi",   'd', 0,  0.01, 11.1, {.d = powi_wrap}},
+#endif
+#if __aarch64__ && __linux__
+{"_ZGVnN4vv_atan2f", 'f', 'n', -10.0, 10.0, {.vnf = _Z_atan2f_wrap}},
+{"_ZGVnN2vv_atan2",  'd', 'n', -10.0, 10.0, {.vnd = _Z_atan2_wrap}},
+{"_ZGVnN4vv_hypotf", 'f', 'n', -10.0, 10.0, {.vnf = _Z_hypotf_wrap}},
+{"_ZGVnN2vv_hypot",  'd', 'n', -10.0, 10.0, {.vnd = _Z_hypot_wrap}},
+{"_ZGVnN2vv_pow",    'd', 'n', -10.0, 10.0, {.vnd = xy_Z_pow}},
+{"x_ZGVnN2vv_pow",   'd', 'n', -10.0, 10.0, {.vnd = x_Z_pow}},
+{"y_ZGVnN2vv_pow",   'd', 'n', -10.0, 10.0, {.vnd = y_Z_pow}},
+{"_ZGVnN4vv_powf",  'f', 'n',   0.01, 11.1, {.vnf = xy_Z_powf}},
+{"x_ZGVnN4vv_powf", 'f', 'n',   0.01, 11.1, {.vnf = x_Z_powf}},
+{"y_ZGVnN4vv_powf", 'f', 'n', -10.0,  10.0, {.vnf = y_Z_powf}},
+{"_ZGVnN4vl4_modff", 'f', 'n', -10.0, 10.0, {.vnf = _Z_modff_wrap}},
+{"_ZGVnN2vl8_modf",  'd', 'n', -10.0, 10.0, {.vnd = _Z_modf_wrap}},
+{"_ZGVnN4vl4l4_sincosf", 'f', 'n', -3.1, 3.1, {.vnf = _Z_sincosf_wrap}},
+{"_ZGVnN2vl8l8_sincos", 'd', 'n', -3.1, 3.1, {.vnd = _Z_sincos_wrap}},
+{"_ZGVnN4v_cexpif", 'f', 'n', -3.1, 3.1, {.vnf = _Z_cexpif_wrap}},
+{"_ZGVnN2v_cexpi", 'd', 'n', -3.1, 3.1, {.vnd = _Z_cexpi_wrap}},
 VNF (_ZGVnN4v_expf_1u, -9.9, 9.9)
-VNF (_ZGVnN4v_exp2f, -9.9, 9.9)
 VNF (_ZGVnN4v_exp2f_1u, -9.9, 9.9)
-VNF (_ZGVnN4v_logf, 0.01, 11.1)
-{"_ZGVnN4vv_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy_Z_powf}},
-VNF (_ZGVnN4v_sinf, -3.1, 3.1)
-VNF (_ZGVnN4v_cosf, -3.1, 3.1)
+# if WANT_TRIGPI_TESTS
+VNF (_ZGVnN4v_cospif, -0.9, 0.9)
+VND (_ZGVnN2v_cospi, -0.9, 0.9)
+VNF (_ZGVnN4v_sinpif, -0.9, 0.9)
+VND (_ZGVnN2v_sinpi, -0.9, 0.9)
+VNF (_ZGVnN4v_tanpif, -0.9, 0.9)
+VND (_ZGVnN2v_tanpi, -0.9, 0.9)
+{"_ZGVnN4vl4l4_sincospif", 'f', 'n', -0.9, 0.9, {.vnf = _Z_sincospif_wrap}},
+{"_ZGVnN2vl8l8_sincospi", 'd', 'n', -0.9, 0.9, {.vnd = _Z_sincospi_wrap}},
+# endif
+#endif
+
+#if WANT_SVE_TESTS
+{ "_ZGVsMxvv_atan2f", 'f', 's', -10.0, 10.0, { .svf = _Z_sv_atan2f_wrap } },
+{ "_ZGVsMxvv_atan2", 'd', 's', -10.0, 10.0, { .svd = _Z_sv_atan2_wrap } },
+{ "_ZGVsMxvv_hypotf", 'f', 's', -10.0, 10.0, { .svf = _Z_sv_hypotf_wrap } },
+{ "_ZGVsMxvv_hypot", 'd', 's', -10.0, 10.0, { .svd = _Z_sv_hypot_wrap } },
+{"_ZGVsMxvv_powf",   'f', 's', -10.0, 10.0, {.svf = xy_Z_sv_powf}},
+{"x_ZGVsMxvv_powf",  'f', 's', -10.0, 10.0, {.svf = x_Z_sv_powf}},
+{"y_ZGVsMxvv_powf",  'f', 's', -10.0, 10.0, {.svf = y_Z_sv_powf}},
+{"_ZGVsMxvv_pow",    'd', 's', -10.0, 10.0, {.svd = xy_Z_sv_pow}},
+{"x_ZGVsMxvv_pow",   'd', 's', -10.0, 10.0, {.svd = x_Z_sv_pow}},
+{"y_ZGVsMxvv_pow",   'd', 's', -10.0, 10.0, {.svd = y_Z_sv_pow}},
+{"_ZGVsMxvl4_modff", 'f', 's', -10.0, 10.0, {.svf = _Z_sv_modff_wrap}},
+{"_ZGVsMxvl8_modf",  'd', 's', -10.0, 10.0, {.svd = _Z_sv_modf_wrap}},
+{"_ZGVsMxvl4l4_sincosf", 'f', 's', -3.1, 3.1, {.svf = _Z_sv_sincosf_wrap}},
+{"_ZGVsMxvl8l8_sincos", 'd', 's', -3.1, 3.1, {.svd = _Z_sv_sincos_wrap}},
+{"_ZGVsMxv_cexpif", 'f', 's', -3.1, 3.1, {.svf = _Z_sv_cexpif_wrap}},
+{"_ZGVsMxv_cexpi", 'd', 's', -3.1, 3.1, {.svd = _Z_sv_cexpi_wrap}},
+# if WANT_TRIGPI_TESTS
+SVF (_ZGVsMxv_cospif, -0.9, 0.9)
+SVD (_ZGVsMxv_cospi, -0.9, 0.9)
+SVF (_ZGVsMxv_sinpif, -0.9, 0.9)
+SVD (_ZGVsMxv_sinpi, -0.9, 0.9)
+SVF (_ZGVsMxv_tanpif, -0.9, 0.9)
+SVD (_ZGVsMxv_tanpi, -0.9, 0.9)
+{"_ZGVsMxvl4l4_sincospif", 'f', 's', -0.9, 0.9, {.svf = _Z_sv_sincospif_wrap}},
+{"_ZGVsMxvl8l8_sincospi", 'd', 's', -0.9, 0.9, {.svd = _Z_sv_sincospi_wrap}},
+# endif
+# if WANT_EXPERIMENTAL_MATH
+{"_ZGVsMxvv_powi",   'f', 's', -10.0, 10.0, {.svf = _Z_sv_powi_wrap}},
+{"_ZGVsMxvv_powk",   'd', 's', -10.0, 10.0, {.svd = _Z_sv_powk_wrap}},
+# endif
 #endif
-  /* clang-format on */
+    /* clang-format on */
+
+#define _ZSF1(fun, a, b) F (fun##f, a, b)
+#define _ZSD1(f, a, b) D (f, a, b)
+
+#define _ZVF1(fun, a, b) VNF (_ZGVnN4v_##fun##f, a, b)
+#define _ZVD1(f, a, b) VND (_ZGVnN2v_##f, a, b)
+
+#define _ZSVF1(fun, a, b) SVF (_ZGVsMxv_##fun##f, a, b)
+#define _ZSVD1(f, a, b) SVD (_ZGVsMxv_##f, a, b)
+
+/* No auto-generated wrappers for binary functions - they have to be
+   manually defined in mathbench_wrappers.h. We have to define silent
+   macros for them anyway as they will be emitted by TEST_SIG.  */
+#define _ZSF2(...)
+#define _ZSD2(...)
+#define _ZVF2(...)
+#define _ZVD2(...)
+#define _ZSVF2(...)
+#define _ZSVD2(...)
+
+#include "test/mathbench_funcs_gen.h"
diff --git a/math/test/mathbench_wrappers.h b/math/test/mathbench_wrappers.h
index 062b9db56de51a..32dcee36530a44 100644
--- a/math/test/mathbench_wrappers.h
+++ b/math/test/mathbench_wrappers.h
@@ -1,24 +1,314 @@
 /*
  * Function wrappers for mathbench.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#ifdef __vpcs
+#if WANT_EXPERIMENTAL_MATH
+static double
+atan2_wrap (double x)
+{
+  return atan2 (5.0, x);
+}
+
+static float
+atan2f_wrap (float x)
+{
+  return atan2f (5.0f, x);
+}
+
+static double
+powi_wrap (double x)
+{
+  return __builtin_powi (x, (int) round (x));
+}
+#endif /* WANT_EXPERIMENTAL_MATH.  */
+
+#if __aarch64__ && __linux__
+
+__vpcs static float32x4_t
+_Z_sincospif_wrap (float32x4_t x)
+{
+  float s[4], c[4];
+  _ZGVnN4vl4l4_sincospif (x, s, c);
+  return vld1q_f32 (s) + vld1q_f32 (c);
+}
+
+__vpcs static float64x2_t
+_Z_sincospi_wrap (float64x2_t x)
+{
+  double s[2], c[2];
+  _ZGVnN2vl8l8_sincospi (x, s, c);
+  return vld1q_f64 (s) + vld1q_f64 (c);
+}
 
-__vpcs static v_float
-xy_Z_powf (v_float x)
+__vpcs static float64x2_t
+_Z_atan2_wrap (float64x2_t x)
+{
+  return _ZGVnN2vv_atan2 (vdupq_n_f64 (5.0), x);
+}
+
+__vpcs static float32x4_t
+_Z_atan2f_wrap (float32x4_t x)
+{
+  return _ZGVnN4vv_atan2f (vdupq_n_f32 (5.0f), x);
+}
+
+__vpcs static float32x4_t
+_Z_hypotf_wrap (float32x4_t x)
+{
+  return _ZGVnN4vv_hypotf (vdupq_n_f32 (5.0f), x);
+}
+
+__vpcs static float64x2_t
+_Z_hypot_wrap (float64x2_t x)
+{
+  return _ZGVnN2vv_hypot (vdupq_n_f64 (5.0), x);
+}
+
+__vpcs static float32x4_t
+xy_Z_powf (float32x4_t x)
 {
   return _ZGVnN4vv_powf (x, x);
 }
 
-__vpcs static v_double
-xy_Z_pow (v_double x)
+__vpcs static float32x4_t
+x_Z_powf (float32x4_t x)
+{
+  return _ZGVnN4vv_powf (x, vdupq_n_f32 (23.4));
+}
+
+__vpcs static float32x4_t
+y_Z_powf (float32x4_t x)
+{
+  return _ZGVnN4vv_powf (vdupq_n_f32 (2.34), x);
+}
+
+__vpcs static float64x2_t
+xy_Z_pow (float64x2_t x)
 {
   return _ZGVnN2vv_pow (x, x);
 }
 
+__vpcs static float64x2_t
+x_Z_pow (float64x2_t x)
+{
+  return _ZGVnN2vv_pow (x, vdupq_n_f64 (23.4));
+}
+
+__vpcs static float64x2_t
+y_Z_pow (float64x2_t x)
+{
+  return _ZGVnN2vv_pow (vdupq_n_f64 (2.34), x);
+}
+
+__vpcs static float32x4_t
+_Z_modff_wrap (float32x4_t x)
+{
+  float y[4];
+  float32x4_t ret = _ZGVnN4vl4_modff (x, y);
+  return ret + vld1q_f32 (y);
+}
+
+__vpcs static float64x2_t
+_Z_modf_wrap (float64x2_t x)
+{
+  double y[2];
+  float64x2_t ret = _ZGVnN2vl8_modf (x, y);
+  return ret + vld1q_f64 (y);
+}
+
+__vpcs static float32x4_t
+_Z_sincosf_wrap (float32x4_t x)
+{
+  float s[4], c[4];
+  _ZGVnN4vl4l4_sincosf (x, s, c);
+  return vld1q_f32 (s) + vld1q_f32 (c);
+}
+
+__vpcs static float32x4_t
+_Z_cexpif_wrap (float32x4_t x)
+{
+  float32x4x2_t sc = _ZGVnN4v_cexpif (x);
+  return sc.val[0] + sc.val[1];
+}
+
+__vpcs static float64x2_t
+_Z_sincos_wrap (float64x2_t x)
+{
+  double s[2], c[2];
+  _ZGVnN2vl8l8_sincos (x, s, c);
+  return vld1q_f64 (s) + vld1q_f64 (c);
+}
+
+__vpcs static float64x2_t
+_Z_cexpi_wrap (float64x2_t x)
+{
+  float64x2x2_t sc = _ZGVnN2v_cexpi (x);
+  return sc.val[0] + sc.val[1];
+}
+
+#endif
+
+#if WANT_SVE_TESTS
+
+static svfloat32_t
+_Z_sv_atan2f_wrap (svfloat32_t x, svbool_t pg)
+{
+  return _ZGVsMxvv_atan2f (x, svdup_f32 (5.0f), pg);
+}
+
+static svfloat64_t
+_Z_sv_atan2_wrap (svfloat64_t x, svbool_t pg)
+{
+  return _ZGVsMxvv_atan2 (x, svdup_f64 (5.0), pg);
+}
+
+static svfloat32_t
+_Z_sv_hypotf_wrap (svfloat32_t x, svbool_t pg)
+{
+  return _ZGVsMxvv_hypotf (x, svdup_f32 (5.0), pg);
+}
+
+static svfloat64_t
+_Z_sv_hypot_wrap (svfloat64_t x, svbool_t pg)
+{
+  return _ZGVsMxvv_hypot (x, svdup_f64 (5.0), pg);
+}
+
+static svfloat32_t
+xy_Z_sv_powf (svfloat32_t x, svbool_t pg)
+{
+  return _ZGVsMxvv_powf (x, x, pg);
+}
+
+static svfloat32_t
+x_Z_sv_powf (svfloat32_t x, svbool_t pg)
+{
+  return _ZGVsMxvv_powf (x, svdup_f32 (23.4f), pg);
+}
+
+static svfloat32_t
+y_Z_sv_powf (svfloat32_t x, svbool_t pg)
+{
+  return _ZGVsMxvv_powf (svdup_f32 (2.34f), x, pg);
+}
+
+static svfloat64_t
+xy_Z_sv_pow (svfloat64_t x, svbool_t pg)
+{
+  return _ZGVsMxvv_pow (x, x, pg);
+}
+
+static svfloat64_t
+x_Z_sv_pow (svfloat64_t x, svbool_t pg)
+{
+  return _ZGVsMxvv_pow (x, svdup_f64 (23.4), pg);
+}
+
+static svfloat64_t
+y_Z_sv_pow (svfloat64_t x, svbool_t pg)
+{
+  return _ZGVsMxvv_pow (svdup_f64 (2.34), x, pg);
+}
+
+static svfloat32_t
+_Z_sv_sincospif_wrap (svfloat32_t x, svbool_t pg)
+{
+  float s[svcntw ()], c[svcntw ()];
+  _ZGVsMxvl4l4_sincospif (x, s, c, pg);
+  return svadd_x (pg, svld1 (pg, s), svld1 (pg, c));
+}
+
+static svfloat64_t
+_Z_sv_sincospi_wrap (svfloat64_t x, svbool_t pg)
+{
+  double s[svcntd ()], c[svcntd ()];
+  _ZGVsMxvl8l8_sincospi (x, s, c, pg);
+  return svadd_x (pg, svld1 (pg, s), svld1 (pg, c));
+}
+
+static svfloat32_t
+_Z_sv_modff_wrap (svfloat32_t x, svbool_t pg)
+{
+  float i[svcntw ()];
+  svfloat32_t r = _ZGVsMxvl4_modff (x, i, pg);
+  return svadd_x (pg, r, svld1 (pg, i));
+}
+
+static svfloat64_t
+_Z_sv_modf_wrap (svfloat64_t x, svbool_t pg)
+{
+  double i[svcntd ()];
+  svfloat64_t r = _ZGVsMxvl8_modf (x, i, pg);
+  return svadd_x (pg, r, svld1 (pg, i));
+}
+
+static svfloat32_t
+_Z_sv_sincosf_wrap (svfloat32_t x, svbool_t pg)
+{
+  float s[svcntw ()], c[svcntw ()];
+  _ZGVsMxvl4l4_sincosf (x, s, c, pg);
+  return svadd_x (pg, svld1 (pg, s), svld1 (pg, c)); /* Use sin and cos.  */
+}
+
+static svfloat32_t
+_Z_sv_cexpif_wrap (svfloat32_t x, svbool_t pg)
+{
+  svfloat32x2_t sc = _ZGVsMxv_cexpif (x, pg);
+  return svadd_x (pg, svget2 (sc, 0), svget2 (sc, 1));
+}
+
+static svfloat64_t
+_Z_sv_sincos_wrap (svfloat64_t x, svbool_t pg)
+{
+  double s[svcntd ()], c[svcntd ()];
+  _ZGVsMxvl8l8_sincos (x, s, c, pg);
+  return svadd_x (pg, svld1 (pg, s), svld1 (pg, c)); /* Use sin and cos.  */
+}
+
+static svfloat64_t
+_Z_sv_cexpi_wrap (svfloat64_t x, svbool_t pg)
+{
+  svfloat64x2_t sc = _ZGVsMxv_cexpi (x, pg);
+  return svadd_x (pg, svget2 (sc, 0), svget2 (sc, 1));
+}
+
+# if WANT_EXPERIMENTAL_MATH
+
+static svfloat32_t
+_Z_sv_powi_wrap (svfloat32_t x, svbool_t pg)
+{
+  return _ZGVsMxvv_powi (x, svcvt_s32_f32_x (pg, x), pg);
+}
+
+static svfloat64_t
+_Z_sv_powk_wrap (svfloat64_t x, svbool_t pg)
+{
+  return _ZGVsMxvv_powk (x, svcvt_s64_f64_x (pg, x), pg);
+}
+
+# endif
+
+#endif
+
+#if __aarch64__
+static float
+sincospif_wrap (float x)
+{
+  float s, c;
+  arm_math_sincospif (x, &s, &c);
+  return s + c;
+}
+
+static double
+sincospi_wrap (double x)
+{
+  double s, c;
+  arm_math_sincospi (x, &s, &c);
+  return s + c;
+}
 #endif
 
 static double
diff --git a/math/test/mathtest.c b/math/test/mathtest.c
index 834233fdde9da7..6e81f0d7b6340d 100644
--- a/math/test/mathtest.c
+++ b/math/test/mathtest.c
@@ -1,10 +1,12 @@
 /*
  * mathtest.c - test rig for mathlib
  *
- * Copyright (c) 1998-2023, Arm Limited.
+ * Copyright (c) 1998-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
+/* clang-format off */
 
+#define _GNU_SOURCE
 #include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -196,11 +198,9 @@ int is_complex_rettype(int rettype) {
 #define TFUNCARM(arg,ret,name,tolerance) { t_func, arg, ret, (void*)& ARM_PREFIX(name), m_none, tolerance, #name }
 #define MFUNC(arg,ret,name,tolerance) { t_macro, arg, ret, NULL, m_##name, tolerance, #name }
 
-#ifndef PL
 /* sincosf wrappers for easier testing.  */
 static float sincosf_sinf(float x) { float s,c; sincosf(x, &s, &c); return s; }
 static float sincosf_cosf(float x) { float s,c; sincosf(x, &s, &c); return c; }
-#endif
 
 test_func tfuncs[] = {
     /* trigonometric */
@@ -220,10 +220,9 @@ test_func tfuncs[] = {
     TFUNCARM(at_s,rt_s, tanf, 4*ULPUNIT),
     TFUNCARM(at_s,rt_s, sinf, 3*ULPUNIT/4),
     TFUNCARM(at_s,rt_s, cosf, 3*ULPUNIT/4),
-#ifndef PL
     TFUNCARM(at_s,rt_s, sincosf_sinf, 3*ULPUNIT/4),
     TFUNCARM(at_s,rt_s, sincosf_cosf, 3*ULPUNIT/4),
-#endif
+
     /* hyperbolic */
     TFUNC(at_d, rt_d, atanh, 4*ULPUNIT),
     TFUNC(at_d, rt_d, asinh, 4*ULPUNIT),
@@ -254,7 +253,9 @@ test_func tfuncs[] = {
     TFUNCARM(at_s,rt_s, expf, 3*ULPUNIT/4),
     TFUNCARM(at_s,rt_s, exp2f, 3*ULPUNIT/4),
     TFUNC(at_s,rt_s, expm1f, ULPUNIT),
+#if WANT_EXP10_TESTS
     TFUNC(at_d,rt_d, exp10, ULPUNIT),
+#endif
 
     /* power */
     TFUNC(at_d2,rt_d, pow, 3*ULPUNIT/4),
@@ -1707,3 +1708,4 @@ void undef_func() {
     failed++;
     puts("ERROR: undefined function called");
 }
+/* clang-format on */
diff --git a/math/test/rtest/dotest.c b/math/test/rtest/dotest.c
index 5b3e9b4f18e467..dd8ceb068141b0 100644
--- a/math/test/rtest/dotest.c
+++ b/math/test/rtest/dotest.c
@@ -1,7 +1,7 @@
 /*
  * dotest.c - actually generate mathlib test cases
  *
- * Copyright (c) 1999-2019, Arm Limited.
+ * Copyright (c) 1999-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -18,6 +18,35 @@
 
 #define MPFR_PREC 96 /* good enough for float or double + a few extra bits */
 
+#if MPFR_VERSION < MPFR_VERSION_NUM(4, 2, 0)
+int
+mpfr_tanpi (mpfr_t ret, const mpfr_t arg, mpfr_rnd_t rnd)
+{
+  MPFR_DECL_INIT (frd, MPFR_PREC);
+  mpfr_const_pi (frd, GMP_RNDN);
+  mpfr_mul (frd, frd, arg, GMP_RNDN);
+  return mpfr_tan (ret, frd, rnd); /* Final rounding as requested.  */
+}
+
+int
+mpfr_sinpi (mpfr_t ret, const mpfr_t arg, mpfr_rnd_t rnd)
+{
+  MPFR_DECL_INIT (frd, MPFR_PREC);
+  mpfr_const_pi (frd, GMP_RNDN);
+  mpfr_mul (frd, frd, arg, GMP_RNDN);
+  return mpfr_sin (ret, frd, rnd); /* Final rounding as requested.  */
+}
+
+int
+mpfr_cospi (mpfr_t ret, const mpfr_t arg, mpfr_rnd_t rnd)
+{
+  MPFR_DECL_INIT (frd, MPFR_PREC);
+  mpfr_const_pi (frd, GMP_RNDN);
+  mpfr_mul (frd, frd, arg, GMP_RNDN);
+  return mpfr_cos (ret, frd, rnd); /* Final rounding as requested.  */
+}
+#endif
+
 extern int lib_fo, lib_no_arith, ntests;
 
 /*
@@ -454,6 +483,7 @@ void universal_wrapper(wrapperctx *ctx)
     }
 }
 
+/* clang-format off */
 Testable functions[] = {
     /*
      * Trig functions: sin, cos, tan. We test the core function
@@ -479,6 +509,18 @@ Testable functions[] = {
         cases_uniform_float, 0x39800000, 0x41800000},
     {"sincosf_cosf", (funcptr)mpfr_cos, args1f, {NULL},
         cases_uniform_float, 0x39800000, 0x41800000},
+    {"sinpi", (funcptr)mpfr_sinpi, args1, {NULL},
+        cases_uniform, 0x3e400000, 0x40300000},
+    {"sinpif", (funcptr)mpfr_sinpi, args1f, {NULL},
+        cases_uniform_float, 0x39800000, 0x41800000},
+    {"cospi", (funcptr)mpfr_cospi, args1, {NULL},
+        cases_uniform, 0x3e400000, 0x40300000},
+    {"cospif", (funcptr)mpfr_cospi, args1f, {NULL},
+        cases_uniform_float, 0x39800000, 0x41800000},
+    {"tanpi", (funcptr)mpfr_tanpi, args1, {NULL},
+        cases_uniform, 0x3e400000, 0x40300000},
+    {"tanpif", (funcptr)mpfr_tanpi, args1f, {NULL},
+        cases_uniform_float, 0x39800000, 0x41800000},
     /*
      * Inverse trig: asin, acos. Between 1 and -1, of course. acos
      * goes down to 2^-54, asin to 2^-27.
@@ -708,6 +750,7 @@ Testable functions[] = {
     {"tgammaf", (funcptr)mpfr_gamma, args1f, {NULL}, cases_uniform_float, 0x2f800000, 0x43000000},
     {"tgamma", (funcptr)mpfr_gamma, args1, {NULL}, cases_uniform, 0x3c000000, 0x40800000},
 };
+/* clang-format on */
 
 const int nfunctions = ( sizeof(functions)/sizeof(*functions) );
 
diff --git a/math/test/runulp.sh b/math/test/runulp.sh
index e2e03e3ae76196..672908f355c409 100755
--- a/math/test/runulp.sh
+++ b/math/test/runulp.sh
@@ -2,7 +2,7 @@
 
 # ULP error check script.
 #
-# Copyright (c) 2019-2023, Arm Limited.
+# Copyright (c) 2019-2024, Arm Limited.
 # SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 #set -x
@@ -20,260 +20,83 @@ FAIL=0
 PASS=0
 
 t() {
-	[ $r = "n" ] && Lt=$L || Lt=$Ldir
-	$emu ./ulp -r $r -e $Lt $flags "$@" && PASS=$((PASS+1)) || FAIL=$((FAIL+1))
+    # First argument: routine name
+    routine=$1; shift
+    # Second and third argument: lo and hi bounds
+    # Extra processing needed for bivariate routines
+    IFS=',' read -ra LO <<< "$1"; shift
+    IFS=',' read -ra HI <<< "$1"; shift
+    ITV="${LO[0]} ${HI[0]}"
+    for i in "${!LO[@]}"; do
+	[[ "$i" -eq "0" ]] || ITV="$ITV x ${LO[$i]} ${HI[$i]}"
+    done
+    # Fourth argument: number of test points
+    n=$1; shift
+    # Any remaining arguments forwards directly to ulp tool
+    extra_flags="$@"
+
+    # Read ULP limits, fenv expectation and control values from autogenerated files
+    limits_file=$LIMITS
+    [ $r == "n" ] || limits_file=${limits_file}_nn
+    L=$(grep "^$routine " $limits_file | awk '{print $2}')
+    [ -n "$L" ] || { echo ERROR: Could not determine ULP limit for $routine in $limits_file && false; }
+    cvals=($(grep "^$routine " $CVALS | awk '{print $2}'))
+
+    if grep -q "^$routine$" $DISABLE_FENV; then extra_flags="$extra_flags -f"; fi 
+    # Emulate a do-while loop to loop over cvals, but still execute once if it is empty
+    while : ; do
+	# Empty string if we are at the end of cvals array
+	c_arg=""
+	[ -z "${cvals[0]:-}" ] || c_arg="-c ${cvals[0]}"
+	$emu ./ulp -e $L $flags $extra_flags -r $r $c_arg $routine $ITV $n && PASS=$((PASS+1)) || FAIL=$((FAIL+1))
+	# Shift cvals by 1, and break if it is now empty
+	cvals=("${cvals[@]:1}")
+	[ -n "${cvals[0]:-}" ] || break
+    done
+
+    # Run ULP tool
+
 }
 
 check() {
-	$emu ./ulp -f -q "$@" >/dev/null
+	$emu ./ulp -f -q "$@"
 }
 
-Ldir=0.5
+if [[ $WANT_EXPERIMENTAL_MATH -eq 1 ]] && [[ $WANT_SVE_TESTS -eq 1 ]] && [[ $USE_MPFR -eq 0 ]]; then
+    # No guarantees about powi accuracy, so regression-test for exactness
+    # w.r.t. the custom reference impl in ulp_wrappers.h
+    if [ -z "$FUNC" ] || [ "$FUNC" == "_ZGVsMxvv_powi" ]; then
+	check -q -f -e 0 _ZGVsMxvv_powi  0  inf x  0  1000 100000
+	check -q -f -e 0 _ZGVsMxvv_powi -0 -inf x  0  1000 100000
+	check -q -f -e 0 _ZGVsMxvv_powi  0  inf x -0 -1000 100000
+	check -q -f -e 0 _ZGVsMxvv_powi -0 -inf x -0 -1000 100000
+    fi
+    if [ -z "$FUNC" ] || [ "$FUNC" == "_ZGVsMxvv_powk" ]; then
+	check -q -f -e 0 _ZGVsMxvv_powk  0  inf x  0  1000 100000
+	check -q -f -e 0 _ZGVsMxvv_powk -0 -inf x  0  1000 100000
+	check -q -f -e 0 _ZGVsMxvv_powk  0  inf x -0 -1000 100000
+	check -q -f -e 0 _ZGVsMxvv_powk -0 -inf x -0 -1000 100000
+    fi
+fi
+
+# Test generic routines in all rounding modes
 for r in $rmodes
 do
-L=0.01
-t exp  0 0xffff000000000000 10000
-t exp  0x1p-6     0x1p6     40000
-t exp -0x1p-6    -0x1p6     40000
-t exp  633.3      733.3     10000
-t exp -633.3     -777.3     10000
-
-L=0.01
-t exp2  0 0xffff000000000000 10000
-t exp2  0x1p-6     0x1p6     40000
-t exp2 -0x1p-6    -0x1p6     40000
-t exp2  633.3      733.3     10000
-t exp2 -633.3     -777.3     10000
-
-L=0.02
-t log  0 0xffff000000000000 10000
-t log  0x1p-4    0x1p4      40000
-t log  0         inf        40000
-
-L=0.05
-t log2  0 0xffff000000000000 10000
-t log2  0x1p-4    0x1p4      40000
-t log2  0         inf        40000
-
-L=0.05
-t pow  0.5  2.0  x  0  inf 20000
-t pow -0.5 -2.0  x  0  inf 20000
-t pow  0.5  2.0  x -0 -inf 20000
-t pow -0.5 -2.0  x -0 -inf 20000
-t pow  0.5  2.0  x  0x1p-10  0x1p10  40000
-t pow  0.5  2.0  x -0x1p-10 -0x1p10  40000
-t pow  0    inf  x    0.5      2.0   80000
-t pow  0    inf  x   -0.5     -2.0   80000
-t pow  0x1.fp-1   0x1.08p0  x  0x1p8 0x1p17  80000
-t pow  0x1.fp-1   0x1.08p0  x -0x1p8 -0x1p17 80000
-t pow  0         0x1p-1000  x  0 1.0 50000
-t pow  0x1p1000        inf  x  0 1.0 50000
-t pow  0x1.ffffffffffff0p-1  0x1.0000000000008p0 x 0x1p60 0x1p68 50000
-t pow  0x1.ffffffffff000p-1  0x1p0 x 0x1p50 0x1p52 50000
-t pow -0x1.ffffffffff000p-1 -0x1p0 x 0x1p50 0x1p52 50000
-
-L=0.02
-t exp10   0                   0x1p-47             5000
-t exp10  -0                  -0x1p-47             5000
-t exp10   0x1p-47             1                   50000
-t exp10  -0x1p-47            -1                   50000
-t exp10   1                   0x1.34413509f79ffp8 50000
-t exp10  -1                  -0x1.434e6420f4374p8 50000
-t exp10  0x1.34413509f79ffp8  inf                 5000
-t exp10 -0x1.434e6420f4374p8 -inf                 5000
-
-L=1.0
-Ldir=0.9
-t erf  0 0xffff000000000000 10000
-t erf  0x1p-1022  0x1p-26   40000
-t erf  -0x1p-1022 -0x1p-26  40000
-t erf  0x1p-26    0x1p3     40000
-t erf  -0x1p-26  -0x1p3     40000
-t erf  0         inf        40000
-Ldir=0.5
-
-L=0.01
-t expf  0    0xffff0000    10000
-t expf  0x1p-14   0x1p8    50000
-t expf -0x1p-14  -0x1p8    50000
-
-L=0.01
-t exp2f  0    0xffff0000   10000
-t exp2f  0x1p-14   0x1p8   50000
-t exp2f -0x1p-14  -0x1p8   50000
-
-L=0.32
-t logf  0    0xffff0000    10000
-t logf  0x1p-4    0x1p4    50000
-t logf  0         inf      50000
-
-L=0.26
-t log2f  0    0xffff0000   10000
-t log2f  0x1p-4    0x1p4   50000
-t log2f  0         inf     50000
-
-L=0.06
-t sinf  0    0xffff0000    10000
-t sinf  0x1p-14  0x1p54    50000
-t sinf -0x1p-14 -0x1p54    50000
-
-L=0.06
-t cosf  0    0xffff0000    10000
-t cosf  0x1p-14  0x1p54    50000
-t cosf -0x1p-14 -0x1p54    50000
-
-L=0.06
-t sincosf_sinf  0    0xffff0000    10000
-t sincosf_sinf  0x1p-14  0x1p54    50000
-t sincosf_sinf -0x1p-14 -0x1p54    50000
-
-L=0.06
-t sincosf_cosf  0    0xffff0000    10000
-t sincosf_cosf  0x1p-14  0x1p54    50000
-t sincosf_cosf -0x1p-14 -0x1p54    50000
-
-L=0.4
-t powf  0x1p-1   0x1p1  x  0x1p-7 0x1p7   50000
-t powf  0x1p-1   0x1p1  x -0x1p-7 -0x1p7  50000
-t powf  0x1p-70 0x1p70  x  0x1p-1 0x1p1   50000
-t powf  0x1p-70 0x1p70  x  -0x1p-1 -0x1p1 50000
-t powf  0x1.ep-1 0x1.1p0 x  0x1p8 0x1p14  50000
-t powf  0x1.ep-1 0x1.1p0 x -0x1p8 -0x1p14 50000
-
-L=0.6
-Ldir=0.9
-t erff  0      0xffff0000 10000
-t erff  0x1p-127  0x1p-26 40000
-t erff -0x1p-127 -0x1p-26 40000
-t erff  0x1p-26   0x1p3   40000
-t erff -0x1p-26  -0x1p3   40000
-t erff  0         inf     40000
-Ldir=0.5
-
+  while read F LO HI N
+  do
+	[[ -z $F ]] || t $F $LO $HI $N
+  done << EOF
+$(grep "\b$FUNC\b" $GEN_ITVS)
+EOF
 done
 
-# vector functions
-
-Ldir=0.5
-r='n'
-flags="${ULPFLAGS:--q}"
-
-range_exp='
-  0 0xffff000000000000 10000
-  0x1p-6     0x1p6     400000
- -0x1p-6    -0x1p6     400000
-  633.3      733.3     10000
- -633.3     -777.3     10000
-'
-
-range_log='
-  0 0xffff000000000000 10000
-  0x1p-4     0x1p4     400000
-  0          inf       400000
-'
-
-range_pow='
- 0x1p-1   0x1p1  x  0x1p-10 0x1p10   50000
- 0x1p-1   0x1p1  x -0x1p-10 -0x1p10  50000
- 0x1p-500 0x1p500  x  0x1p-1 0x1p1   50000
- 0x1p-500 0x1p500  x  -0x1p-1 -0x1p1 50000
- 0x1.ep-1 0x1.1p0 x  0x1p8 0x1p16    50000
- 0x1.ep-1 0x1.1p0 x -0x1p8 -0x1p16   50000
-'
-
-range_sin='
-  0       0x1p23     500000
- -0      -0x1p23     500000
-  0x1p23  inf        10000
- -0x1p23 -inf        10000
-'
-range_cos="$range_sin"
-
-range_expf='
-  0    0xffff0000    10000
-  0x1p-14   0x1p8    500000
- -0x1p-14  -0x1p8    500000
-'
-
-range_expf_1u="$range_expf"
-range_exp2f="$range_expf"
-range_exp2f_1u="$range_expf"
-
-range_logf='
- 0    0xffff0000    10000
- 0x1p-4    0x1p4    500000
-'
-
-range_sinf='
-  0        0x1p20   500000
- -0       -0x1p20   500000
-  0x1p20   inf      10000
- -0x1p20  -inf      10000
-'
-range_cosf="$range_sinf"
-
-range_powf='
- 0x1p-1   0x1p1  x  0x1p-7 0x1p7   50000
- 0x1p-1   0x1p1  x -0x1p-7 -0x1p7  50000
- 0x1p-70 0x1p70  x  0x1p-1 0x1p1   50000
- 0x1p-70 0x1p70  x  -0x1p-1 -0x1p1 50000
- 0x1.ep-1 0x1.1p0 x  0x1p8 0x1p14  50000
- 0x1.ep-1 0x1.1p0 x -0x1p8 -0x1p14 50000
-'
-
-# error limits
-L_exp=1.9
-L_log=1.2
-L_pow=0.05
-L_sin=3.0
-L_cos=3.0
-L_expf=1.49
-L_expf_1u=0.4
-L_exp2f=1.49
-L_exp2f_1u=0.4
-L_logf=2.9
-L_sinf=1.4
-L_cosf=1.4
-L_powf=2.1
-
-while read G F D
+# Only test arch-specific routines in round-to-nearest, with sign of zero ignored (-z flag)
+r=n
+while read F LO HI N
 do
-	case "$G" in \#*) continue ;; esac
-	eval range="\${range_$G}"
-	eval L="\${L_$G}"
-	while read X
-	do
-		[ -n "$X" ] || continue
-		case "$X" in \#*) continue ;; esac
-		disable_fenv=""
-		if [ -z "$WANT_SIMD_EXCEPT" ] || [ $WANT_SIMD_EXCEPT -eq 0 ]; then
-			# If library was built with SIMD exceptions
-			# disabled, disable fenv checking in ulp
-			# tool. Otherwise, fenv checking may still be
-			# disabled by adding -f to the end of the run
-			# line.
-			disable_fenv="-f"
-		fi
-		t $D $disable_fenv $F $X
-	done << EOF
-$range
-
-EOF
+	[[ -z $F ]] || t $F $LO $HI $N -z
 done << EOF
-# group symbol run
-exp       _ZGVnN2v_exp
-log       _ZGVnN2v_log
-pow       _ZGVnN2vv_pow      -f
-sin       _ZGVnN2v_sin       -z
-cos       _ZGVnN2v_cos
-expf      _ZGVnN4v_expf
-expf_1u   _ZGVnN4v_expf_1u   -f
-exp2f     _ZGVnN4v_exp2f
-exp2f_1u  _ZGVnN4v_exp2f_1u  -f
-logf      _ZGVnN4v_logf
-sinf      _ZGVnN4v_sinf      -z
-cosf      _ZGVnN4v_cosf
-powf      _ZGVnN4vv_powf     -f
+$(grep "\b$FUNC\b" $ARCH_ITVS)
 EOF
 
 [ 0 -eq $FAIL ] || {
diff --git a/math/test/test_defs.h b/math/test/test_defs.h
new file mode 100644
index 00000000000000..d0656c9e1d84d2
--- /dev/null
+++ b/math/test/test_defs.h
@@ -0,0 +1,31 @@
+/*
+ * Helper macros for emitting various details about routines for consumption by
+ * runulp.sh.
+ *
+ * Copyright (c) 2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception.
+ */
+
+#define TEST_ULP(f, l) TEST_ULP f l
+#define TEST_ULP_NONNEAREST(f, l) TEST_ULP_NONNEAREST f l
+
+/* Emit routine name if e == 0 and f is expected to correctly trigger fenv
+   exceptions. e allows declaration to be emitted conditionally on
+   WANT_SIMD_EXCEPT - defer expansion by one pass to allow those flags to be
+   expanded properly.  */
+#define TEST_DISABLE_FENV(f) TEST_DISABLE_FENV f
+#define TEST_DISABLE_FENV_IF_NOT(f, e) TEST_DISABLE_FENV_IF_NOT_ (f, e)
+#define TEST_DISABLE_FENV_IF_NOT_(f, e) TEST_DISABLE_FENV_IF_NOT_##e (f)
+#define TEST_DISABLE_FENV_IF_NOT_0(f) TEST_DISABLE_FENV (f)
+#define TEST_DISABLE_FENV_IF_NOT_1(f)
+
+#define TEST_INTERVAL(f, lo, hi, n) TEST_INTERVAL f lo hi n
+#define TEST_SYM_INTERVAL(f, lo, hi, n)                                       \
+  TEST_INTERVAL (f, lo, hi, n)                                                \
+  TEST_INTERVAL (f, -lo, -hi, n)
+// clang-format off
+#define TEST_INTERVAL2(f, xlo, xhi, ylo, yhi, n)                            \
+  TEST_INTERVAL f xlo,ylo xhi,yhi n
+// clang-format on
+
+#define TEST_CONTROL_VALUE(f, c) TEST_CONTROL_VALUE f c
diff --git a/pl/math/test/testcases/directed/acos.tst b/math/test/testcases/directed/acos.tst
similarity index 95%
rename from pl/math/test/testcases/directed/acos.tst
rename to math/test/testcases/directed/acos.tst
index a73dcd25965bb4..7889e62f4459fa 100644
--- a/pl/math/test/testcases/directed/acos.tst
+++ b/math/test/testcases/directed/acos.tst
@@ -1,6 +1,6 @@
 ; acos.tst
 ;
-; Copyright (c) 2009-2023, Arm Limited.
+; Copyright (c) 2009-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=acos op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
diff --git a/pl/math/test/testcases/directed/acosf.tst b/math/test/testcases/directed/acosf.tst
similarity index 95%
rename from pl/math/test/testcases/directed/acosf.tst
rename to math/test/testcases/directed/acosf.tst
index 9e453e3bff5e80..0c2165967abbfc 100644
--- a/pl/math/test/testcases/directed/acosf.tst
+++ b/math/test/testcases/directed/acosf.tst
@@ -1,6 +1,6 @@
 ; acosf.tst
 ;
-; Copyright (c) 2009-2023, Arm Limited.
+; Copyright (c) 2009-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=acosf op1=7fc00001 result=7fc00001 errno=0
diff --git a/pl/math/test/testcases/directed/acosh.tst b/math/test/testcases/directed/acosh.tst
similarity index 96%
rename from pl/math/test/testcases/directed/acosh.tst
rename to math/test/testcases/directed/acosh.tst
index dd962bd391daa1..b78d64bb8ea71a 100644
--- a/pl/math/test/testcases/directed/acosh.tst
+++ b/math/test/testcases/directed/acosh.tst
@@ -1,6 +1,6 @@
 ; acosh.tst
 ;
-; Copyright (c) 2009-2023, Arm Limited.
+; Copyright (c) 2009-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=acosh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
diff --git a/pl/math/test/testcases/directed/acoshf.tst b/math/test/testcases/directed/acoshf.tst
similarity index 95%
rename from pl/math/test/testcases/directed/acoshf.tst
rename to math/test/testcases/directed/acoshf.tst
index 606c615f9b74a7..9eec2caf014d17 100644
--- a/pl/math/test/testcases/directed/acoshf.tst
+++ b/math/test/testcases/directed/acoshf.tst
@@ -1,6 +1,6 @@
 ; acoshf.tst
 ;
-; Copyright (c) 2009-2023, Arm Limited.
+; Copyright (c) 2009-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=acoshf op1=7fc00001 result=7fc00001 errno=0
diff --git a/pl/math/test/testcases/directed/asin.tst b/math/test/testcases/directed/asin.tst
similarity index 97%
rename from pl/math/test/testcases/directed/asin.tst
rename to math/test/testcases/directed/asin.tst
index 6180d7849d9038..7b916f3624c03c 100644
--- a/pl/math/test/testcases/directed/asin.tst
+++ b/math/test/testcases/directed/asin.tst
@@ -1,6 +1,6 @@
 ; asin.tst
 ;
-; Copyright (c) 2009-2023, Arm Limited.
+; Copyright (c) 2009-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=asin op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
diff --git a/pl/math/test/testcases/directed/asinf.tst b/math/test/testcases/directed/asinf.tst
similarity index 96%
rename from pl/math/test/testcases/directed/asinf.tst
rename to math/test/testcases/directed/asinf.tst
index a85b2593768d33..d5830b99b62081 100644
--- a/pl/math/test/testcases/directed/asinf.tst
+++ b/math/test/testcases/directed/asinf.tst
@@ -1,6 +1,6 @@
 ; asinf.tst
 ;
-; Copyright (c) 2009-2023, Arm Limited.
+; Copyright (c) 2009-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=asinf op1=7fc00001 result=7fc00001 errno=0
diff --git a/pl/math/test/testcases/directed/asinh.tst b/math/test/testcases/directed/asinh.tst
similarity index 95%
rename from pl/math/test/testcases/directed/asinh.tst
rename to math/test/testcases/directed/asinh.tst
index 1485dfeffecf2e..9b250a14f50c8e 100644
--- a/pl/math/test/testcases/directed/asinh.tst
+++ b/math/test/testcases/directed/asinh.tst
@@ -1,6 +1,6 @@
 ; asinh.tst
 ;
-; Copyright (c) 2022-2023, Arm Limited.
+; Copyright (c) 2022-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=asinh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
diff --git a/pl/math/test/testcases/directed/asinhf.tst b/math/test/testcases/directed/asinhf.tst
similarity index 95%
rename from pl/math/test/testcases/directed/asinhf.tst
rename to math/test/testcases/directed/asinhf.tst
index eb76a5892a7062..f2410e09b03e7e 100644
--- a/pl/math/test/testcases/directed/asinhf.tst
+++ b/math/test/testcases/directed/asinhf.tst
@@ -1,6 +1,6 @@
 ; asinhf.tst
 ;
-; Copyright (c) 2007-2023, Arm Limited.
+; Copyright (c) 2007-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=asinhf op1=7fc00001 result=7fc00001 errno=0
diff --git a/pl/math/test/testcases/directed/atan.tst b/math/test/testcases/directed/atan.tst
similarity index 96%
rename from pl/math/test/testcases/directed/atan.tst
rename to math/test/testcases/directed/atan.tst
index 4c670553d58fb0..d29b13245cd548 100644
--- a/pl/math/test/testcases/directed/atan.tst
+++ b/math/test/testcases/directed/atan.tst
@@ -1,6 +1,6 @@
 ; atan.tst
 ;
-; Copyright (c) 1999-2023, Arm Limited.
+; Copyright (c) 1999-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=atan op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
diff --git a/pl/math/test/testcases/directed/atan2.tst b/math/test/testcases/directed/atan2.tst
similarity index 99%
rename from pl/math/test/testcases/directed/atan2.tst
rename to math/test/testcases/directed/atan2.tst
index 647b3764072cc1..3e34e7641f284c 100644
--- a/pl/math/test/testcases/directed/atan2.tst
+++ b/math/test/testcases/directed/atan2.tst
@@ -1,6 +1,6 @@
 ; atan2.tst
 ;
-; Copyright (c) 1999-2023, Arm Limited.
+; Copyright (c) 1999-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=atan2 op1=7ff00000.00000001 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
diff --git a/pl/math/test/testcases/directed/atan2f.tst b/math/test/testcases/directed/atan2f.tst
similarity index 99%
rename from pl/math/test/testcases/directed/atan2f.tst
rename to math/test/testcases/directed/atan2f.tst
index 85c5c5d47e10b3..e637fe0eba24d8 100644
--- a/pl/math/test/testcases/directed/atan2f.tst
+++ b/math/test/testcases/directed/atan2f.tst
@@ -1,6 +1,6 @@
 ; atan2f.tst
 ;
-; Copyright (c) 1999-2023, Arm Limited.
+; Copyright (c) 1999-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=atan2f op1=7f800001 op2=7f800001 result=7fc00001 errno=0 status=i
diff --git a/pl/math/test/testcases/directed/atanf.tst b/math/test/testcases/directed/atanf.tst
similarity index 95%
rename from pl/math/test/testcases/directed/atanf.tst
rename to math/test/testcases/directed/atanf.tst
index 0a0bfc24c6050f..8739ea89c3a28a 100644
--- a/pl/math/test/testcases/directed/atanf.tst
+++ b/math/test/testcases/directed/atanf.tst
@@ -1,6 +1,6 @@
 ; atanf.tst
 ;
-; Copyright (c) 2007-2023, Arm Limited.
+; Copyright (c) 2007-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=atanf op1=7fc00001 result=7fc00001 errno=0
diff --git a/pl/math/test/testcases/directed/atanh.tst b/math/test/testcases/directed/atanh.tst
similarity index 97%
rename from pl/math/test/testcases/directed/atanh.tst
rename to math/test/testcases/directed/atanh.tst
index d96ff327fcd9a8..7ba297e5046c7e 100644
--- a/pl/math/test/testcases/directed/atanh.tst
+++ b/math/test/testcases/directed/atanh.tst
@@ -1,6 +1,6 @@
 ; atanh.tst
 ;
-; Copyright (c) 2009-2023, Arm Limited.
+; Copyright (c) 2009-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=atanh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
diff --git a/pl/math/test/testcases/directed/atanhf.tst b/math/test/testcases/directed/atanhf.tst
similarity index 96%
rename from pl/math/test/testcases/directed/atanhf.tst
rename to math/test/testcases/directed/atanhf.tst
index 21a68a661a1134..010012831b3cba 100644
--- a/pl/math/test/testcases/directed/atanhf.tst
+++ b/math/test/testcases/directed/atanhf.tst
@@ -1,6 +1,6 @@
 ; atanhf.tst
 ;
-; Copyright (c) 2009-2023, Arm Limited.
+; Copyright (c) 2009-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=atanhf op1=7fc00001 result=7fc00001 errno=0
diff --git a/pl/math/test/testcases/directed/cbrtf.tst b/math/test/testcases/directed/cbrtf.tst
similarity index 97%
rename from pl/math/test/testcases/directed/cbrtf.tst
rename to math/test/testcases/directed/cbrtf.tst
index 0dd8d09f1d4fb5..98942580c7a790 100644
--- a/pl/math/test/testcases/directed/cbrtf.tst
+++ b/math/test/testcases/directed/cbrtf.tst
@@ -1,6 +1,6 @@
 ; cbrtf.tst
 ;
-; Copyright (c) 2009-2023, Arm Limited.
+; Copyright (c) 2009-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=cbrtf op1=7f800000 result=7f800000 errno=0
diff --git a/pl/math/test/testcases/directed/cosh.tst b/math/test/testcases/directed/cosh.tst
similarity index 95%
rename from pl/math/test/testcases/directed/cosh.tst
rename to math/test/testcases/directed/cosh.tst
index c4efacb7272d47..4dc6fe4846dcf6 100644
--- a/pl/math/test/testcases/directed/cosh.tst
+++ b/math/test/testcases/directed/cosh.tst
@@ -1,6 +1,6 @@
 ; cosh.tst
 ;
-; Copyright (c) 1999-2023, Arm Limited.
+; Copyright (c) 1999-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=cosh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
diff --git a/pl/math/test/testcases/directed/coshf.tst b/math/test/testcases/directed/coshf.tst
similarity index 93%
rename from pl/math/test/testcases/directed/coshf.tst
rename to math/test/testcases/directed/coshf.tst
index 2b967e78f4b425..d224baf486a519 100644
--- a/pl/math/test/testcases/directed/coshf.tst
+++ b/math/test/testcases/directed/coshf.tst
@@ -1,6 +1,6 @@
 ; coshf.tst
 ;
-; Copyright (c) 2007-2023, Arm Limited.
+; Copyright (c) 2007-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=coshf op1=7fc00001 result=7fc00001 errno=0
diff --git a/pl/math/test/testcases/directed/erfc.tst b/math/test/testcases/directed/erfc.tst
similarity index 96%
rename from pl/math/test/testcases/directed/erfc.tst
rename to math/test/testcases/directed/erfc.tst
index c03fc591da47ae..249e7343eac21c 100644
--- a/pl/math/test/testcases/directed/erfc.tst
+++ b/math/test/testcases/directed/erfc.tst
@@ -1,6 +1,6 @@
 ; erfc.tst - Directed test cases for erfc
 ;
-; Copyright (c) 2022-2023, Arm Limited.
+; Copyright (c) 2022-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=erfc op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
diff --git a/pl/math/test/testcases/directed/erfcf.tst b/math/test/testcases/directed/erfcf.tst
similarity index 93%
rename from pl/math/test/testcases/directed/erfcf.tst
rename to math/test/testcases/directed/erfcf.tst
index 719baccb2e452b..22a1a8f236d81a 100644
--- a/pl/math/test/testcases/directed/erfcf.tst
+++ b/math/test/testcases/directed/erfcf.tst
@@ -1,6 +1,6 @@
 ; erfcf.tst - Directed test cases for erfcf
 ;
-; Copyright (c) 2007-2023, Arm Limited.
+; Copyright (c) 2007-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=erfcf op1=7fc00001 result=7fc00001 errno=0
diff --git a/pl/math/test/testcases/directed/expm1.tst b/math/test/testcases/directed/expm1.tst
similarity index 96%
rename from pl/math/test/testcases/directed/expm1.tst
rename to math/test/testcases/directed/expm1.tst
index 609d6f47972135..3d58c6b3f16131 100644
--- a/pl/math/test/testcases/directed/expm1.tst
+++ b/math/test/testcases/directed/expm1.tst
@@ -1,6 +1,6 @@
 ; expm1.tst
 ;
-; Copyright (c) 2009-2023, Arm Limited.
+; Copyright (c) 2009-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=expm1 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
diff --git a/pl/math/test/testcases/directed/expm1f.tst b/math/test/testcases/directed/expm1f.tst
similarity index 98%
rename from pl/math/test/testcases/directed/expm1f.tst
rename to math/test/testcases/directed/expm1f.tst
index 44c38420a617eb..44a15d6798700b 100644
--- a/pl/math/test/testcases/directed/expm1f.tst
+++ b/math/test/testcases/directed/expm1f.tst
@@ -1,6 +1,6 @@
 ; expm1f.tst
 ;
-; Copyright (c) 2009-2023, Arm Limited.
+; Copyright (c) 2009-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=expm1f op1=7fc00001 result=7fc00001 errno=0
diff --git a/pl/math/test/testcases/directed/log10.tst b/math/test/testcases/directed/log10.tst
similarity index 95%
rename from pl/math/test/testcases/directed/log10.tst
rename to math/test/testcases/directed/log10.tst
index 34831436234a8c..3ff2520134980a 100644
--- a/pl/math/test/testcases/directed/log10.tst
+++ b/math/test/testcases/directed/log10.tst
@@ -1,6 +1,6 @@
 ; log10.tst
 ;
-; Copyright (c) 2007-2023, Arm Limited.
+; Copyright (c) 2007-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=log10 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
diff --git a/pl/math/test/testcases/directed/log10f.tst b/math/test/testcases/directed/log10f.tst
similarity index 98%
rename from pl/math/test/testcases/directed/log10f.tst
rename to math/test/testcases/directed/log10f.tst
index d5744a66f092f9..5c83e3f5e9b4ad 100644
--- a/pl/math/test/testcases/directed/log10f.tst
+++ b/math/test/testcases/directed/log10f.tst
@@ -1,6 +1,6 @@
 ; log10f.tst
 ;
-; Copyright (c) 2007-2023, Arm Limited.
+; Copyright (c) 2007-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=log10f op1=7fc00001 result=7fc00001 errno=0
diff --git a/pl/math/test/testcases/directed/log1p.tst b/math/test/testcases/directed/log1p.tst
similarity index 96%
rename from pl/math/test/testcases/directed/log1p.tst
rename to math/test/testcases/directed/log1p.tst
index 9ee8c62fc9c0bf..109413a79e96a3 100644
--- a/pl/math/test/testcases/directed/log1p.tst
+++ b/math/test/testcases/directed/log1p.tst
@@ -1,6 +1,6 @@
 ; log1p.tst
 ;
-; Copyright (c) 2009-2023, Arm Limited.
+; Copyright (c) 2009-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=log1p op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
diff --git a/pl/math/test/testcases/directed/log1pf.tst b/math/test/testcases/directed/log1pf.tst
similarity index 99%
rename from pl/math/test/testcases/directed/log1pf.tst
rename to math/test/testcases/directed/log1pf.tst
index aaa01d67c2b39d..9655b9473612c7 100644
--- a/pl/math/test/testcases/directed/log1pf.tst
+++ b/math/test/testcases/directed/log1pf.tst
@@ -1,6 +1,6 @@
 ; log1pf.tst
 ;
-; Copyright (c) 2009-2023, Arm Limited.
+; Copyright (c) 2009-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=log1pf op1=7fc00001 result=7fc00001 errno=0
diff --git a/pl/math/test/testcases/directed/sinh.tst b/math/test/testcases/directed/sinh.tst
similarity index 96%
rename from pl/math/test/testcases/directed/sinh.tst
rename to math/test/testcases/directed/sinh.tst
index d6a3da8966933f..ab0d84b84d9ec2 100644
--- a/pl/math/test/testcases/directed/sinh.tst
+++ b/math/test/testcases/directed/sinh.tst
@@ -1,6 +1,6 @@
 ; sinh.tst
 ;
-; Copyright (c) 1999-2023, Arm Limited.
+; Copyright (c) 1999-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=sinh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
diff --git a/pl/math/test/testcases/directed/sinhf.tst b/math/test/testcases/directed/sinhf.tst
similarity index 95%
rename from pl/math/test/testcases/directed/sinhf.tst
rename to math/test/testcases/directed/sinhf.tst
index 5f7bd1b04137d8..d9269c0fa405cb 100644
--- a/pl/math/test/testcases/directed/sinhf.tst
+++ b/math/test/testcases/directed/sinhf.tst
@@ -1,6 +1,6 @@
 ; sinhf.tst
 ;
-; Copyright (c) 2009-2023, Arm Limited.
+; Copyright (c) 2009-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=sinhf op1=7fc00001 result=7fc00001 errno=0
diff --git a/pl/math/test/testcases/directed/tanf.tst b/math/test/testcases/directed/tanf.tst
similarity index 96%
rename from pl/math/test/testcases/directed/tanf.tst
rename to math/test/testcases/directed/tanf.tst
index 3161f70f43613d..e38142df6e3cea 100644
--- a/pl/math/test/testcases/directed/tanf.tst
+++ b/math/test/testcases/directed/tanf.tst
@@ -1,6 +1,6 @@
 ; tanf.tst
 ;
-; Copyright (c) 2022-2023, Arm Limited.
+; Copyright (c) 2022-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=tanf op1=7fc00001 result=7fc00001 errno=0
diff --git a/pl/math/test/testcases/directed/tanh.tst b/math/test/testcases/directed/tanh.tst
similarity index 95%
rename from pl/math/test/testcases/directed/tanh.tst
rename to math/test/testcases/directed/tanh.tst
index 78776e6f39249c..e842063c0ef7f3 100644
--- a/pl/math/test/testcases/directed/tanh.tst
+++ b/math/test/testcases/directed/tanh.tst
@@ -1,6 +1,6 @@
 ; tanh.tst
 ;
-; Copyright (c) 1999-2023, Arm Limited.
+; Copyright (c) 1999-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=tanh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
diff --git a/pl/math/test/testcases/directed/tanhf.tst b/math/test/testcases/directed/tanhf.tst
similarity index 95%
rename from pl/math/test/testcases/directed/tanhf.tst
rename to math/test/testcases/directed/tanhf.tst
index 603e3107e44fc0..412aa12b362167 100644
--- a/pl/math/test/testcases/directed/tanhf.tst
+++ b/math/test/testcases/directed/tanhf.tst
@@ -1,6 +1,6 @@
 ; tanhf.tst
 ;
-; Copyright (c) 2007-2023, Arm Limited.
+; Copyright (c) 2007-2024, Arm Limited.
 ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 func=tanhf op1=7fc00001 result=7fc00001 errno=0
diff --git a/math/test/trigpi_references.h b/math/test/trigpi_references.h
new file mode 100644
index 00000000000000..3dc5a317343622
--- /dev/null
+++ b/math/test/trigpi_references.h
@@ -0,0 +1,106 @@
+/*
+ * Extended precision scalar reference functions for trigpi.
+ *
+ * Copyright (c) 2023-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#ifndef M_PIl
+#  define M_PIl 3.141592653589793238462643383279502884l
+#endif
+
+long double
+arm_math_sinpil (long double x)
+{
+  /* sinpi(inf) should return nan, as defined by C23.  */
+  if (isinf (x))
+    return __math_invalid (x);
+
+  long double ax = fabsl (x);
+
+  /* Return signed 0 for all |x| >= 2^64 to prevent
+     overflow when casting to uint64_t.  */
+  if (ax >= 0x1p64)
+    return x < 0 ? -0.0l : 0.0l;
+
+  /* All integer cases should return 0, with unchanged sign for zero.  */
+  if (x == 0.0l)
+    return x;
+  if (ax == (uint64_t) ax)
+    return x < 0 ? -0.0l : 0.0l;
+
+  return sinl (x * M_PIl);
+}
+
+long double
+arm_math_cospil (long double x)
+{
+  /* cospi(inf) should return nan, as defined by C23.  */
+  if (isinf (x))
+    return __math_invalid (x);
+
+  long double ax = fabsl (x);
+
+  if (ax >= 0x1p64)
+    return 1;
+
+  uint64_t m = (uint64_t) ax;
+
+  /* Integer values of cospi(x) should return +/-1.
+    The sign depends on if x is odd or even.  */
+  if (m == ax)
+    return (m & 1) ? -1 : 1;
+
+  /* Values of Integer + 0.5 should always return 0.  */
+  if (ax - 0.5 == m || ax + 0.5 == m)
+    return 0;
+
+  return cosl (ax * M_PIl);
+}
+
+long double
+arm_math_tanpil (long double x)
+{
+  /* inf should return nan; any other |x| >= 2^54 returns signed zero.  */
+  if (fabsl (x) >= 0x1p54l)
+    {
+      if (isinf (x))
+	return __math_invalid (x);
+      return x < 0 ? -0.0l : 0.0l;
+    }
+
+  long double i = roundl (x);
+  long double f = x - i;
+  int64_t m = (int64_t) i;
+
+  if (x == 0)
+    {
+      return x;
+    }
+  else if (x == i)
+    {
+      if (x < 0)
+	{
+	  return m & 1 ? 0.0l : -0.0l;
+	}
+      else
+	{
+	  return m & 1 ? -0.0l : 0.0l;
+	}
+    }
+  else if (fabsl (f) == 0.5l)
+    {
+      if (x < 0)
+	{
+	  return m & 1 ? -1.0l / 0.0l : 1.0l / 0.0l;
+	}
+      else
+	{
+	  return m & 1 ? 1.0l / 0.0l : -1.0l / 0.0l;
+	}
+    }
+
+  return tanl (f * M_PIl);
+}
diff --git a/math/test/ulp.c b/math/test/ulp.c
index 5ff29972e50ee0..0a75fe26463063 100644
--- a/math/test/ulp.c
+++ b/math/test/ulp.c
@@ -1,10 +1,23 @@
 /*
  * ULP error checking tool for math functions.
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
+#if WANT_SVE_TESTS
+#  if __aarch64__ && __linux__
+#    ifdef __clang__
+#      pragma clang attribute push(__attribute__((target("sve"))),            \
+				   apply_to = any(function))
+#    else
+#      pragma GCC target("+sve")
+#    endif
+#  else
+#    error "SVE not supported - please disable WANT_SVE_TESTS"
+#  endif
+#endif
+
 #define _GNU_SOURCE
 #include <ctype.h>
 #include <fenv.h>
@@ -16,6 +29,8 @@
 #include <string.h>
 #include "mathlib.h"
 
+#include "trigpi_references.h"
+
 /* Don't depend on mpfr by default.  */
 #ifndef USE_MPFR
 # define USE_MPFR 0
@@ -24,50 +39,6 @@
 # include <mpfr.h>
 #endif
 
-static inline uint64_t
-asuint64 (double f)
-{
-  union
-  {
-    double f;
-    uint64_t i;
-  } u = {f};
-  return u.i;
-}
-
-static inline double
-asdouble (uint64_t i)
-{
-  union
-  {
-    uint64_t i;
-    double f;
-  } u = {i};
-  return u.f;
-}
-
-static inline uint32_t
-asuint (float f)
-{
-  union
-  {
-    float f;
-    uint32_t i;
-  } u = {f};
-  return u.i;
-}
-
-static inline float
-asfloat (uint32_t i)
-{
-  union
-  {
-    uint32_t i;
-    float f;
-  } u = {i};
-  return u.f;
-}
-
 static uint64_t seed = 0x0123456789abcdef;
 static uint64_t
 rand64 (void)
@@ -198,68 +169,96 @@ next_d2 (void *g)
   return (struct args_d2){asdouble (x), asdouble (x2)};
 }
 
-struct conf
-{
-  int r;
-  int rc;
-  int quiet;
-  int mpfr;
-  int fenv;
-  unsigned long long n;
-  double softlim;
-  double errlim;
-  int ignore_zero_sign;
-};
-
 /* A bit of a hack: call vector functions twice with the same
    input in lane 0 but a different value in other lanes: once
    with an in-range value and then with a special case value.  */
 static int secondcall;
 
 /* Wrappers for vector functions.  */
-#ifdef __vpcs
-typedef __f32x4_t v_float;
-typedef __f64x2_t v_double;
+#if __aarch64__ && __linux__
 /* First element of fv and dv may be changed by -c argument.  */
 static float fv[2] = {1.0f, -INFINITY};
 static double dv[2] = {1.0, -INFINITY};
-static inline v_float argf(float x) { return (v_float){x,x,x,fv[secondcall]}; }
-static inline v_double argd(double x) { return (v_double){x,dv[secondcall]}; }
-#if WANT_SVE_MATH
+static inline float32x4_t
+argf (float x)
+{
+  return (float32x4_t){ x, x, x, fv[secondcall] };
+}
+static inline float64x2_t
+argd (double x)
+{
+  return (float64x2_t){ x, dv[secondcall] };
+}
+#if WANT_SVE_TESTS
 #include <arm_sve.h>
-typedef __SVFloat32_t sv_float;
-typedef __SVFloat64_t sv_double;
-
-static inline sv_float svargf(float x)  {
-	int n = svcntw();
-	float base[n];
-	for (int i=0; i<n; i++)
-		base[i] = (float)x;
-	base[n-1] = (float) fv[secondcall];
-	return svld1(svptrue_b32(), base);
-}
-static inline sv_double svargd(double x) {
-	int n = svcntd();
-	double base[n];
-	for (int i=0; i<n; i++)
-		base[i] = x;
-	base[n-1] = dv[secondcall];
-	return svld1(svptrue_b64(), base);
-}
-static inline float svretf(sv_float vec)  {
-	int n = svcntw();
-	float res[n];
-	svst1(svptrue_b32(), res, vec);
-	return res[0];
-}
-static inline double svretd(sv_double vec) {
-	int n = svcntd();
-	double res[n];
-	svst1(svptrue_b64(), res, vec);
-	return res[0];
+
+static inline svfloat32_t
+svargf (float x)
+{
+  int n = svcntw ();
+  float base[n];
+  for (int i = 0; i < n; i++)
+    base[i] = (float) x;
+  base[n - 1] = (float) fv[secondcall];
+  return svld1 (svptrue_b32 (), base);
+}
+static inline svfloat64_t
+svargd (double x)
+{
+  int n = svcntd ();
+  double base[n];
+  for (int i = 0; i < n; i++)
+    base[i] = x;
+  base[n - 1] = dv[secondcall];
+  return svld1 (svptrue_b64 (), base);
+}
+static inline float
+svretf (svfloat32_t vec, svbool_t pg)
+{
+  return svlastb_f32 (svpfirst (pg, svpfalse ()), vec);
 }
+static inline double
+svretd (svfloat64_t vec, svbool_t pg)
+{
+  return svlastb_f64 (svpfirst (pg, svpfalse ()), vec);
+}
+
+static inline svbool_t
+parse_pg (uint64_t p, int is_single)
+{
+  if (is_single)
+    {
+      uint32_t tmp[svcntw ()];
+      for (unsigned i = 0; i < svcntw (); i++)
+	tmp[i] = (p >> i) & 1;
+      return svcmpne (svptrue_b32 (), svld1 (svptrue_b32 (), tmp), 0);
+    }
+  else
+    {
+      uint64_t tmp[svcntd ()];
+      for (unsigned i = 0; i < svcntd (); i++)
+	tmp[i] = (p >> i) & 1;
+      return svcmpne (svptrue_b64 (), svld1 (svptrue_b64 (), tmp), 0);
+    }
+}
+# endif
 #endif
+
+struct conf
+{
+  int r;
+  int rc;
+  int quiet;
+  int mpfr;
+  int fenv;
+  unsigned long long n;
+  double softlim;
+  double errlim;
+  int ignore_zero_sign;
+#if WANT_SVE_TESTS
+  svbool_t *pg;
 #endif
+};
 
 #include "test/ulp_wrappers.h"
 
@@ -269,12 +268,19 @@ struct fun
   int arity;
   int singleprec;
   int twice;
+  int is_predicated;
   union
   {
     float (*f1) (float);
     float (*f2) (float, float);
     double (*d1) (double);
     double (*d2) (double, double);
+#if WANT_SVE_TESTS
+    float (*f1_pred) (svbool_t, float);
+    float (*f2_pred) (svbool_t, float, float);
+    double (*d1_pred) (svbool_t, double);
+    double (*d2_pred) (svbool_t, double, double);
+#endif
   } fun;
   union
   {
@@ -294,44 +300,33 @@ struct fun
 #endif
 };
 
+// clang-format off
 static const struct fun fun[] = {
 #if USE_MPFR
-# define F(x, x_wrap, x_long, x_mpfr, a, s, t, twice) \
-  {#x, a, s, twice, {.t = x_wrap}, {.t = x_long}, {.t = x_mpfr}},
+#  define F(x, x_wrap, x_long, x_mpfr, a, s, t, twice)                        \
+    { #x, a, s, twice, 0, { .t = x_wrap }, { .t = x_long }, { .t = x_mpfr } },
+#  define SVF(x, x_wrap, x_long, x_mpfr, a, s, t, twice)                      \
+    { #x, a, s, twice, 1, { .t##_pred = x_wrap }, { .t = x_long }, { .t = x_mpfr } },
 #else
-# define F(x, x_wrap, x_long, x_mpfr, a, s, t, twice) \
-  {#x, a, s, twice, {.t = x_wrap}, {.t = x_long}},
+#  define F(x, x_wrap, x_long, x_mpfr, a, s, t, twice)                        \
+    { #x, a, s, twice, 0, { .t = x_wrap }, { .t = x_long } },
+#  define SVF(x, x_wrap, x_long, x_mpfr, a, s, t, twice)                      \
+    { #x, a, s, twice, 1, { .t##_pred = x_wrap }, { .t = x_long } },
 #endif
 #define F1(x) F (x##f, x##f, x, mpfr_##x, 1, 1, f1, 0)
 #define F2(x) F (x##f, x##f, x, mpfr_##x, 2, 1, f2, 0)
 #define D1(x) F (x, x, x##l, mpfr_##x, 1, 0, d1, 0)
 #define D2(x) F (x, x, x##l, mpfr_##x, 2, 0, d2, 0)
 /* Neon routines.  */
-#define VF1(x) F (__v_##x##f, v_##x##f, x, mpfr_##x, 1, 1, f1, 0)
-#define VF2(x) F (__v_##x##f, v_##x##f, x, mpfr_##x, 2, 1, f2, 0)
-#define VD1(x) F (__v_##x, v_##x, x##l, mpfr_##x, 1, 0, d1, 0)
-#define VD2(x) F (__v_##x, v_##x, x##l, mpfr_##x, 2, 0, d2, 0)
-#define VNF1(x) F (__vn_##x##f, vn_##x##f, x, mpfr_##x, 1, 1, f1, 0)
-#define VNF2(x) F (__vn_##x##f, vn_##x##f, x, mpfr_##x, 2, 1, f2, 0)
-#define VND1(x) F (__vn_##x, vn_##x, x##l, mpfr_##x, 1, 0, d1, 0)
-#define VND2(x) F (__vn_##x, vn_##x, x##l, mpfr_##x, 2, 0, d2, 0)
-#define ZVF1(x) F (_ZGVnN4v_##x##f, Z_##x##f, x, mpfr_##x, 1, 1, f1, 0)
-#define ZVF2(x) F (_ZGVnN4vv_##x##f, Z_##x##f, x, mpfr_##x, 2, 1, f2, 0)
-#define ZVD1(x) F (_ZGVnN2v_##x, Z_##x, x##l, mpfr_##x, 1, 0, d1, 0)
-#define ZVD2(x) F (_ZGVnN2vv_##x, Z_##x, x##l, mpfr_##x, 2, 0, d2, 0)
-#define ZVNF1(x) VNF1 (x) ZVF1 (x)
-#define ZVNF2(x) VNF2 (x) ZVF2 (x)
-#define ZVND1(x) VND1 (x) ZVD1 (x)
-#define ZVND2(x) VND2 (x) ZVD2 (x)
+#define ZVNF1(x) F (_ZGVnN4v_##x##f, Z_##x##f, x, mpfr_##x, 1, 1, f1, 0)
+#define ZVNF2(x) F (_ZGVnN4vv_##x##f, Z_##x##f, x, mpfr_##x, 2, 1, f2, 0)
+#define ZVND1(x) F (_ZGVnN2v_##x, Z_##x, x##l, mpfr_##x, 1, 0, d1, 0)
+#define ZVND2(x) F (_ZGVnN2vv_##x, Z_##x, x##l, mpfr_##x, 2, 0, d2, 0)
 /* SVE routines.  */
-#define SVF1(x) F (__sv_##x##f, sv_##x##f, x, mpfr_##x, 1, 1, f1, 0)
-#define SVF2(x) F (__sv_##x##f, sv_##x##f, x, mpfr_##x, 2, 1, f2, 0)
-#define SVD1(x) F (__sv_##x, sv_##x, x##l, mpfr_##x, 1, 0, d1, 0)
-#define SVD2(x) F (__sv_##x, sv_##x, x##l, mpfr_##x, 2, 0, d2, 0)
-#define ZSVF1(x) F (_ZGVsMxv_##x##f, Z_sv_##x##f, x, mpfr_##x, 1, 1, f1, 0)
-#define ZSVF2(x) F (_ZGVsMxvv_##x##f, Z_sv_##x##f, x, mpfr_##x, 2, 1, f2, 0)
-#define ZSVD1(x) F (_ZGVsMxv_##x, Z_sv_##x, x##l, mpfr_##x, 1, 0, d1, 0)
-#define ZSVD2(x) F (_ZGVsMxvv_##x, Z_sv_##x, x##l, mpfr_##x, 2, 0, d2, 0)
+#define ZSVF1(x) SVF (_ZGVsMxv_##x##f, Z_sv_##x##f, x, mpfr_##x, 1, 1, f1, 0)
+#define ZSVF2(x) SVF (_ZGVsMxvv_##x##f, Z_sv_##x##f, x, mpfr_##x, 2, 1, f2, 0)
+#define ZSVD1(x) SVF (_ZGVsMxv_##x, Z_sv_##x, x##l, mpfr_##x, 1, 0, d1, 0)
+#define ZSVD2(x) SVF (_ZGVsMxvv_##x, Z_sv_##x, x##l, mpfr_##x, 2, 0, d2, 0)
 
 #include "test/ulp_funcs.h"
 
@@ -340,11 +335,13 @@ static const struct fun fun[] = {
 #undef F2
 #undef D1
 #undef D2
-#undef SVF1
-#undef SVF2
-#undef SVD1
-#undef SVD2
- {0}};
+#undef ZSVF1
+#undef ZSVF2
+#undef ZSVD1
+#undef ZSVD2
+  { 0 }
+};
+// clang-format on
 
 /* Boilerplate for generic calls.  */
 
@@ -365,24 +362,40 @@ ulpscale_d (double x)
   return e - 0x3ff - 52;
 }
 static inline float
-call_f1 (const struct fun *f, struct args_f1 a)
+call_f1 (const struct fun *f, struct args_f1 a, const struct conf *conf)
 {
+#if WANT_SVE_TESTS
+  if (f->is_predicated)
+    return f->fun.f1_pred (*conf->pg, a.x);
+#endif
   return f->fun.f1 (a.x);
 }
 static inline float
-call_f2 (const struct fun *f, struct args_f2 a)
+call_f2 (const struct fun *f, struct args_f2 a, const struct conf *conf)
 {
+#if WANT_SVE_TESTS
+  if (f->is_predicated)
+    return f->fun.f2_pred (*conf->pg, a.x, a.x2);
+#endif
   return f->fun.f2 (a.x, a.x2);
 }
 
 static inline double
-call_d1 (const struct fun *f, struct args_d1 a)
+call_d1 (const struct fun *f, struct args_d1 a, const struct conf *conf)
 {
+#if WANT_SVE_TESTS
+  if (f->is_predicated)
+    return f->fun.d1_pred (*conf->pg, a.x);
+#endif
   return f->fun.d1 (a.x);
 }
 static inline double
-call_d2 (const struct fun *f, struct args_d2 a)
+call_d2 (const struct fun *f, struct args_d2 a, const struct conf *conf)
 {
+#if WANT_SVE_TESTS
+  if (f->is_predicated)
+    return f->fun.d2_pred (*conf->pg, a.x, a.x2);
+#endif
   return f->fun.d2 (a.x, a.x2);
 }
 static inline double
@@ -594,6 +607,11 @@ usage (void)
   puts ("-c: neutral 'control value' to test behaviour when one lane can affect another. \n"
 	"    This should be different from tested input in other lanes, and non-special \n"
 	"    (i.e. should not trigger fenv exceptions). Default is 1.");
+#endif
+#if WANT_SVE_TESTS
+  puts ("-p: integer input for controlling predicate passed to SVE function. "
+	"If bit N is set, lane N is activated (bits past the vector length "
+	"are ignored). Default is UINT64_MAX (ptrue).");
 #endif
   puts ("-z: ignore sign of 0.");
   puts ("Supported func:");
@@ -633,9 +651,21 @@ getnum (const char *s, int singleprec)
       sign = singleprec ? 1ULL << 31 : 1ULL << 63;
       s++;
     }
+
+  /* Sentinel value for failed parse.  */
+  char *should_not_be_s = NULL;
+
   /* 0xXXXX is treated as bit representation, '-' flips the sign bit.  */
   if (s[0] == '0' && tolower (s[1]) == 'x' && strchr (s, 'p') == 0)
-    return sign ^ strtoull (s, 0, 0);
+    {
+      uint64_t out = sign ^ strtoull (s, &should_not_be_s, 0);
+      if (should_not_be_s == s)
+	{
+	  printf ("ERROR: Could not parse '%s'\n", s);
+	  exit (1);
+	}
+      return out;
+    }
   //	/* SNaN, QNaN, NaN, Inf.  */
   //	for (i=0; s[i] && i < sizeof buf; i++)
   //		buf[i] = tolower(s[i]);
@@ -647,8 +677,16 @@ getnum (const char *s, int singleprec)
   //	if (strcmp(buf, "inf") == 0 || strcmp(buf, "infinity") == 0)
   //		return sign | (singleprec ? 0x7f800000 : 0x7ff0000000000000);
   /* Otherwise assume it's a floating-point literal.  */
-  return sign
-	 | (singleprec ? asuint (strtof (s, 0)) : asuint64 (strtod (s, 0)));
+  uint64_t out = sign
+		 | (singleprec ? asuint (strtof (s, &should_not_be_s))
+			       : asuint64 (strtod (s, &should_not_be_s)));
+  if (should_not_be_s == s)
+    {
+      printf ("ERROR: Could not parse '%s'\n", s);
+      exit (1);
+    }
+
+  return out;
 }
 
 static void
@@ -720,6 +758,9 @@ main (int argc, char *argv[])
   conf.softlim = 0;
   conf.errlim = INFINITY;
   conf.ignore_zero_sign = 0;
+#if WANT_SVE_TESTS
+  uint64_t pg_int = UINT64_MAX;
+#endif
   for (;;)
     {
       argc--;
@@ -767,13 +808,20 @@ main (int argc, char *argv[])
 	case 'z':
 	  conf.ignore_zero_sign = 1;
 	  break;
-#ifdef __vpcs
+#if  __aarch64__ && __linux__
 	case 'c':
 	  argc--;
 	  argv++;
 	  fv[0] = strtof(argv[0], 0);
 	  dv[0] = strtod(argv[0], 0);
 	  break;
+#endif
+#if WANT_SVE_TESTS
+	case 'p':
+	  argc--;
+	  argv++;
+	  pg_int = strtoull (argv[0], 0, 0);
+	  break;
 #endif
 	default:
 	  usage ();
@@ -806,7 +854,7 @@ main (int argc, char *argv[])
       if (strncmp (argv[0], "_ZGVnN", 6) == 0)
 	exit (0);
 #endif
-#if !WANT_SVE_MATH
+#if !WANT_SVE_TESTS
       if (strncmp (argv[0], "_ZGVsMxv", 8) == 0)
 	exit (0);
 #endif
@@ -824,5 +872,13 @@ main (int argc, char *argv[])
   argv++;
   parsegen (&gen, argc, argv, f);
   conf.n = gen.cnt;
+#if WANT_SVE_TESTS
+  svbool_t pg = parse_pg (pg_int, f->singleprec);
+  conf.pg = &pg;
+#endif
   return cmp (f, &gen, &conf);
 }
+
+#if __aarch64__ && __linux__ && WANT_SVE_TESTS && defined(__clang__)
+#  pragma clang attribute pop
+#endif
diff --git a/math/test/ulp.h b/math/test/ulp.h
index b0bc59aeef8ddb..de122257d3b155 100644
--- a/math/test/ulp.h
+++ b/math/test/ulp.h
@@ -1,13 +1,13 @@
 /*
  * Generic functions for ULP error estimation.
  *
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /* For each different math function type,
    T(x) should add a different suffix to x.
-   RT(x) should add a return type specific suffix to x. */
+   RT(x) should add a return type specific suffix to x.  */
 
 #ifdef NEW_RT
 #undef NEW_RT
@@ -47,8 +47,12 @@ static double RT (ulperr) (RT (float) got, const struct RT (ret) * p, int r,
   if (RT(asuint) (got) == RT(asuint) (want))
     return 0.0;
   if (isnan (got) && isnan (want))
-    /* Ignore sign of NaN.  */
+  /* Ignore sign of NaN, and signalling-ness for MPFR.  */
+# if USE_MPFR
+    return 0;
+# else
     return RT (issignaling) (got) == RT (issignaling) (want) ? 0 : INFINITY;
+# endif
   if (signbit (got) != signbit (want))
     {
       /* Fall through to ULP calculation if ignoring sign of zero and at
@@ -80,7 +84,7 @@ static double RT (ulperr) (RT (float) got, const struct RT (ret) * p, int r,
       // TODO: incorrect when got vs want cross a powof2 boundary
       /* error = got > want
 	      ? got - want - tail ulp - 0.5 ulp
-	      : got - want - tail ulp + 0.5 ulp;  */
+	      : got - want - tail ulp + 0.5 ulp.  */
       d = got - want;
       e = d > 0 ? -p->tail - 0.5 : -p->tail + 0.5;
     }
@@ -108,32 +112,34 @@ static int RT(isok_nofenv) (RT(float) ygot, RT(float) ywant)
 }
 #endif
 
-static inline void T(call_fenv) (const struct fun *f, struct T(args) a, int r,
-				  RT(float) * y, int *ex)
+static inline void T (call_fenv) (const struct fun *f, struct T (args) a,
+				  int r, RT (float) * y, int *ex,
+				  const struct conf *conf)
 {
   if (r != FE_TONEAREST)
     fesetround (r);
   feclearexcept (FE_ALL_EXCEPT);
-  *y = T(call) (f, a);
+  *y = T (call) (f, a, conf);
   *ex = fetestexcept (FE_ALL_EXCEPT);
   if (r != FE_TONEAREST)
     fesetround (FE_TONEAREST);
 }
 
-static inline void T(call_nofenv) (const struct fun *f, struct T(args) a,
-				    int r, RT(float) * y, int *ex)
+static inline void T (call_nofenv) (const struct fun *f, struct T (args) a,
+				    int r, RT (float) * y, int *ex,
+				    const struct conf *conf)
 {
   if (r != FE_TONEAREST)
     fesetround (r);
-  *y = T(call) (f, a);
+  *y = T (call) (f, a, conf);
   *ex = 0;
   if (r != FE_TONEAREST)
     fesetround (FE_TONEAREST);
 }
 
-static inline int T(call_long_fenv) (const struct fun *f, struct T(args) a,
-				      int r, struct RT(ret) * p,
-				      RT(float) ygot, int exgot)
+static inline int T (call_long_fenv) (const struct fun *f, struct T (args) a,
+				      int r, struct RT (ret) * p,
+				      RT (float) ygot, int exgot)
 {
   if (r != FE_TONEAREST)
     fesetround (r);
@@ -269,6 +275,7 @@ static int T(cmp) (const struct fun *f, struct gen *gen,
   int r = conf->r;
   int use_mpfr = conf->mpfr;
   int fenv = conf->fenv;
+
   for (;;)
     {
       struct RT(ret) want;
@@ -279,15 +286,15 @@ static int T(cmp) (const struct fun *f, struct gen *gen,
       RT(float) ygot2;
       int fail = 0;
       if (fenv)
-	T(call_fenv) (f, a, r, &ygot, &exgot);
+	T (call_fenv) (f, a, r, &ygot, &exgot, conf);
       else
-	T(call_nofenv) (f, a, r, &ygot, &exgot);
+	T (call_nofenv) (f, a, r, &ygot, &exgot, conf);
       if (f->twice) {
 	secondcall = 1;
 	if (fenv)
-	  T(call_fenv) (f, a, r, &ygot2, &exgot2);
+	  T (call_fenv) (f, a, r, &ygot2, &exgot2, conf);
 	else
-	  T(call_nofenv) (f, a, r, &ygot2, &exgot2);
+	  T (call_nofenv) (f, a, r, &ygot2, &exgot2, conf);
 	secondcall = 0;
 	if (RT(asuint) (ygot) != RT(asuint) (ygot2))
 	  {
diff --git a/math/test/ulp_funcs.h b/math/test/ulp_funcs.h
index 84f7927d393548..b58a68ff275bf8 100644
--- a/math/test/ulp_funcs.h
+++ b/math/test/ulp_funcs.h
@@ -1,40 +1,109 @@
 /*
  * Function entries for ulp.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 /* clang-format off */
- F1 (sin)
- F1 (cos)
  F (sincosf_sinf, sincosf_sinf, sincos_sin, sincos_mpfr_sin, 1, 1, f1, 0)
  F (sincosf_cosf, sincosf_cosf, sincos_cos, sincos_mpfr_cos, 1, 1, f1, 0)
- F1 (exp)
- F1 (exp2)
- F1 (log)
- F1 (log2)
  F2 (pow)
- F1 (erf)
- D1 (exp)
- D1 (exp10)
- D1 (exp2)
- D1 (log)
- D1 (log2)
  D2 (pow)
- D1 (erf)
-#ifdef __vpcs
- F (_ZGVnN4v_sinf, Z_sinf, sin, mpfr_sin, 1, 1, f1, 1)
- F (_ZGVnN4v_cosf, Z_cosf, cos, mpfr_cos, 1, 1, f1, 1)
+#if __aarch64__ && __linux__
  F (_ZGVnN4v_expf_1u, Z_expf_1u, exp, mpfr_exp, 1, 1, f1, 1)
- F (_ZGVnN4v_expf, Z_expf, exp, mpfr_exp, 1, 1, f1, 1)
  F (_ZGVnN4v_exp2f_1u, Z_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 1)
- F (_ZGVnN4v_exp2f, Z_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1)
- F (_ZGVnN4v_logf, Z_logf, log, mpfr_log, 1, 1, f1, 1)
  F (_ZGVnN4vv_powf, Z_powf, pow, mpfr_pow, 2, 1, f2, 1)
- F (_ZGVnN2v_sin, Z_sin, sinl, mpfr_sin, 1, 0, d1, 1)
- F (_ZGVnN2v_cos, Z_cos, cosl, mpfr_cos, 1, 0, d1, 1)
- F (_ZGVnN2v_exp, Z_exp, expl, mpfr_exp, 1, 0, d1, 1)
- F (_ZGVnN2v_log, Z_log, logl, mpfr_log, 1, 0, d1, 1)
  F (_ZGVnN2vv_pow, Z_pow, powl, mpfr_pow, 2, 0, d2, 1)
+ F (_ZGVnN4v_sincosf_sin, v_sincosf_sin, sin, mpfr_sin, 1, 1, f1, 0)
+ F (_ZGVnN4v_sincosf_cos, v_sincosf_cos, cos, mpfr_cos, 1, 1, f1, 0)
+ F (_ZGVnN4v_cexpif_sin, v_cexpif_sin, sin, mpfr_sin, 1, 1, f1, 0)
+ F (_ZGVnN4v_cexpif_cos, v_cexpif_cos, cos, mpfr_cos, 1, 1, f1, 0)
+ F (_ZGVnN4vl4_modff_frac, v_modff_frac, modf_frac, modf_mpfr_frac, 1, 1, f1, 0)
+ F (_ZGVnN4vl4_modff_int, v_modff_int, modf_int, modf_mpfr_int, 1, 1, f1, 0)
+ F (_ZGVnN2v_sincos_sin, v_sincos_sin, sinl, mpfr_sin, 1, 0, d1, 0)
+ F (_ZGVnN2v_sincos_cos, v_sincos_cos, cosl, mpfr_cos, 1, 0, d1, 0)
+ F (_ZGVnN2v_cexpi_sin, v_cexpi_sin, sinl, mpfr_sin, 1, 0, d1, 0)
+ F (_ZGVnN2v_cexpi_cos, v_cexpi_cos, cosl, mpfr_cos, 1, 0, d1, 0)
+ F (_ZGVnN2vl8_modf_frac, v_modf_frac, modfl_frac, modf_mpfr_frac, 1, 0, d1, 0)
+ F (_ZGVnN2vl8_modf_int, v_modf_int, modfl_int, modf_mpfr_int, 1, 0, d1, 0)
 #endif
-/* clang-format on */
+
+#if WANT_SVE_TESTS
+SVF (_ZGVsMxv_sincosf_sin, sv_sincosf_sin, sin, mpfr_sin, 1, 1, f1, 0)
+SVF (_ZGVsMxv_sincosf_cos, sv_sincosf_cos, cos, mpfr_cos, 1, 1, f1, 0)
+SVF (_ZGVsMxv_cexpif_sin, sv_cexpif_sin, sin, mpfr_sin, 1, 1, f1, 0)
+SVF (_ZGVsMxv_cexpif_cos, sv_cexpif_cos, cos, mpfr_cos, 1, 1, f1, 0)
+SVF (_ZGVsMxvl4_modff_frac, sv_modff_frac, modf_frac, modf_mpfr_frac, 1, 1, f1, 0)
+SVF (_ZGVsMxvl4_modff_int, sv_modff_int, modf_int, modf_mpfr_int, 1, 1, f1, 0)
+SVF (_ZGVsMxv_sincos_sin, sv_sincos_sin, sinl, mpfr_sin, 1, 0, d1, 0)
+SVF (_ZGVsMxv_sincos_cos, sv_sincos_cos, cosl, mpfr_cos, 1, 0, d1, 0)
+SVF (_ZGVsMxv_cexpi_sin, sv_cexpi_sin, sinl, mpfr_sin, 1, 0, d1, 0)
+SVF (_ZGVsMxv_cexpi_cos, sv_cexpi_cos, cosl, mpfr_cos, 1, 0, d1, 0)
+SVF (_ZGVsMxvl8_modf_frac, sv_modf_frac, modfl_frac, modf_mpfr_frac, 1, 0, d1, 0)
+SVF (_ZGVsMxvl8_modf_int, sv_modf_int, modfl_int, modf_mpfr_int, 1, 0, d1, 0)
+#endif
+
+#if WANT_EXPERIMENTAL_MATH
+ F (arm_math_erff, arm_math_erff, erf, mpfr_erf, 1, 1, f1, 0)
+ F (arm_math_erf,  arm_math_erf,  erfl, mpfr_erf, 1, 0, d1, 0)
+#endif
+
+#if WANT_TRIGPI_TESTS
+ F (arm_math_cospif, arm_math_cospif, arm_math_cospi, mpfr_cospi, 1, 1, f1, 0)
+ F (arm_math_cospi,  arm_math_cospi,  arm_math_cospil, mpfr_cospi, 1, 0, d1, 0)
+ F (arm_math_sinpif, arm_math_sinpif, arm_math_sinpi, mpfr_sinpi, 1, 1, f1, 0)
+ F (arm_math_sinpi,  arm_math_sinpi,  arm_math_sinpil, mpfr_sinpi, 1, 0, d1, 0)
+ F (arm_math_tanpif, arm_math_tanpif, arm_math_tanpi, mpfr_tanpi, 1, 1, f1, 0)
+ F (arm_math_tanpi,  arm_math_tanpi,  arm_math_tanpil, mpfr_tanpi, 1, 0, d1, 0)
+ F (arm_math_sincospif_sin, arm_math_sincospif_sin, arm_math_sinpi, mpfr_sinpi, 1, 1, f1, 0)
+ F (arm_math_sincospif_cos, arm_math_sincospif_cos, arm_math_cospi, mpfr_cospi, 1, 1, f1, 0)
+ F (arm_math_sincospi_sin, arm_math_sincospi_sin, arm_math_sinpil, mpfr_sinpi, 1, 0, d1, 0)
+ F (arm_math_sincospi_cos, arm_math_sincospi_cos, arm_math_cospil, mpfr_cospi, 1, 0, d1, 0)
+# if __aarch64__ && __linux__
+ F (_ZGVnN4v_cospif, Z_cospif, arm_math_cospi,  mpfr_cospi, 1, 1, f1, 0)
+ F (_ZGVnN2v_cospi,  Z_cospi,  arm_math_cospil, mpfr_cospi, 1, 0, d1, 0)
+ F (_ZGVnN4v_sinpif, Z_sinpif, arm_math_sinpi,  mpfr_sinpi, 1, 1, f1, 0)
+ F (_ZGVnN2v_sinpi,  Z_sinpi,  arm_math_sinpil, mpfr_sinpi, 1, 0, d1, 0)
+ F (_ZGVnN4v_tanpif, Z_tanpif, arm_math_tanpi,  mpfr_tanpi, 1, 1, f1, 0)
+ F (_ZGVnN2v_tanpi,  Z_tanpi,  arm_math_tanpil, mpfr_tanpi, 1, 0, d1, 0)
+ F (_ZGVnN4v_sincospif_sin, v_sincospif_sin, arm_math_sinpi, mpfr_sinpi, 1, 1, f1, 0)
+ F (_ZGVnN4v_sincospif_cos, v_sincospif_cos, arm_math_cospi, mpfr_cospi, 1, 1, f1, 0)
+ F (_ZGVnN2v_sincospi_sin, v_sincospi_sin, arm_math_sinpil, mpfr_sinpi, 1, 0, d1, 0)
+ F (_ZGVnN2v_sincospi_cos, v_sincospi_cos, arm_math_cospil, mpfr_cospi, 1, 0, d1, 0)
+# endif
+# if WANT_SVE_TESTS
+ SVF (_ZGVsMxv_cospif, Z_sv_cospif, arm_math_cospi,  mpfr_cospi, 1, 1, f1, 0)
+ SVF (_ZGVsMxv_cospi,  Z_sv_cospi,  arm_math_cospil, mpfr_cospi, 1, 0, d1, 0)
+ SVF (_ZGVsMxv_sinpif, Z_sv_sinpif, arm_math_sinpi,  mpfr_sinpi, 1, 1, f1, 0)
+ SVF (_ZGVsMxv_sinpi,  Z_sv_sinpi,  arm_math_sinpil, mpfr_sinpi, 1, 0, d1, 0)
+ SVF (_ZGVsMxv_tanpif, Z_sv_tanpif, arm_math_tanpi,  mpfr_tanpi, 1, 1, f1, 0)
+ SVF (_ZGVsMxv_tanpi,  Z_sv_tanpi,  arm_math_tanpil, mpfr_tanpi, 1, 0, d1, 0)
+ SVF (_ZGVsMxvl4l4_sincospif_sin, sv_sincospif_sin, arm_math_sinpi, mpfr_sinpi, 1, 1, f1, 0)
+ SVF (_ZGVsMxvl4l4_sincospif_cos, sv_sincospif_cos, arm_math_cospi, mpfr_cospi, 1, 1, f1, 0)
+ SVF (_ZGVsMxvl8l8_sincospi_sin, sv_sincospi_sin, arm_math_sinpil, mpfr_sinpi, 1, 0, d1, 0)
+ SVF (_ZGVsMxvl8l8_sincospi_cos, sv_sincospi_cos, arm_math_cospil, mpfr_cospi, 1, 0, d1, 0)
+#  if WANT_EXPERIMENTAL_MATH
+SVF (_ZGVsMxvv_powk, Z_sv_powk, ref_powi, mpfr_powi, 2, 0, d2, 0)
+SVF (_ZGVsMxvv_powi, Z_sv_powi, ref_powif, mpfr_powi, 2, 1, f2, 0)
+#  endif
+# endif
+#endif
+
+ /* clang-format on */
+
+#define _ZSF1(f) F1 (f)
+#define _ZSF2(f) F2 (f)
+#define _ZSD1(f) D1 (f)
+#define _ZSD2(f) D2 (f)
+
+#define _ZVF1(f) ZVNF1 (f)
+#define _ZVD1(f) ZVND1 (f)
+#define _ZVF2(f) ZVNF2 (f)
+#define _ZVD2(f) ZVND2 (f)
+
+#define _ZSVF1(f) ZSVF1 (f)
+#define _ZSVF2(f) ZSVF2 (f)
+#define _ZSVD1(f) ZSVD1 (f)
+#define _ZSVD2(f) ZSVD2 (f)
+
+#include "test/ulp_funcs_gen.h"
diff --git a/math/test/ulp_wrappers.h b/math/test/ulp_wrappers.h
index 60dc3d6dd65287..33e1e75f23ab34 100644
--- a/math/test/ulp_wrappers.h
+++ b/math/test/ulp_wrappers.h
@@ -1,12 +1,18 @@
 /*
  * Function wrappers for ulp.
  *
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /* clang-format off */
 
+#if  __aarch64__ && __linux__
+#include <arm_neon.h>
+#endif
+
+#include <stdbool.h>
+
 /* Wrappers for sincos.  */
 static float sincosf_sinf(float x) {(void)cosf(x); return sinf(x);}
 static float sincosf_cosf(float x) {(void)sinf(x); return cosf(x);}
@@ -15,23 +21,409 @@ static double sincos_cos(double x) {(void)sin(x); return cos(x);}
 #if USE_MPFR
 static int sincos_mpfr_sin(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { mpfr_cos(y,x,r); return mpfr_sin(y,x,r); }
 static int sincos_mpfr_cos(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { mpfr_sin(y,x,r); return mpfr_cos(y,x,r); }
+static int modf_mpfr_frac(mpfr_t f, const mpfr_t x, mpfr_rnd_t r) { MPFR_DECL_INIT(i, 80); return mpfr_modf(i,f,x,r); }
+static int modf_mpfr_int(mpfr_t i, const mpfr_t x, mpfr_rnd_t r) { MPFR_DECL_INIT(f, 80); return mpfr_modf(i,f,x,r); }
+# if MPFR_VERSION < MPFR_VERSION_NUM(4, 2, 0)
+static int mpfr_tanpi (mpfr_t ret, const mpfr_t arg, mpfr_rnd_t rnd) {
+  MPFR_DECL_INIT (frd, 1080); /* Fallback for MPFR < 4.2: ret = tan (pi * arg).  */
+  mpfr_const_pi (frd, GMP_RNDN); /* Intermediates stay round-to-nearest at 1080 bits.  */
+  mpfr_mul (frd, frd, arg, GMP_RNDN);
+  return mpfr_tan (ret, frd, rnd); /* Only the final result uses the caller's mode.  */
+}
+static int mpfr_sinpi (mpfr_t ret, const mpfr_t arg, mpfr_rnd_t rnd) {
+  MPFR_DECL_INIT (frd, 1080); /* Fallback for MPFR < 4.2: ret = sin (pi * arg).  */
+  mpfr_const_pi (frd, GMP_RNDN); /* Intermediates stay round-to-nearest at 1080 bits.  */
+  mpfr_mul (frd, frd, arg, GMP_RNDN);
+  return mpfr_sin (ret, frd, rnd); /* Only the final result uses the caller's mode.  */
+}
+
+static int mpfr_cospi (mpfr_t ret, const mpfr_t arg, mpfr_rnd_t rnd) {
+  MPFR_DECL_INIT (frd, 1080); /* Fallback for MPFR < 4.2: ret = cos (pi * arg).  */
+  mpfr_const_pi (frd, GMP_RNDN); /* Intermediates stay round-to-nearest at 1080 bits.  */
+  mpfr_mul (frd, frd, arg, GMP_RNDN);
+  return mpfr_cos (ret, frd, rnd); /* Only the final result uses the caller's mode.  */
+}
+# endif
+# if WANT_EXPERIMENTAL_MATH
+static int wrap_mpfr_powi(mpfr_t ret, const mpfr_t x, const mpfr_t y, mpfr_rnd_t rnd) {
+  /* Reference for integer powers: ret = x ^ trunc (y), rounded per rnd.  */
+  mpfr_t y2; mpfr_init(y2); mpfr_trunc(y2, y);
+  int inexact = mpfr_pow(ret, x, y2, rnd);
+  mpfr_clear(y2); return inexact; /* y2 came from mpfr_init, so release it before returning.  */
+}
+# endif
 #endif
 
+float modff_frac(float x) { float i; return modff(x, &i); }
+float modff_int(float x) { float i; modff(x, &i); return i; }
+double modf_frac(double x) { double i; return modf(x, &i); }
+double modf_int(double x) { double i; modf(x, &i); return i; }
+long double modfl_frac(long double x) { long double i; return modfl(x, &i); }
+long double modfl_int(long double x) { long double i; modfl(x, &i); return i; }
+
 /* Wrappers for vector functions.  */
-#ifdef __vpcs
-static float Z_sinf(float x) { return _ZGVnN4v_sinf(argf(x))[0]; }
-static float Z_cosf(float x) { return _ZGVnN4v_cosf(argf(x))[0]; }
+#if __aarch64__ && __linux__
 static float Z_expf_1u(float x) { return _ZGVnN4v_expf_1u(argf(x))[0]; }
-static float Z_expf(float x) { return _ZGVnN4v_expf(argf(x))[0]; }
 static float Z_exp2f_1u(float x) { return _ZGVnN4v_exp2f_1u(argf(x))[0]; }
-static float Z_exp2f(float x) { return _ZGVnN4v_exp2f(argf(x))[0]; }
-static float Z_logf(float x) { return _ZGVnN4v_logf(argf(x))[0]; }
-static float Z_powf(float x, float y) { return _ZGVnN4vv_powf(argf(x),argf(y))[0]; }
-static double Z_sin(double x) { return _ZGVnN2v_sin(argd(x))[0]; }
-static double Z_cos(double x) { return _ZGVnN2v_cos(argd(x))[0]; }
-static double Z_exp(double x) { return _ZGVnN2v_exp(argd(x))[0]; }
-static double Z_log(double x) { return _ZGVnN2v_log(argd(x))[0]; }
-static double Z_pow(double x, double y) { return _ZGVnN2vv_pow(argd(x),argd(y))[0]; }
 #endif
 
 /* clang-format on */
+
+/* No wrappers for scalar routines, but TEST_SIG will emit them.  */
+#define ZSNF1_WRAP(func)
+#define ZSNF2_WRAP(func)
+#define ZSND1_WRAP(func)
+#define ZSND2_WRAP(func)
+
+#define ZVNF1_WRAP(func)                                                      \
+  static float Z_##func##f (float x)                                          \
+  {                                                                           \
+    return _ZGVnN4v_##func##f (argf (x))[0];                                  \
+  }
+#define ZVNF2_WRAP(func)                                                      \
+  static float Z_##func##f (float x, float y)                                 \
+  {                                                                           \
+    return _ZGVnN4vv_##func##f (argf (x), argf (y))[0];                       \
+  }
+#define ZVND1_WRAP(func)                                                      \
+  static double Z_##func (double x) { return _ZGVnN2v_##func (argd (x))[0]; }
+#define ZVND2_WRAP(func)                                                      \
+  static double Z_##func (double x, double y)                                 \
+  {                                                                           \
+    return _ZGVnN2vv_##func (argd (x), argd (y))[0];                          \
+  }
+
+#if WANT_TRIGPI_TESTS
+float
+arm_math_sincospif_sin (float x)
+{
+  float s, c;
+  arm_math_sincospif (x, &s, &c);
+  return s;
+}
+float
+arm_math_sincospif_cos (float x)
+{
+  float s, c;
+  arm_math_sincospif (x, &s, &c);
+  return c;
+}
+double
+arm_math_sincospi_sin (double x)
+{
+  double s, c;
+  arm_math_sincospi (x, &s, &c);
+  return s;
+}
+double
+arm_math_sincospi_cos (double x)
+{
+  double s, c;
+  arm_math_sincospi (x, &s, &c);
+  return c;
+}
+#endif
+
+#if  __aarch64__ && __linux__
+
+# if WANT_TRIGPI_TESTS
+ZVNF1_WRAP (cospi)
+ZVND1_WRAP (cospi)
+ZVNF1_WRAP (sinpi)
+ZVND1_WRAP (sinpi)
+ZVNF1_WRAP (tanpi)
+ZVND1_WRAP (tanpi)
+
+double
+v_sincospi_sin (double x)
+{
+  double s[2], c[2];
+  _ZGVnN2vl8l8_sincospi (vdupq_n_f64 (x), s, c);
+  return s[0];
+}
+double
+v_sincospi_cos (double x)
+{
+  double s[2], c[2];
+  _ZGVnN2vl8l8_sincospi (vdupq_n_f64 (x), s, c);
+  return c[0];
+}
+float
+v_sincospif_sin (float x)
+{
+  float s[4], c[4];
+  _ZGVnN4vl4l4_sincospif (vdupq_n_f32 (x), s, c);
+  return s[0];
+}
+float
+v_sincospif_cos (float x)
+{
+  float s[4], c[4];
+  _ZGVnN4vl4l4_sincospif (vdupq_n_f32 (x), s, c);
+  return c[0];
+}
+# endif // WANT_TRIGPI_TESTS
+
+float
+v_sincosf_sin (float x)
+{
+  float s[4], c[4];
+  _ZGVnN4vl4l4_sincosf (vdupq_n_f32 (x), s, c);
+  return s[0];
+}
+float
+v_sincosf_cos (float x)
+{
+  float s[4], c[4];
+  _ZGVnN4vl4l4_sincosf (vdupq_n_f32 (x), s, c);
+  return c[0];
+}
+float
+v_cexpif_sin (float x)
+{
+  return _ZGVnN4v_cexpif (vdupq_n_f32 (x)).val[0][0];
+}
+float
+v_cexpif_cos (float x)
+{
+  return _ZGVnN4v_cexpif (vdupq_n_f32 (x)).val[1][0];
+}
+float
+v_modff_frac (float x)
+{
+  float y[4];
+  return _ZGVnN4vl4_modff (vdupq_n_f32 (x), y)[0];
+}
+float
+v_modff_int (float x)
+{
+  float y[4];
+  _ZGVnN4vl4_modff (vdupq_n_f32 (x), y);
+  return y[0];
+}
+double
+v_sincos_sin (double x)
+{
+  double s[2], c[2];
+  _ZGVnN2vl8l8_sincos (vdupq_n_f64 (x), s, c);
+  return s[0];
+}
+double
+v_sincos_cos (double x)
+{
+  double s[2], c[2];
+  _ZGVnN2vl8l8_sincos (vdupq_n_f64 (x), s, c);
+  return c[0];
+}
+double
+v_cexpi_sin (double x)
+{
+  return _ZGVnN2v_cexpi (vdupq_n_f64 (x)).val[0][0];
+}
+double
+v_cexpi_cos (double x)
+{
+  return _ZGVnN2v_cexpi (vdupq_n_f64 (x)).val[1][0];
+}
+double
+v_modf_frac (double x)
+{
+  double y[2];
+  return _ZGVnN2vl8_modf (vdupq_n_f64 (x), y)[0];
+}
+double
+v_modf_int (double x)
+{
+  double y[2];
+  _ZGVnN2vl8_modf (vdupq_n_f64 (x), y);
+  return y[0];
+}
+#endif //  __aarch64__ && __linux__
+
+#if WANT_SVE_TESTS
+# define ZSVNF1_WRAP(func)                                                   \
+    static float Z_sv_##func##f (svbool_t pg, float x)                        \
+    {                                                                         \
+      return svretf (_ZGVsMxv_##func##f (svargf (x), pg), pg);                \
+    }
+# define ZSVNF2_WRAP(func)                                                   \
+    static float Z_sv_##func##f (svbool_t pg, float x, float y)               \
+    {                                                                         \
+      return svretf (_ZGVsMxvv_##func##f (svargf (x), svargf (y), pg), pg);   \
+    }
+# define ZSVND1_WRAP(func)                                                   \
+    static double Z_sv_##func (svbool_t pg, double x)                         \
+    {                                                                         \
+      return svretd (_ZGVsMxv_##func (svargd (x), pg), pg);                   \
+    }
+# define ZSVND2_WRAP(func)                                                   \
+    static double Z_sv_##func (svbool_t pg, double x, double y)               \
+    {                                                                         \
+      return svretd (_ZGVsMxvv_##func (svargd (x), svargd (y), pg), pg);      \
+    }
+
+# if WANT_TRIGPI_TESTS
+ZSVNF1_WRAP (cospi)
+ZSVND1_WRAP (cospi)
+ZSVNF1_WRAP (sinpi)
+ZSVND1_WRAP (sinpi)
+ZSVNF1_WRAP (tanpi)
+ZSVND1_WRAP (tanpi)
+double
+sv_sincospi_sin (svbool_t pg, double x)
+{
+  double s[svcntd ()], c[svcntd ()];
+  _ZGVsMxvl8l8_sincospi (svdup_f64 (x), s, c, pg);
+  return svretd (svld1 (pg, s), pg);
+}
+double
+sv_sincospi_cos (svbool_t pg, double x)
+{
+  double s[svcntd ()], c[svcntd ()];
+  _ZGVsMxvl8l8_sincospi (svdup_f64 (x), s, c, pg);
+  return svretd (svld1 (pg, c), pg);
+}
+float
+sv_sincospif_sin (svbool_t pg, float x)
+{
+  float s[svcntw ()], c[svcntw ()];
+  _ZGVsMxvl4l4_sincospif (svdup_f32 (x), s, c, pg);
+  return svretf (svld1 (pg, s), pg);
+}
+float
+sv_sincospif_cos (svbool_t pg, float x)
+{
+  float s[svcntw ()], c[svcntw ()];
+  _ZGVsMxvl4l4_sincospif (svdup_f32 (x), s, c, pg);
+  return svretf (svld1 (pg, c), pg);
+}
+# endif // WANT_TRIGPI_TESTS
+
+float
+sv_sincosf_sin (svbool_t pg, float x)
+{
+  float s[svcntw ()], c[svcntw ()];
+  _ZGVsMxvl4l4_sincosf (svdup_f32 (x), s, c, pg);
+  return svretf (svld1 (pg, s), pg);
+}
+float
+sv_sincosf_cos (svbool_t pg, float x)
+{
+  float s[svcntw ()], c[svcntw ()];
+  _ZGVsMxvl4l4_sincosf (svdup_f32 (x), s, c, pg);
+  return svretf (svld1 (pg, c), pg);
+}
+float
+sv_cexpif_sin (svbool_t pg, float x)
+{
+  return svretf (svget2 (_ZGVsMxv_cexpif (svdup_f32 (x), pg), 0), pg);
+}
+float
+sv_cexpif_cos (svbool_t pg, float x)
+{
+  return svretf (svget2 (_ZGVsMxv_cexpif (svdup_f32 (x), pg), 1), pg);
+}
+float
+sv_modff_frac (svbool_t pg, float x)
+{
+  float i[svcntw ()];
+  return svretf (_ZGVsMxvl4_modff (svdup_f32 (x), i, pg), pg);
+}
+float
+sv_modff_int (svbool_t pg, float x)
+{
+  float i[svcntw ()];
+  _ZGVsMxvl4_modff (svdup_f32 (x), i, pg);
+  return svretf (svld1 (pg, i), pg);
+}
+double
+sv_sincos_sin (svbool_t pg, double x)
+{
+  double s[svcntd ()], c[svcntd ()];
+  _ZGVsMxvl8l8_sincos (svdup_f64 (x), s, c, pg);
+  return svretd (svld1 (pg, s), pg);
+}
+double
+sv_sincos_cos (svbool_t pg, double x)
+{
+  double s[svcntd ()], c[svcntd ()];
+  _ZGVsMxvl8l8_sincos (svdup_f64 (x), s, c, pg);
+  return svretd (svld1 (pg, c), pg);
+}
+double
+sv_cexpi_sin (svbool_t pg, double x)
+{
+  return svretd (svget2 (_ZGVsMxv_cexpi (svdup_f64 (x), pg), 0), pg);
+}
+double
+sv_cexpi_cos (svbool_t pg, double x)
+{
+  return svretd (svget2 (_ZGVsMxv_cexpi (svdup_f64 (x), pg), 1), pg);
+}
+double
+sv_modf_frac (svbool_t pg, double x)
+{
+  double i[svcntd ()];
+  return svretd (_ZGVsMxvl8_modf (svdup_f64 (x), i, pg), pg);
+}
+double
+sv_modf_int (svbool_t pg, double x)
+{
+  double i[svcntd ()];
+  _ZGVsMxvl8_modf (svdup_f64 (x), i, pg);
+  return svretd (svld1 (pg, i), pg);
+}
+
+# if WANT_EXPERIMENTAL_MATH
+
+/* Our implementations of powi/powk are too imprecise to verify
+   against any established pow implementation. Instead we have the
+   following simple implementation, against which it is enough to
+   maintain bitwise reproducibility. Note the test framework expects
+   the reference impl to be of higher precision than the function
+   under test. For instance this means that the reference for
+   double-precision powi will be passed a long double, so to check
+   bitwise reproducibility we have to cast it back down to
+   double. This is fine since a round-trip to higher precision and
+   back down is correctly rounded.  */
+#  define DECL_POW_INT_REF(NAME, DBL_T, FLT_T, INT_T)                       \
+      static DBL_T __attribute__ ((unused)) NAME (DBL_T in_val, DBL_T y)      \
+      {                                                                       \
+	INT_T n = (INT_T) round (y);                                          \
+	FLT_T acc = 1.0;                                                      \
+	bool want_recip = n < 0;                                              \
+	n = n < 0 ? -n : n;                                                   \
+                                                                              \
+	for (FLT_T c = in_val; n; c *= c, n >>= 1)                            \
+	  {                                                                   \
+	    if (n & 0x1)                                                      \
+	      {                                                               \
+		acc *= c;                                                     \
+	      }                                                               \
+	  }                                                                   \
+	if (want_recip)                                                       \
+	  {                                                                   \
+	    acc = 1.0 / acc;                                                  \
+	  }                                                                   \
+	return acc;                                                           \
+      }
+
+DECL_POW_INT_REF (ref_powif, double, float, int)
+DECL_POW_INT_REF (ref_powi, long double, double, int)
+static float
+Z_sv_powi (svbool_t pg, float x, float y)
+{ /* Exponent y is rounded to int and handed to the routine via svdup_s32.  */
+  return svretf (_ZGVsMxvv_powi (svargf (x), svdup_s32 ((int) round (y)), pg),
+		 pg);
+}
+static double
+Z_sv_powk (svbool_t pg, double x, double y)
+{ /* Exponent y is rounded to long and handed to the routine via svdup_s64.  */
+  return svretd (_ZGVsMxvv_powk (svargd (x), svdup_s64 ((long) round (y)), pg),
+		 pg);
+}
+
+# endif // WANT_EXPERIMENTAL_MATH
+#endif	// WANT_SVE_TESTS
+
+#include "test/ulp_wrappers_gen.h"
diff --git a/math/tgamma128.c b/math/tgamma128.c
index 65deacc49d99f9..d6049207b91f3a 100644
--- a/math/tgamma128.c
+++ b/math/tgamma128.c
@@ -338,6 +338,8 @@ long double tgamma128(long double x)
                 mult = 2111.484375L+t*(4033.5L+t*(3016.1875L+t*(
                     1140.0L+t*(231.25L+t*(24.0L+t)))));
                 break;
+	    default:
+	        __builtin_unreachable();
             }
         }
 
diff --git a/pl/math/tools/asin.sollya b/math/tools/asin.sollya
similarity index 93%
rename from pl/math/tools/asin.sollya
rename to math/tools/asin.sollya
index 8ef861d0898bd5..02c4a93356c3d2 100644
--- a/pl/math/tools/asin.sollya
+++ b/math/tools/asin.sollya
@@ -1,6 +1,6 @@
 // polynomial for approximating asin(x)
 //
-// Copyright (c) 2023, Arm Limited.
+// Copyright (c) 2023-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 f = asin(x);
diff --git a/pl/math/tools/asinf.sollya b/math/tools/asinf.sollya
similarity index 94%
rename from pl/math/tools/asinf.sollya
rename to math/tools/asinf.sollya
index 5b627e546c73b3..69d1803875d160 100644
--- a/pl/math/tools/asinf.sollya
+++ b/math/tools/asinf.sollya
@@ -1,6 +1,6 @@
 // polynomial for approximating asinf(x)
 //
-// Copyright (c) 2023, Arm Limited.
+// Copyright (c) 2023-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 f = asin(x);
diff --git a/pl/math/tools/asinh.sollya b/math/tools/asinh.sollya
similarity index 94%
rename from pl/math/tools/asinh.sollya
rename to math/tools/asinh.sollya
index 663ee92f3f3471..eea9b808116859 100644
--- a/pl/math/tools/asinh.sollya
+++ b/math/tools/asinh.sollya
@@ -1,6 +1,6 @@
 // polynomial for approximating asinh(x)
 //
-// Copyright (c) 2022-2023, Arm Limited.
+// Copyright (c) 2022-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 // Polynomial is used in [2^-26, 1]. However it is least accurate close to 1, so
diff --git a/pl/math/tools/asinhf.sollya b/math/tools/asinhf.sollya
similarity index 93%
rename from pl/math/tools/asinhf.sollya
rename to math/tools/asinhf.sollya
index ab115b53b8dc79..5f1580fce88328 100644
--- a/pl/math/tools/asinhf.sollya
+++ b/math/tools/asinhf.sollya
@@ -1,6 +1,6 @@
 // polynomial for approximating asinh(x)
 //
-// Copyright (c) 2022-2023, Arm Limited.
+// Copyright (c) 2022-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 deg = 9;
diff --git a/pl/math/tools/atan.sollya b/math/tools/atan.sollya
similarity index 93%
rename from pl/math/tools/atan.sollya
rename to math/tools/atan.sollya
index ad4f33b8516a94..048017d8d269ae 100644
--- a/pl/math/tools/atan.sollya
+++ b/math/tools/atan.sollya
@@ -1,6 +1,6 @@
 // polynomial for approximating atan(x) and atan2(y, x)
 //
-// Copyright (c) 2022-2023, Arm Limited.
+// Copyright (c) 2022-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 // atan is odd, so approximate with an odd polynomial:
diff --git a/pl/math/tools/atanf.sollya b/math/tools/atanf.sollya
similarity index 92%
rename from pl/math/tools/atanf.sollya
rename to math/tools/atanf.sollya
index ed88d0ba90f937..21c3ba2bfa1d8e 100644
--- a/pl/math/tools/atanf.sollya
+++ b/math/tools/atanf.sollya
@@ -1,6 +1,6 @@
 // polynomial for approximating atanf(x)
 //
-// Copyright (c) 2022-2023, Arm Limited.
+// Copyright (c) 2022-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 // Generate list of monomials:
diff --git a/pl/math/tools/cbrt.sollya b/math/tools/cbrt.sollya
similarity index 90%
rename from pl/math/tools/cbrt.sollya
rename to math/tools/cbrt.sollya
index 1d43dc73d8cdfa..2490a69ac029d0 100644
--- a/pl/math/tools/cbrt.sollya
+++ b/math/tools/cbrt.sollya
@@ -1,6 +1,6 @@
 // polynomial for approximating cbrt(x) in double precision
 //
-// Copyright (c) 2022-2023, Arm Limited.
+// Copyright (c) 2022-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 deg = 3;
diff --git a/pl/math/tools/cbrtf.sollya b/math/tools/cbrtf.sollya
similarity index 90%
rename from pl/math/tools/cbrtf.sollya
rename to math/tools/cbrtf.sollya
index 4e0cc69b46a58e..1debf930e7226e 100644
--- a/pl/math/tools/cbrtf.sollya
+++ b/math/tools/cbrtf.sollya
@@ -1,6 +1,6 @@
 // polynomial for approximating cbrt(x) in single precision
 //
-// Copyright (c) 2022-2023, Arm Limited.
+// Copyright (c) 2022-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 deg = 3;
diff --git a/pl/math/tools/erf.sollya b/math/tools/erf.sollya
similarity index 92%
rename from pl/math/tools/erf.sollya
rename to math/tools/erf.sollya
index b2fc559b511ef1..060e1686c835ae 100644
--- a/pl/math/tools/erf.sollya
+++ b/math/tools/erf.sollya
@@ -1,6 +1,6 @@
 // tables and constants for approximating erf(x).
 //
-// Copyright (c) 2023, Arm Limited.
+// Copyright (c) 2023-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 display = hexadecimal;
diff --git a/pl/math/tools/erfc.sollya b/math/tools/erfc.sollya
similarity index 95%
rename from pl/math/tools/erfc.sollya
rename to math/tools/erfc.sollya
index 1e2791291ebbb8..1b4b0006609392 100644
--- a/pl/math/tools/erfc.sollya
+++ b/math/tools/erfc.sollya
@@ -1,6 +1,6 @@
 // tables and constants for approximating erfc(x).
 //
-// Copyright (c) 2023, Arm Limited.
+// Copyright (c) 2023-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 display = hexadecimal;
diff --git a/pl/math/tools/erfcf.sollya b/math/tools/erfcf.sollya
similarity index 91%
rename from pl/math/tools/erfcf.sollya
rename to math/tools/erfcf.sollya
index 1d7fc264d99d2b..a8e0409f5db56f 100644
--- a/pl/math/tools/erfcf.sollya
+++ b/math/tools/erfcf.sollya
@@ -1,6 +1,6 @@
 // tables and constants for approximating erfcf(x).
 //
-// Copyright (c) 2023, Arm Limited.
+// Copyright (c) 2023-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 display = hexadecimal;
diff --git a/pl/math/tools/erff.sollya b/math/tools/erff.sollya
similarity index 91%
rename from pl/math/tools/erff.sollya
rename to math/tools/erff.sollya
index 59b23ef021f0fb..c0178a2b24adfe 100644
--- a/pl/math/tools/erff.sollya
+++ b/math/tools/erff.sollya
@@ -1,6 +1,6 @@
 // tables and constants for approximating erff(x).
 //
-// Copyright (c) 2023, Arm Limited.
+// Copyright (c) 2023-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 display = hexadecimal;
diff --git a/pl/math/tools/exp10.sollya b/math/tools/exp10.sollya
similarity index 97%
rename from pl/math/tools/exp10.sollya
rename to math/tools/exp10.sollya
index 9f30b401820958..91f92595b96dab 100644
--- a/pl/math/tools/exp10.sollya
+++ b/math/tools/exp10.sollya
@@ -1,6 +1,6 @@
 // polynomial for approximating 10^x
 //
-// Copyright (c) 2023, Arm Limited.
+// Copyright (c) 2023-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 // exp10f parameters
diff --git a/pl/math/tools/expm1.sollya b/math/tools/expm1.sollya
similarity index 91%
rename from pl/math/tools/expm1.sollya
rename to math/tools/expm1.sollya
index 7b6f324eb247b8..d87466a066af65 100644
--- a/pl/math/tools/expm1.sollya
+++ b/math/tools/expm1.sollya
@@ -1,6 +1,6 @@
 // polynomial for approximating exp(x)-1 in double precision
 //
-// Copyright (c) 2022-2023, Arm Limited.
+// Copyright (c) 2022-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 deg = 12;
diff --git a/pl/math/tools/expm1f.sollya b/math/tools/expm1f.sollya
similarity index 91%
rename from pl/math/tools/expm1f.sollya
rename to math/tools/expm1f.sollya
index efdf1bd301e0ed..bb9496f3f2c426 100644
--- a/pl/math/tools/expm1f.sollya
+++ b/math/tools/expm1f.sollya
@@ -1,6 +1,6 @@
 // polynomial for approximating exp(x)-1 in single precision
 //
-// Copyright (c) 2022-2023, Arm Limited.
+// Copyright (c) 2022-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 deg = 5;
diff --git a/pl/math/tools/log10.sollya b/math/tools/log10.sollya
similarity index 96%
rename from pl/math/tools/log10.sollya
rename to math/tools/log10.sollya
index 85d1d15c16985b..78f956b14b9599 100644
--- a/pl/math/tools/log10.sollya
+++ b/math/tools/log10.sollya
@@ -1,6 +1,6 @@
 // polynomial for approximating log10(1+x)
 //
-// Copyright (c) 2019-2023, Arm Limited.
+// Copyright (c) 2019-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 deg = 6; // poly degree
diff --git a/pl/math/tools/log10f.sollya b/math/tools/log10f.sollya
similarity index 96%
rename from pl/math/tools/log10f.sollya
rename to math/tools/log10f.sollya
index 94bf32f2c449b3..c64a30aa8e1841 100644
--- a/pl/math/tools/log10f.sollya
+++ b/math/tools/log10f.sollya
@@ -1,6 +1,6 @@
 // polynomial for approximating log10f(1+x)
 //
-// Copyright (c) 2019-2023, Arm Limited.
+// Copyright (c) 2019-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 // Computation of log10f(1+x) will be carried out in double precision
diff --git a/pl/math/tools/log1p.sollya b/math/tools/log1p.sollya
similarity index 93%
rename from pl/math/tools/log1p.sollya
rename to math/tools/log1p.sollya
index 598a36af03394f..0cf72081fabb5e 100644
--- a/pl/math/tools/log1p.sollya
+++ b/math/tools/log1p.sollya
@@ -1,6 +1,6 @@
 // polynomial for approximating log(1+x) in double precision
 //
-// Copyright (c) 2022-2023, Arm Limited.
+// Copyright (c) 2022-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 deg = 20;
diff --git a/pl/math/tools/log1pf.sollya b/math/tools/log1pf.sollya
similarity index 91%
rename from pl/math/tools/log1pf.sollya
rename to math/tools/log1pf.sollya
index cc1db10e4c0c8d..fc542c93711151 100644
--- a/pl/math/tools/log1pf.sollya
+++ b/math/tools/log1pf.sollya
@@ -1,6 +1,6 @@
 // polynomial for approximating log(1+x) in single precision
 //
-// Copyright (c) 2022-2023, Arm Limited.
+// Copyright (c) 2022-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 deg = 10;
diff --git a/pl/math/tools/sincos.sollya b/math/tools/sincos.sollya
similarity index 92%
rename from pl/math/tools/sincos.sollya
rename to math/tools/sincos.sollya
index 7d36266b446b73..600368507f4ee6 100644
--- a/pl/math/tools/sincos.sollya
+++ b/math/tools/sincos.sollya
@@ -1,9 +1,9 @@
 // polynomial for approximating cos(x)
 //
-// Copyright (c) 2023, Arm Limited.
+// Copyright (c) 2023-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
-// This script only finds the coeffs for cos - see math/aarch64/v_sin.c for sin coeffs
+// This script only finds the coeffs for cos - see math/aarch64/advsimd/sin.c for sin coeffs
 
 deg = 14;   // polynomial degree
 a = -pi/4; // interval
diff --git a/pl/math/tools/sincosf.sollya b/math/tools/sincosf.sollya
similarity index 95%
rename from pl/math/tools/sincosf.sollya
rename to math/tools/sincosf.sollya
index 178ee83ac19607..add874e87a9a18 100644
--- a/pl/math/tools/sincosf.sollya
+++ b/math/tools/sincosf.sollya
@@ -1,6 +1,6 @@
 // polynomial for approximating cos(x)
 //
-// Copyright (c) 2023, Arm Limited.
+// Copyright (c) 2023-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 // This script only finds the coeffs for cos - see math/tools/sin.sollya for sin coeffs.
diff --git a/pl/math/tools/sinpi.sollya b/math/tools/sinpi.sollya
similarity index 95%
rename from pl/math/tools/sinpi.sollya
rename to math/tools/sinpi.sollya
index 62cc87e7697d47..9bc5b1c7fc2a60 100644
--- a/pl/math/tools/sinpi.sollya
+++ b/math/tools/sinpi.sollya
@@ -1,6 +1,6 @@
 // polynomial for approximating sinpi(x)
 //
-// Copyright (c) 2023, Arm Limited.
+// Copyright (c) 2023-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 deg = 19;  // polynomial degree
diff --git a/pl/math/tools/tan.sollya b/math/tools/tan.sollya
similarity index 91%
rename from pl/math/tools/tan.sollya
rename to math/tools/tan.sollya
index bb0bb28270e33a..ca8a170bedaa9d 100644
--- a/pl/math/tools/tan.sollya
+++ b/math/tools/tan.sollya
@@ -1,6 +1,6 @@
 // polynomial for approximating double precision tan(x)
 //
-// Copyright (c) 2023, Arm Limited.
+// Copyright (c) 2023-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 deg = 8;
diff --git a/pl/math/tools/tanf.sollya b/math/tools/tanf.sollya
similarity index 98%
rename from pl/math/tools/tanf.sollya
rename to math/tools/tanf.sollya
index f4b49b40ae64ea..054d3db4404696 100644
--- a/pl/math/tools/tanf.sollya
+++ b/math/tools/tanf.sollya
@@ -1,6 +1,6 @@
 // polynomial for approximating single precision tan(x)
 //
-// Copyright (c) 2022-2023, Arm Limited.
+// Copyright (c) 2022-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 dtype = single;
diff --git a/math/tools/tanpi.sollya b/math/tools/tanpi.sollya
new file mode 100644
index 00000000000000..8edbc359ab8e45
--- /dev/null
+++ b/math/tools/tanpi.sollya
@@ -0,0 +1,48 @@
+// polynomial for approximating tanpi/f(x)
+//
+// Copyright (c) 2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+// 0 for tanpi/f [0,0.25], 1 for tanpi/f [0.25,1]
+method = 0;
+dtype = double;
+
+if (dtype == single) then {
+    if (method == 0) then { deg = 5; }
+    else if (method == 1) then { deg = 3; };
+} else if (dtype == double) then {
+    if (method == 0) then { deg = 13; }
+    else if (method == 1) then { deg = 8; };
+};
+
+a = 0x1.0p-126;
+b = 1/4;
+
+if (method == 0) then {
+    g = tan(pi * x);
+    F = proc(P) { return pi * x + x^3 * P(x^2); };
+    f = (g(sqrt(x)) - pi * sqrt(x))/(x^(3/2));
+} else if (method == 1) then {
+    g = 1/tan(pi * x);
+    F = proc(P) { return 1/(pi * x) + x * P(x^2); };
+    f = (g(sqrt(x)) / sqrt(x)) - 1/(pi * x);
+};
+
+poly = fpminimax(f, deg, [|dtype ...|], [a*a;b*b]);
+
+//
+// Display coefficients in Sollya
+//
+display = hexadecimal!;
+if (dtype==double) then { prec = 53!; }
+else if (dtype==single) then { prec = 23!; };
+print("_coeffs :_ hex");
+for i from 0 to deg do coeff(poly, i);
+
+// Compute errors
+//display = hexadecimal!;
+d_rel_err = dirtyinfnorm(1-F(poly)/g(x), [a;b]);
+d_abs_err = dirtyinfnorm(g(x)-F(poly), [a;b]);
+print("dirty rel error:", d_rel_err);
+print("dirty abs error:", d_abs_err);
+print("in [",a,b,"]");
diff --git a/pl/math/tools/v_erf.sollya b/math/tools/v_erf.sollya
similarity index 91%
rename from pl/math/tools/v_erf.sollya
rename to math/tools/v_erf.sollya
index 394ba377df12b5..5d7795842bcd89 100644
--- a/pl/math/tools/v_erf.sollya
+++ b/math/tools/v_erf.sollya
@@ -2,7 +2,7 @@
 // To generate coefficients for interval i (0 to 47) do:
 // $ sollya v_erf.sollya $i
 //
-// Copyright (c) 2022-2023, Arm Limited.
+// Copyright (c) 2022-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 scale = 1/8;
diff --git a/pl/math/tools/v_erfc.sollya b/math/tools/v_erfc.sollya
similarity index 96%
rename from pl/math/tools/v_erfc.sollya
rename to math/tools/v_erfc.sollya
index 3b03ba07863dd4..764b333d6d258d 100644
--- a/pl/math/tools/v_erfc.sollya
+++ b/math/tools/v_erfc.sollya
@@ -1,6 +1,6 @@
 // polynomial for approximating erfc(x)*exp(x*x)
 //
-// Copyright (c) 2022-2023, Arm Limited.
+// Copyright (c) 2022-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 deg = 12; // poly degree
diff --git a/pl/math/tools/v_log10.sollya b/math/tools/v_log10.sollya
similarity index 96%
rename from pl/math/tools/v_log10.sollya
rename to math/tools/v_log10.sollya
index e2df4364ada016..5181074f676263 100644
--- a/pl/math/tools/v_log10.sollya
+++ b/math/tools/v_log10.sollya
@@ -1,6 +1,6 @@
 // polynomial used for __v_log10(x)
 //
-// Copyright (c) 2019-2023, Arm Limited.
+// Copyright (c) 2019-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 deg = 6; // poly degree
diff --git a/pl/math/tools/v_log10f.sollya b/math/tools/v_log10f.sollya
similarity index 96%
rename from pl/math/tools/v_log10f.sollya
rename to math/tools/v_log10f.sollya
index 396d5a92302bd3..4906cb1d2137aa 100644
--- a/pl/math/tools/v_log10f.sollya
+++ b/math/tools/v_log10f.sollya
@@ -1,6 +1,6 @@
 // polynomial for approximating v_log10f(1+x)
 //
-// Copyright (c) 2019-2023, Arm Limited.
+// Copyright (c) 2019-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 deg = 9; // poly degree
diff --git a/pl/math/tools/v_log2f.sollya b/math/tools/v_log2f.sollya
similarity index 96%
rename from pl/math/tools/v_log2f.sollya
rename to math/tools/v_log2f.sollya
index 99e050c91b0310..337d4830a2aede 100644
--- a/pl/math/tools/v_log2f.sollya
+++ b/math/tools/v_log2f.sollya
@@ -1,6 +1,6 @@
 // polynomial used for __v_log2f(x)
 //
-// Copyright (c) 2022-2023, Arm Limited.
+// Copyright (c) 2022-2024, Arm Limited.
 // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 deg = 9; // poly degree
diff --git a/networking/Dir.mk b/networking/Dir.mk
index 2589e0a1f91c47..b3ca2ff335e454 100644
--- a/networking/Dir.mk
+++ b/networking/Dir.mk
@@ -1,6 +1,6 @@
 # Makefile fragment - requires GNU make
 #
-# Copyright (c) 2019-2020, Arm Limited.
+# Copyright (c) 2019-2025, Arm Limited.
 # SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 S := $(srcdir)/networking
@@ -46,12 +46,12 @@ $(networking-objs): CFLAGS_ALL += $(networking-cflags)
 build/lib/libnetworking.so: $(networking-lib-objs:%.o=%.os)
 	$(CC) $(CFLAGS_ALL) $(LDFLAGS) -shared -o $@ $^
 
-build/lib/libnetworkinglib.a: $(networking-lib-objs)
+build/lib/libnetworking.a: $(networking-lib-objs)
 	rm -f $@
 	$(AR) rc $@ $^
 	$(RANLIB) $@
 
-build/bin/test/%: $(B)/test/%.o build/lib/libnetworkinglib.a
+build/bin/test/%: $(B)/test/%.o build/lib/libnetworking.a
 	$(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)
 
 build/include/%.h: $(S)/include/%.h
diff --git a/pl/Dir.mk b/pl/Dir.mk
deleted file mode 100644
index 2d007790d24145..00000000000000
--- a/pl/Dir.mk
+++ /dev/null
@@ -1,21 +0,0 @@
-# Makefile fragment - requires GNU make
-#
-# Copyright (c) 2022, Arm Limited.
-# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
-
-# These targets are defined if we prescribe pl in SUBS.
-# It requires PLSUBS to be set.
-
-$(foreach sub,$(PLSUBS),$(eval include $(srcdir)/pl/$(sub)/Dir.mk))
-
-pl-files := $($(PLSUBS:%=pl/%-files))
-
-all-pl: $(PLSUBS:%=all-pl/%)
-
-check-pl: $(PLSUBS:%=check-pl/%)
-
-install-pl: $(PLSUBS:%=install-pl/%)
-
-clean-pl: $(PLSUBS:%=clean-pl/%)
-
-.PHONY: all-pl check-pl install-pl clean-pl
diff --git a/pl/math/Dir.mk b/pl/math/Dir.mk
deleted file mode 100644
index 94b26cf3309c5d..00000000000000
--- a/pl/math/Dir.mk
+++ /dev/null
@@ -1,216 +0,0 @@
-# Makefile fragment - requires GNU make
-#
-# Copyright (c) 2019-2024, Arm Limited.
-# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
-
-PLM := $(srcdir)/pl/math
-AOR := $(srcdir)/math
-B := build/pl/math
-
-pl-lib-srcs := $(wildcard $(PLM)/*.[cS])
-
-ifeq ($(WANT_SVE_MATH), 0)
-pl-lib-srcs := $(filter-out $(PLM)/sv_%, $(pl-lib-srcs))
-endif
-
-math-test-srcs := \
-	$(AOR)/test/mathtest.c \
-	$(AOR)/test/mathbench.c \
-	$(AOR)/test/ulp.c \
-
-math-test-host-srcs := $(wildcard $(AOR)/test/rtest/*.[cS])
-
-pl-includes := $(patsubst $(PLM)/%,build/pl/%,$(wildcard $(PLM)/include/*.h))
-pl-test-includes := $(patsubst $(PLM)/%,build/pl/include/%,$(wildcard $(PLM)/test/*.h))
-
-pl-libs := \
-	build/pl/lib/libmathlib.so \
-	build/pl/lib/libmathlib.a \
-
-math-tools := \
-	build/pl/bin/mathtest \
-	build/pl/bin/mathbench \
-	build/pl/bin/mathbench_libc \
-	build/pl/bin/runulp.sh \
-	build/pl/bin/ulp \
-
-math-host-tools := \
-	build/pl/bin/rtest \
-
-pl-lib-objs := $(patsubst $(PLM)/%,$(B)/%.o,$(basename $(pl-lib-srcs)))
-math-test-objs := $(patsubst $(AOR)/%,$(B)/%.o,$(basename $(math-test-srcs)))
-math-host-objs := $(patsubst $(AOR)/%,$(B)/%.o,$(basename $(math-test-host-srcs)))
-pl-target-objs := $(pl-lib-objs) $(math-test-objs)
-pl-objs := $(pl-target-objs) $(pl-target-objs:%.o=%.os) $(math-host-objs)
-
-pl/math-files := \
-	$(pl-objs) \
-	$(pl-libs) \
-	$(math-tools) \
-	$(math-host-tools) \
-	$(pl-includes) \
-	$(pl-test-includes) \
-
-all-pl/math: $(pl-libs) $(math-tools) $(pl-includes) $(pl-test-includes)
-
-$(pl-objs): $(pl-includes) $(pl-test-includes)
-$(pl-objs): CFLAGS_PL += $(math-cflags)
-$(B)/test/mathtest.o: CFLAGS_PL += -fmath-errno
-$(math-host-objs): CC = $(HOST_CC)
-$(math-host-objs): CFLAGS_PL = $(HOST_CFLAGS)
-
-$(B)/sv_%: CFLAGS_PL += $(math-sve-cflags)
-
-build/pl/include/test/ulp_funcs_gen.h: $(pl-lib-srcs)
-	# Replace PL_SIG
-	cat $^ | grep PL_SIG | $(CC) -xc - -o - -E "-DPL_SIG(v, t, a, f, ...)=_Z##v##t##a(f)" -P > $@
-
-build/pl/include/test/mathbench_funcs_gen.h: $(pl-lib-srcs)
-	# Replace PL_SIG macros with mathbench func entries
-	cat $^ | grep PL_SIG | $(CC) -xc - -o - -E "-DPL_SIG(v, t, a, f, ...)=_Z##v##t##a(f, ##__VA_ARGS__)" -P > $@
-
-build/pl/include/test/ulp_wrappers_gen.h: $(pl-lib-srcs)
-	# Replace PL_SIG macros with ULP wrapper declarations
-	cat $^ | grep PL_SIG | $(CC) -xc - -o - -E "-DPL_SIG(v, t, a, f, ...)=Z##v##N##t##a##_WRAP(f)" -P > $@
-
-$(B)/test/ulp.o: $(AOR)/test/ulp.h build/pl/include/test/ulp_funcs_gen.h build/pl/include/test/ulp_wrappers_gen.h
-$(B)/test/ulp.o: CFLAGS_PL += -I build/pl/include/test
-
-$(B)/test/mathbench.o: build/pl/include/test/mathbench_funcs_gen.h
-$(B)/test/mathbench.o: CFLAGS_PL += -I build/pl/include/test
-
-build/pl/lib/libmathlib.so: $(pl-lib-objs:%.o=%.os)
-	$(CC) $(CFLAGS_PL) $(LDFLAGS) -shared -o $@ $^
-
-build/pl/lib/libmathlib.a: $(pl-lib-objs)
-	rm -f $@
-	$(AR) rc $@ $^
-	$(RANLIB) $@
-
-$(math-host-tools): HOST_LDLIBS += -lm -lmpfr -lmpc
-$(math-tools): LDLIBS += $(math-ldlibs) -lm
-# math-sve-cflags should be empty if WANT_SVE_MATH is not enabled
-$(math-tools): CFLAGS_PL += $(math-sve-cflags)
-
-# Some targets to build pl/math/test from math/test sources
-build/pl/math/test/%.o: $(srcdir)/math/test/%.S
-	$(CC) $(CFLAGS_PL) -c -o $@ $<
-
-build/pl/math/test/%.o: $(srcdir)/math/test/%.c
-	$(CC) $(CFLAGS_PL) -c -o $@ $<
-
-build/pl/math/test/%.os: $(srcdir)/math/test/%.S
-	$(CC) $(CFLAGS_PL) -c -o $@ $<
-
-build/pl/math/test/%.os: $(srcdir)/math/test/%.c
-	$(CC) $(CFLAGS_PL) -c -o $@ $<
-
-# Some targets to build pl/ sources using appropriate flags
-build/pl/%.o: $(srcdir)/pl/%.S
-	$(CC) $(CFLAGS_PL) -c -o $@ $<
-
-build/pl/%.o: $(srcdir)/pl/%.c
-	$(CC) $(CFLAGS_PL) -c -o $@ $<
-
-build/pl/%.os: $(srcdir)/pl/%.S
-	$(CC) $(CFLAGS_PL) -c -o $@ $<
-
-build/pl/%.os: $(srcdir)/pl/%.c
-	$(CC) $(CFLAGS_PL) -c -o $@ $<
-
-build/pl/bin/rtest: $(math-host-objs)
-	$(HOST_CC) $(HOST_CFLAGS) $(HOST_LDFLAGS) -o $@ $^ $(HOST_LDLIBS)
-
-build/pl/bin/mathtest: $(B)/test/mathtest.o build/pl/lib/libmathlib.a
-	$(CC) $(CFLAGS_PL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)
-
-build/pl/bin/mathbench: $(B)/test/mathbench.o build/pl/lib/libmathlib.a
-	$(CC) $(CFLAGS_PL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)
-
-# This is not ideal, but allows custom symbols in mathbench to get resolved.
-build/pl/bin/mathbench_libc: $(B)/test/mathbench.o build/pl/lib/libmathlib.a
-	$(CC) $(CFLAGS_PL) $(LDFLAGS) -static -o $@ $< $(LDLIBS) -lc build/pl/lib/libmathlib.a -lm
-
-build/pl/bin/ulp: $(B)/test/ulp.o build/pl/lib/libmathlib.a
-	$(CC) $(CFLAGS_PL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)
-
-build/pl/include/%.h: $(PLM)/include/%.h
-	cp $< $@
-
-build/pl/include/test/%.h: $(PLM)/test/%.h
-	cp $< $@
-
-build/pl/bin/%.sh: $(PLM)/test/%.sh
-	cp $< $@
-
-pl-math-tests := $(wildcard $(PLM)/test/testcases/directed/*.tst)
-pl-math-rtests := $(wildcard $(PLM)/test/testcases/random/*.tst)
-
-check-pl/math-test: $(math-tools)
-	cat $(pl-math-tests) | $(EMULATOR) build/pl/bin/mathtest $(math-testflags)
-
-check-pl/math-rtest: $(math-host-tools) $(math-tools)
-	cat $(pl-math-rtests) | build/pl/bin/rtest | $(EMULATOR) build/pl/bin/mathtest $(math-testflags)
-
-ulp-input-dir=$(B)/test/inputs
-
-math-lib-lims = $(patsubst $(PLM)/%,$(ulp-input-dir)/%.ulp,$(basename $(pl-lib-srcs)))
-math-lib-fenvs = $(patsubst $(PLM)/%,$(ulp-input-dir)/%.fenv,$(basename $(pl-lib-srcs)))
-math-lib-itvs = $(patsubst $(PLM)/%,$(ulp-input-dir)/%.itv,$(basename $(pl-lib-srcs)))
-
-ulp-inputs = $(math-lib-lims) $(math-lib-fenvs) $(math-lib-itvs)
-
-$(ulp-inputs): CFLAGS_PL += -I$(PLM) -I$(PLM)/include $(math-cflags)
-
-$(ulp-input-dir)/%.ulp: $(PLM)/%.c
-	mkdir -p $(@D)
-	$(CC) -I$(PLM)/test $(CFLAGS_PL) $< -o - -E | { grep -o "PL_TEST_ULP [^ ]* [^ ]*" || true; } > $@
-
-$(ulp-input-dir)/%.fenv: $(PLM)/%.c
-	mkdir -p $(@D)
-	$(CC) -I$(PLM)/test $(CFLAGS_PL) $< -o - -E | { grep -o "PL_TEST_EXPECT_FENV_ENABLED [^ ]*" || true; } > $@
-
-$(ulp-input-dir)/%.itv: $(PLM)/%.c
-	mkdir -p $(dir $@)
-	$(CC) -I$(PLM)/test $(CFLAGS_PL) $< -o - -E | { grep "PL_TEST_INTERVAL " || true; } | sed "s/ PL_TEST_INTERVAL/\nPL_TEST_INTERVAL/g" > $@
-
-ulp-lims := $(ulp-input-dir)/limits
-$(ulp-lims): $(math-lib-lims)
-	cat $^ | sed "s/PL_TEST_ULP //g;s/^ *//g" > $@
-
-fenv-exps := $(ulp-input-dir)/fenv
-$(fenv-exps): $(math-lib-fenvs)
-	cat $^ | sed "s/PL_TEST_EXPECT_FENV_ENABLED //g;s/^ *//g" > $@
-
-ulp-itvs := $(ulp-input-dir)/intervals
-$(ulp-itvs): $(math-lib-itvs)
-	cat $^ | sort -u | sed "s/PL_TEST_INTERVAL //g" > $@
-
-check-pl/math-ulp: $(math-tools) $(ulp-lims) $(fenv-exps) $(ulp-itvs)
-	WANT_SVE_MATH=$(WANT_SVE_MATH) \
-	ULPFLAGS="$(math-ulpflags)" \
-	LIMITS=../../../$(ulp-lims) \
-	INTERVALS=../../../$(ulp-itvs) \
-	FENV=../../../$(fenv-exps) \
-	FUNC=$(func) \
-	build/pl/bin/runulp.sh $(EMULATOR)
-
-check-pl/math: check-pl/math-test check-pl/math-rtest check-pl/math-ulp
-
-$(DESTDIR)$(libdir)/pl/%.so: build/pl/lib/%.so
-	$(INSTALL) -D $< $@
-
-$(DESTDIR)$(libdir)/pl/%: build/pl/lib/%
-	$(INSTALL) -m 644 -D $< $@
-
-$(DESTDIR)$(includedir)/pl/%: build/pl/include/%
-	$(INSTALL) -m 644 -D $< $@
-
-install-pl/math: \
- $(pl-libs:build/pl/lib/%=$(DESTDIR)$(libdir)/pl/%) \
- $(pl-includes:build/pl/include/%=$(DESTDIR)$(includedir)/pl/%)
-
-clean-pl/math:
-	rm -f $(pl/math-files)
-
-.PHONY: all-pl/math check-pl/math-test check-pl/math-rtest check-pl/math-ulp check-pl/math install-pl/math clean-pl/math
diff --git a/pl/math/asinhf_data.c b/pl/math/asinhf_data.c
deleted file mode 100644
index cd1ef16b3b6a61..00000000000000
--- a/pl/math/asinhf_data.c
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Coefficients for single-precision asinh(x) function.
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "math_config.h"
-
-/* Approximate asinhf(x) directly in [2^-12, 1]. See for tools/asinhf.sollya for
-   these coeffs were generated.  */
-const struct asinhf_data __asinhf_data
-  = {.coeffs
-     = {-0x1.9b16fap-19f, -0x1.552baap-3f, -0x1.4e572ap-11f, 0x1.3a81dcp-4f,
-	0x1.65bbaap-10f, -0x1.057f1p-4f, 0x1.6c1d46p-5f, -0x1.4cafe8p-7f}};
diff --git a/pl/math/atan_data.c b/pl/math/atan_data.c
deleted file mode 100644
index 91d0f61d2eaf43..00000000000000
--- a/pl/math/atan_data.c
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Double-precision polynomial coefficients for vector atan(x) and atan2(y,x).
- *
- * Copyright (c) 2019-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "math_config.h"
-
-const struct atan_poly_data __atan_poly_data = {
-  .poly = {/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
-	      [2**-1022, 1.0]. See atan.sollya for details of how these were
-	      generated.  */
-	   -0x1.5555555555555p-2,  0x1.99999999996c1p-3,  -0x1.2492492478f88p-3,
-	   0x1.c71c71bc3951cp-4,   -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4,
-	   -0x1.11100ee084227p-4,  0x1.e1d0f9696f63bp-5,  -0x1.aebfe7b418581p-5,
-	   0x1.842dbe9b0d916p-5,   -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5,
-	   -0x1.00e6eece7de8p-5,   0x1.860897b29e5efp-6,  -0x1.0051381722a59p-6,
-	   0x1.14e9dc19a4a4ep-7,   -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10,
-	   -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16}};
diff --git a/pl/math/atanf_data.c b/pl/math/atanf_data.c
deleted file mode 100644
index c4cba2378ceaee..00000000000000
--- a/pl/math/atanf_data.c
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Single-precision polynomial coefficients for vector atan(x) and atan2(y,x).
- *
- * Copyright (c) 2019-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "math_config.h"
-
-/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0].
- */
-const struct atanf_poly_data __atanf_poly_data = {
-  .poly = {/* See atanf.sollya for details of how these were generated.  */
-	   -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f,
-	   -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f}};
diff --git a/pl/math/exp_data.c b/pl/math/exp_data.c
deleted file mode 100644
index 2354be76cfab6d..00000000000000
--- a/pl/math/exp_data.c
+++ /dev/null
@@ -1,1120 +0,0 @@
-/*
- * Shared data between exp, exp2 and pow.
- *
- * Copyright (c) 2018-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "math_config.h"
-
-#define N (1 << EXP_TABLE_BITS)
-
-const struct exp_data __exp_data = {
-// N/ln2
-.invln2N = 0x1.71547652b82fep0 * N,
-// -ln2/N
-#if N == 64
-.negln2hiN = -0x1.62e42fefa0000p-7,
-.negln2loN = -0x1.cf79abc9e3b3ap-46,
-#elif N == 128
-.negln2hiN = -0x1.62e42fefa0000p-8,
-.negln2loN = -0x1.cf79abc9e3b3ap-47,
-#elif N == 256
-.negln2hiN = -0x1.62e42fefc0000p-9,
-.negln2loN = 0x1.c610ca86c3899p-45,
-#elif N == 512
-.negln2hiN = -0x1.62e42fef80000p-10,
-.negln2loN = -0x1.1cf79abc9e3b4p-45,
-#endif
-// Used for rounding when !TOINT_INTRINSICS
-#if EXP_USE_TOINT_NARROW
-.shift = 0x1800000000.8p0,
-#else
-.shift = 0x1.8p52,
-#endif
-// exp polynomial coefficients.
-.poly = {
-#if N == 64 && EXP_POLY_ORDER == 5 && !EXP_POLY_WIDE
-// abs error: 1.5543*2^-60
-// ulp error: 0.529 (0.533 without fma)
-// if |x| < ln2/128+eps
-// abs error if |x| < ln2/64: 1.7157*2^-50
-0x1.fffffffffdbcdp-2,
-0x1.555555555444cp-3,
-0x1.555573c6a9f7dp-5,
-0x1.1111266d28935p-7,
-#elif N == 64 && EXP_POLY_ORDER == 6 && EXP_POLY_WIDE
-// abs error: 1.6735*2^-64
-// ulp error: 0.518 (0.522 without fma)
-// if |x| < ln2/64
-0x1.5555555548f9ap-3,
-0x1.555555554bf5dp-5,
-0x1.11115b75f0f4dp-7,
-0x1.6c171a6b6303ep-10,
-#elif N == 128 && EXP_POLY_ORDER == 5 && !EXP_POLY_WIDE
-// abs error: 1.555*2^-66
-// ulp error: 0.509 (0.511 without fma)
-// if |x| < ln2/256+eps
-// abs error if |x| < ln2/256+0x1p-15: 1.09*2^-65
-// abs error if |x| < ln2/128: 1.7145*2^-56
-0x1.ffffffffffdbdp-2,
-0x1.555555555543cp-3,
-0x1.55555cf172b91p-5,
-0x1.1111167a4d017p-7,
-#elif N == 128 && EXP_POLY_ORDER == 5 && EXP_POLY_WIDE
-// abs error: 1.5542*2^-60
-// ulp error: 0.521 (0.523 without fma)
-// if |x| < ln2/128
-0x1.fffffffffdbcep-2,
-0x1.55555555543c2p-3,
-0x1.555573c64f2e3p-5,
-0x1.111126b4eff73p-7,
-#elif N == 128 && EXP_POLY_ORDER == 6 && EXP_POLY_WIDE
-// abs error: 1.6861*2^-71
-// ulp error: 0.509 (0.511 without fma)
-// if |x| < ln2/128
-0x1.55555555548fdp-3,
-0x1.555555555658fp-5,
-0x1.111123a859bb6p-7,
-0x1.6c16ba6920cabp-10,
-#elif N == 256 && EXP_POLY_ORDER == 4 && !EXP_POLY_WIDE
-// abs error: 1.43*2^-58
-// ulp error: 0.549 (0.550 without fma)
-// if |x| < ln2/512
-0x1p0, // unused
-0x1.fffffffffffd4p-2,
-0x1.5555571d6ef9p-3,
-0x1.5555576a5adcep-5,
-#elif N == 256 && EXP_POLY_ORDER == 5 && EXP_POLY_WIDE
-// abs error: 1.5547*2^-66
-// ulp error: 0.505 (0.506 without fma)
-// if |x| < ln2/256
-0x1.ffffffffffdbdp-2,
-0x1.555555555543cp-3,
-0x1.55555cf16e1edp-5,
-0x1.1111167a4b553p-7,
-#elif N == 512 && EXP_POLY_ORDER == 4 && !EXP_POLY_WIDE
-// abs error: 1.4300*2^-63
-// ulp error: 0.504
-// if |x| < ln2/1024
-// abs error if |x| < ln2/512: 1.0689*2^-55
-0x1p0, // unused
-0x1.ffffffffffffdp-2,
-0x1.555555c75bb6p-3,
-0x1.555555dec04a8p-5,
-#endif
-},
-.exp2_shift = 0x1.8p52 / N,
-// exp2 polynomial coefficients.
-.exp2_poly = {
-#if N == 64 && EXP2_POLY_ORDER == 6 && EXP2_POLY_WIDE
-// abs error: 1.3054*2^-63
-// ulp error: 0.515
-// if |x| < 1/64
-0x1.62e42fefa39efp-1,
-0x1.ebfbdff82c58fp-3,
-0x1.c6b08d7045cf1p-5,
-0x1.3b2ab6fb8fd0ep-7,
-0x1.5d884afec48d7p-10,
-0x1.43097dc684ae1p-13,
-#elif N == 128 && EXP2_POLY_ORDER == 5 && !EXP2_POLY_WIDE
-// abs error: 1.2195*2^-65
-// ulp error: 0.507 (0.511 without fma)
-// if |x| < 1/256
-// abs error if |x| < 1/128: 1.9941*2^-56
-0x1.62e42fefa39efp-1,
-0x1.ebfbdff82c424p-3,
-0x1.c6b08d70cf4b5p-5,
-0x1.3b2abd24650ccp-7,
-0x1.5d7e09b4e3a84p-10,
-#elif N == 256 && EXP2_POLY_ORDER == 5 && EXP2_POLY_WIDE
-// abs error: 1.2195*2^-65
-// ulp error: 0.504 (0.508 without fma)
-// if |x| < 1/256
-0x1.62e42fefa39efp-1,
-0x1.ebfbdff82c424p-3,
-0x1.c6b08d70cf4b5p-5,
-0x1.3b2abd24650ccp-7,
-0x1.5d7e09b4e3a84p-10,
-#elif N == 512 && EXP2_POLY_ORDER == 4 && !EXP2_POLY_WIDE
-// abs error: 1.4411*2^-64
-// ulp error: 0.5024 (0.5063 without fma)
-// if |x| < 1/1024
-// abs error if |x| < 1/512: 1.9430*2^-56
-0x1.62e42fefa39ecp-1,
-0x1.ebfbdff82c58bp-3,
-0x1.c6b08e46de41fp-5,
-0x1.3b2ab786ee1dap-7,
-#endif
-},
-// 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N)
-// tab[2*k] = asuint64(T[k])
-// tab[2*k+1] = asuint64(H[k]) - (k << 52)/N
-.tab = {
-#if N == 64
-0x0, 0x3ff0000000000000,
-0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
-0x3c8cd2523567f613, 0x3fefd9b0d3158574,
-0x3c60f74e61e6c861, 0x3fefc74518759bc8,
-0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
-0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
-0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
-0xbc91c923b9d5f416, 0x3fef829aaea92de0,
-0xbc801b15eaa59348, 0x3fef72b83c7d517b,
-0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
-0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
-0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
-0x3c968efde3a8a894, 0x3fef387a6e756238,
-0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
-0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
-0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
-0x3c834d754db0abb6, 0x3fef06fe0a31b715,
-0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
-0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
-0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
-0x3c859f48a72a4c6d, 0x3feedea64c123422,
-0xbc58a78f4817895b, 0x3feed60a21f72e2a,
-0x3c4363ed60c2ac11, 0x3feece086061892d,
-0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
-0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
-0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
-0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
-0x3c93350518fdd78e, 0x3feeaf4736b527da,
-0x3c9063e1e21c5409, 0x3feeab07dd485429,
-0x3c9432e62b64c035, 0x3feea76f15ad2148,
-0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
-0xbc93cedd78565858, 0x3feea23882552225,
-0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
-0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
-0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
-0xbc8619321e55e68a, 0x3fee9feb564267c9,
-0xbc7b32dcb94da51d, 0x3feea11473eb0187,
-0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
-0xbc9369b6f13b3734, 0x3feea589994cce13,
-0xbc94d450d872576e, 0x3feea8d99b4492ed,
-0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
-0x3c7bf68359f35f44, 0x3feeb1ae99157736,
-0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
-0xbc92434322f4f9aa, 0x3feebd829fde4e50,
-0x3c71affc2b91ce27, 0x3feec49182a3f090,
-0xbc87c50422622263, 0x3feecc667b5de565,
-0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
-0x3c8469846e735ab3, 0x3feede6b5579fdbf,
-0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
-0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
-0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
-0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
-0x3c736eae30af0cb3, 0x3fef199bdd85529c,
-0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
-0x3c676b2c6c921968, 0x3fef3720dcef9069,
-0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
-0x3c74a385a63d07a7, 0x3fef5818dcfba487,
-0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
-0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
-0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
-0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
-0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
-0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
-0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
-#elif N == 128
-0x0, 0x3ff0000000000000,
-0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335,
-0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
-0xbc905e7a108766d1, 0x3fefe315e86e7f85,
-0x3c8cd2523567f613, 0x3fefd9b0d3158574,
-0xbc8bce8023f98efa, 0x3fefd06b29ddf6de,
-0x3c60f74e61e6c861, 0x3fefc74518759bc8,
-0x3c90a3e45b33d399, 0x3fefbe3ecac6f383,
-0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
-0x3c8eb51a92fdeffc, 0x3fefac922b7247f7,
-0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
-0xbc6a033489906e0b, 0x3fef9b66affed31b,
-0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
-0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc,
-0xbc91c923b9d5f416, 0x3fef829aaea92de0,
-0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51,
-0xbc801b15eaa59348, 0x3fef72b83c7d517b,
-0xbc8f1ff055de323d, 0x3fef6af9388c8dea,
-0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
-0xbc96d99c7611eb26, 0x3fef5be084045cd4,
-0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
-0xbc8fe782cb86389d, 0x3fef4d5022fcd91d,
-0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
-0x3c807a05b0e4047d, 0x3fef3f49917ddc96,
-0x3c968efde3a8a894, 0x3fef387a6e756238,
-0x3c875e18f274487d, 0x3fef31ce4fb2a63f,
-0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
-0xbc96b87b3f71085e, 0x3fef24dfe1f56381,
-0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
-0xbc3d219b1a6fbffa, 0x3fef187fd0dad990,
-0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
-0x3c6e149289cecb8f, 0x3fef0cafa93e2f56,
-0x3c834d754db0abb6, 0x3fef06fe0a31b715,
-0x3c864201e2ac744c, 0x3fef0170fc4cd831,
-0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
-0xbc86a3803b8e5b04, 0x3feef6c55f929ff1,
-0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
-0xbc9907f81b512d8e, 0x3feeecae6d05d866,
-0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
-0xbc991919b3ce1b15, 0x3feee32dc313a8e5,
-0x3c859f48a72a4c6d, 0x3feedea64c123422,
-0xbc9312607a28698a, 0x3feeda4504ac801c,
-0xbc58a78f4817895b, 0x3feed60a21f72e2a,
-0xbc7c2c9b67499a1b, 0x3feed1f5d950a897,
-0x3c4363ed60c2ac11, 0x3feece086061892d,
-0x3c9666093b0664ef, 0x3feeca41ed1d0057,
-0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
-0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de,
-0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
-0x3c931dbdeb54e077, 0x3feebcb299fddd0d,
-0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
-0xbc87deccdc93a349, 0x3feeb6daa2cf6642,
-0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
-0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f,
-0x3c93350518fdd78e, 0x3feeaf4736b527da,
-0x3c7b98b72f8a9b05, 0x3feead12d497c7fd,
-0x3c9063e1e21c5409, 0x3feeab07dd485429,
-0x3c34c7855019c6ea, 0x3feea9268a5946b7,
-0x3c9432e62b64c035, 0x3feea76f15ad2148,
-0xbc8ce44a6199769f, 0x3feea5e1b976dc09,
-0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
-0xbc845378892be9ae, 0x3feea34634ccc320,
-0xbc93cedd78565858, 0x3feea23882552225,
-0x3c5710aa807e1964, 0x3feea155d44ca973,
-0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
-0xbc6a12ad8734b982, 0x3feea012750bdabf,
-0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
-0xbc80dc3d54e08851, 0x3fee9f7df9519484,
-0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
-0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174,
-0xbc8619321e55e68a, 0x3fee9feb564267c9,
-0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f,
-0xbc7b32dcb94da51d, 0x3feea11473eb0187,
-0x3c94ecfd5467c06b, 0x3feea1ed0130c132,
-0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
-0xbc88a1c52fb3cf42, 0x3feea427543e1a12,
-0xbc9369b6f13b3734, 0x3feea589994cce13,
-0xbc805e843a19ff1e, 0x3feea71a4623c7ad,
-0xbc94d450d872576e, 0x3feea8d99b4492ed,
-0x3c90ad675b0e8a00, 0x3feeaac7d98a6699,
-0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
-0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c,
-0x3c7bf68359f35f44, 0x3feeb1ae99157736,
-0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6,
-0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
-0xbc6c23f97c90b959, 0x3feeba44cbc8520f,
-0xbc92434322f4f9aa, 0x3feebd829fde4e50,
-0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba,
-0x3c71affc2b91ce27, 0x3feec49182a3f090,
-0x3c6dd235e10a73bb, 0x3feec86319e32323,
-0xbc87c50422622263, 0x3feecc667b5de565,
-0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33,
-0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
-0x3c90cc319cee31d2, 0x3feed99e1330b358,
-0x3c8469846e735ab3, 0x3feede6b5579fdbf,
-0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a,
-0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
-0xbc907b8f4ad1d9fa, 0x3feeee07298db666,
-0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
-0xbc90a40e3da6f640, 0x3feef9728de5593a,
-0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
-0xbc91eee26b588a35, 0x3fef05b030a1064a,
-0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
-0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09,
-0x3c736eae30af0cb3, 0x3fef199bdd85529c,
-0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a,
-0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
-0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5,
-0x3c676b2c6c921968, 0x3fef3720dcef9069,
-0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa,
-0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
-0xbc900dae3875a949, 0x3fef4f87080d89f2,
-0x3c74a385a63d07a7, 0x3fef5818dcfba487,
-0xbc82919e2040220f, 0x3fef60e316c98398,
-0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
-0x3c843a59ac016b4b, 0x3fef7321f301b460,
-0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
-0xbc892ab93b470dc9, 0x3fef864614f5a129,
-0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
-0x3c83c5ec519d7271, 0x3fef9a51fbc74c83,
-0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
-0xbc8dae98e223747d, 0x3fefaf482d8e67f1,
-0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
-0x3c842b94c3a9eb32, 0x3fefc52b376bba97,
-0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
-0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14,
-0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
-0x3c5305c14160cc89, 0x3feff3c22b8f71f1,
-#elif N == 256
-0x0, 0x3ff0000000000000,
-0xbc84e82fc61851ac, 0x3feffb1afa5abcbf,
-0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335,
-0xbc82985dd8521d32, 0x3feff168143b0281,
-0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
-0x3c651e617061bfbd, 0x3fefe7d42e11bbcc,
-0xbc905e7a108766d1, 0x3fefe315e86e7f85,
-0x3c845fad437fa426, 0x3fefde5f72f654b1,
-0x3c8cd2523567f613, 0x3fefd9b0d3158574,
-0xbc954529642b232f, 0x3fefd50a0e3c1f89,
-0xbc8bce8023f98efa, 0x3fefd06b29ddf6de,
-0x3c8293708ef5c32e, 0x3fefcbd42b72a836,
-0x3c60f74e61e6c861, 0x3fefc74518759bc8,
-0xbc95b9280905b2a4, 0x3fefc2bdf66607e0,
-0x3c90a3e45b33d399, 0x3fefbe3ecac6f383,
-0x3c84f31f32c4b7e7, 0x3fefb9c79b1f3919,
-0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
-0x3c9407fb30d06420, 0x3fefb0f145e46c85,
-0x3c8eb51a92fdeffc, 0x3fefac922b7247f7,
-0xbc9a5d04b3b9911b, 0x3fefa83b23395dec,
-0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
-0xbc937a01f0739546, 0x3fef9fa55fdfa9c5,
-0xbc6a033489906e0b, 0x3fef9b66affed31b,
-0x3c8b8268b04ef0a5, 0x3fef973028d7233e,
-0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
-0xbc9ac46e44a2ebcc, 0x3fef8edbab5e2ab6,
-0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc,
-0xbc65704e90c9f860, 0x3fef86a814f204ab,
-0xbc91c923b9d5f416, 0x3fef829aaea92de0,
-0xbc897cea57e46280, 0x3fef7e95934f312e,
-0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51,
-0x3c56f01429e2b9d2, 0x3fef76a45471c3c2,
-0xbc801b15eaa59348, 0x3fef72b83c7d517b,
-0x3c6e653b2459034b, 0x3fef6ed48695bbc0,
-0xbc8f1ff055de323d, 0x3fef6af9388c8dea,
-0x3c92cc7ea345b7dc, 0x3fef672658375d2f,
-0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
-0x3c957bfb2876ea9e, 0x3fef5f99f8138a1c,
-0xbc96d99c7611eb26, 0x3fef5be084045cd4,
-0x3c8cdc1873af2155, 0x3fef582f95281c6b,
-0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
-0xbc9493684653a131, 0x3fef50e75eb44027,
-0xbc8fe782cb86389d, 0x3fef4d5022fcd91d,
-0xbc98e2899077520a, 0x3fef49c18438ce4d,
-0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
-0x3c9120fcd4f59273, 0x3fef42be3578a819,
-0x3c807a05b0e4047d, 0x3fef3f49917ddc96,
-0x3c89b788c188c9b8, 0x3fef3bdda27912d1,
-0x3c968efde3a8a894, 0x3fef387a6e756238,
-0x3c877afbca90ef84, 0x3fef351ffb82140a,
-0x3c875e18f274487d, 0x3fef31ce4fb2a63f,
-0x3c91512f082876ee, 0x3fef2e85711ece75,
-0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
-0x3c9a02f0c7d75ec6, 0x3fef280e341ddf29,
-0xbc96b87b3f71085e, 0x3fef24dfe1f56381,
-0xbc803297e78260bf, 0x3fef21ba7591bb70,
-0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
-0xbc95b77e5ccd9fbf, 0x3fef1b8a66d10f13,
-0xbc3d219b1a6fbffa, 0x3fef187fd0dad990,
-0xbc91e75c40b4251e, 0x3fef157e39771b2f,
-0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
-0x3c98a911f1f7785a, 0x3fef0f961f641589,
-0x3c6e149289cecb8f, 0x3fef0cafa93e2f56,
-0xbc61e7c998db7dbb, 0x3fef09d24abd886b,
-0x3c834d754db0abb6, 0x3fef06fe0a31b715,
-0x3c85425c11faadf4, 0x3fef0432edeeb2fd,
-0x3c864201e2ac744c, 0x3fef0170fc4cd831,
-0xbc979517a03e2847, 0x3feefeb83ba8ea32,
-0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
-0xbc800e2a46da4bee, 0x3feef96266e3fa2d,
-0xbc86a3803b8e5b04, 0x3feef6c55f929ff1,
-0xbc87430803972b34, 0x3feef431a2de883b,
-0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
-0xbc954de30ae02d94, 0x3feeef26231e754a,
-0xbc9907f81b512d8e, 0x3feeecae6d05d866,
-0xbc94f2487e1c03ec, 0x3feeea401b7140ef,
-0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
-0x3c914a5432fcb2f4, 0x3feee57fbfec6cf4,
-0xbc991919b3ce1b15, 0x3feee32dc313a8e5,
-0x3c79c3bba5562a2f, 0x3feee0e544ede173,
-0x3c859f48a72a4c6d, 0x3feedea64c123422,
-0xbc85a71612e21658, 0x3feedc70df1c5175,
-0xbc9312607a28698a, 0x3feeda4504ac801c,
-0x3c86421f6f1d24d6, 0x3feed822c367a024,
-0xbc58a78f4817895b, 0x3feed60a21f72e2a,
-0xbc9348a6815fce65, 0x3feed3fb2709468a,
-0xbc7c2c9b67499a1b, 0x3feed1f5d950a897,
-0x3c835c43984d9871, 0x3feecffa3f84b9d4,
-0x3c4363ed60c2ac11, 0x3feece086061892d,
-0xbc632afc8d9473a0, 0x3feecc2042a7d232,
-0x3c9666093b0664ef, 0x3feeca41ed1d0057,
-0xbc95fc5e44de020e, 0x3feec86d668b3237,
-0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
-0xbc7ea0148327c42f, 0x3feec4e1e192aed2,
-0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de,
-0xbc7a843ad1a88022, 0x3feec17dea6db7d7,
-0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
-0x3c892ca3bf144e63, 0x3feebe41b817c114,
-0x3c931dbdeb54e077, 0x3feebcb299fddd0d,
-0xbc902c99b04aa8b0, 0x3feebb2d81d8abff,
-0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
-0x3c73e34f67e67118, 0x3feeb8417f4531ee,
-0xbc87deccdc93a349, 0x3feeb6daa2cf6642,
-0xbc75a3b1197ba0f0, 0x3feeb57de83f4eef,
-0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
-0x3c81bd2888075068, 0x3feeb2e2f4f6ad27,
-0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f,
-0xbc896be8ae89ef8f, 0x3feeb070dde910d2,
-0x3c93350518fdd78e, 0x3feeaf4736b527da,
-0xbc88e6ac90348602, 0x3feeae27dbe2c4cf,
-0x3c7b98b72f8a9b05, 0x3feead12d497c7fd,
-0xbc91af7f1365c3ac, 0x3feeac0827ff07cc,
-0x3c9063e1e21c5409, 0x3feeab07dd485429,
-0xbc943a3540d1898a, 0x3feeaa11fba87a03,
-0x3c34c7855019c6ea, 0x3feea9268a5946b7,
-0xbc951f58ddaa8090, 0x3feea84590998b93,
-0x3c9432e62b64c035, 0x3feea76f15ad2148,
-0xbc82e1648e50a17c, 0x3feea6a320dceb71,
-0xbc8ce44a6199769f, 0x3feea5e1b976dc09,
-0x3c95f30eda98a575, 0x3feea52ae6cdf6f4,
-0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
-0x3c917ecda8a72159, 0x3feea3dd1d1929fd,
-0xbc845378892be9ae, 0x3feea34634ccc320,
-0xbc9345f3cee1ae6e, 0x3feea2b9febc8fb7,
-0xbc93cedd78565858, 0x3feea23882552225,
-0xbc85c33fdf910406, 0x3feea1c1c70833f6,
-0x3c5710aa807e1964, 0x3feea155d44ca973,
-0x3c81079ab5789604, 0x3feea0f4b19e9538,
-0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
-0x3c727df161cd7778, 0x3feea052fa75173e,
-0xbc6a12ad8734b982, 0x3feea012750bdabf,
-0x3c93f9924a05b767, 0x3fee9fdcddd47645,
-0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
-0xbc87557939a8b5ef, 0x3fee9f9298593ae5,
-0xbc80dc3d54e08851, 0x3fee9f7df9519484,
-0x3c51ed2f56fa9d1a, 0x3fee9f7466f42e87,
-0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
-0xbc88e67a9006c909, 0x3fee9f8286ead08a,
-0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174,
-0x3c86597566977ac8, 0x3fee9fbd35d7cbfd,
-0xbc8619321e55e68a, 0x3fee9feb564267c9,
-0x3c92c0b7028a5c3a, 0x3feea024b1ab6e09,
-0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f,
-0x3c8a30faf49cc78c, 0x3feea0b938ac1cf6,
-0xbc7b32dcb94da51d, 0x3feea11473eb0187,
-0xbc92dad3519d7b5b, 0x3feea17b0976cfdb,
-0x3c94ecfd5467c06b, 0x3feea1ed0130c132,
-0x3c87d51410fd15c2, 0x3feea26a62ff86f0,
-0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
-0xbc760a3629969871, 0x3feea3878491c491,
-0xbc88a1c52fb3cf42, 0x3feea427543e1a12,
-0x3c8b18c6e3fdef5d, 0x3feea4d2add106d9,
-0xbc9369b6f13b3734, 0x3feea589994cce13,
-0x3c90ec1ddcb1390a, 0x3feea64c1eb941f7,
-0xbc805e843a19ff1e, 0x3feea71a4623c7ad,
-0xbc522cea4f3afa1e, 0x3feea7f4179f5b21,
-0xbc94d450d872576e, 0x3feea8d99b4492ed,
-0x3c7c88549b958471, 0x3feea9cad931a436,
-0x3c90ad675b0e8a00, 0x3feeaac7d98a6699,
-0x3c931143962f7877, 0x3feeabd0a478580f,
-0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
-0x3c93e9e96f112479, 0x3feeae05bad61778,
-0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c,
-0xbc8dac42a4a38df0, 0x3feeb06a5e0866d9,
-0x3c7bf68359f35f44, 0x3feeb1ae99157736,
-0x3c8b99dd98b1ed84, 0x3feeb2fed0282c8a,
-0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6,
-0xbc7885ad50cbb750, 0x3feeb5c353aa2fe2,
-0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
-0xbc82d5e85f3e0301, 0x3feeb8b82b5f98e5,
-0xbc6c23f97c90b959, 0x3feeba44cbc8520f,
-0xbc51669428996971, 0x3feebbdd9a7670b3,
-0xbc92434322f4f9aa, 0x3feebd829fde4e50,
-0x3c71f2b2c1c4c014, 0x3feebf33e47a22a2,
-0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba,
-0xbc9294f304f166b6, 0x3feec2bb4d53fe0d,
-0x3c71affc2b91ce27, 0x3feec49182a3f090,
-0xbc8a1e58414c07d3, 0x3feec674194bb8d5,
-0x3c6dd235e10a73bb, 0x3feec86319e32323,
-0xbc79740b58a20091, 0x3feeca5e8d07f29e,
-0xbc87c50422622263, 0x3feecc667b5de565,
-0x3c9165830a2b96c2, 0x3feece7aed8eb8bb,
-0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33,
-0xbc903d5cbe27874b, 0x3feed2c980460ad8,
-0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
-0x3c5986178980fce0, 0x3feed74a8af46052,
-0x3c90cc319cee31d2, 0x3feed99e1330b358,
-0xbc89472975b1f2a5, 0x3feedbfe53c12e59,
-0x3c8469846e735ab3, 0x3feede6b5579fdbf,
-0x3c7d8157a34b7e7f, 0x3feee0e521356eba,
-0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a,
-0x3c8c8a4e231ebb7d, 0x3feee5ff3a3c2774,
-0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
-0xbc888c8d11a142e5, 0x3feeeb4ce622f2ff,
-0xbc907b8f4ad1d9fa, 0x3feeee07298db666,
-0x3c889c2ea41433c7, 0x3feef0ce6c9a8952,
-0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
-0xbc7274aedac8ff80, 0x3feef68415b749b1,
-0xbc90a40e3da6f640, 0x3feef9728de5593a,
-0x3c85c620ce76df06, 0x3feefc6e29f1c52a,
-0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
-0xbc8fda52e1b51e41, 0x3fef028cf22749e4,
-0xbc91eee26b588a35, 0x3fef05b030a1064a,
-0xbc32141a7b3e2cd8, 0x3fef08e0b79a6f1f,
-0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
-0xbc302899507554e5, 0x3fef0f69c3f3a207,
-0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09,
-0xbc80dda2d4c0010c, 0x3fef16286141b33d,
-0x3c736eae30af0cb3, 0x3fef199bdd85529c,
-0xbc8a007daadf8d68, 0x3fef1d1cd9fa652c,
-0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a,
-0x3c836909391181d3, 0x3fef244778fafb22,
-0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
-0xbc811cd7dbdf9547, 0x3fef2ba88988c933,
-0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5,
-0xbc7ac28b7bef6621, 0x3fef33405751c4db,
-0x3c676b2c6c921968, 0x3fef3720dcef9069,
-0xbc7030587207b9e1, 0x3fef3b0f2e6d1675,
-0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa,
-0xbc8cc734592af7fc, 0x3fef43155b5bab74,
-0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
-0x3c87752a44f587e8, 0x3fef4b532b08c968,
-0xbc900dae3875a949, 0x3fef4f87080d89f2,
-0x3c85b66fefeef52e, 0x3fef53c8eacaa1d6,
-0x3c74a385a63d07a7, 0x3fef5818dcfba487,
-0x3c5159d9d908a96e, 0x3fef5c76e862e6d3,
-0xbc82919e2040220f, 0x3fef60e316c98398,
-0x3c8c254d16117a68, 0x3fef655d71ff6075,
-0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
-0xbc8d8c329fbd0e03, 0x3fef6e7cd63a8315,
-0x3c843a59ac016b4b, 0x3fef7321f301b460,
-0xbc8ea6e6fbd5f2a6, 0x3fef77d5641c0658,
-0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
-0xbc63e8e3eab2cbb4, 0x3fef81676b197d17,
-0xbc892ab93b470dc9, 0x3fef864614f5a129,
-0xbc8b7966cd0d2cd9, 0x3fef8b333b16ee12,
-0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
-0xbc776caa4c2ff1cf, 0x3fef953924676d76,
-0x3c83c5ec519d7271, 0x3fef9a51fbc74c83,
-0xbc81d5fc525d9940, 0x3fef9f7977cdb740,
-0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
-0x3c855cd8aaea3d21, 0x3fefa9f4867cca6e,
-0xbc8dae98e223747d, 0x3fefaf482d8e67f1,
-0x3c8269947c2bed4a, 0x3fefb4aaa2188510,
-0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
-0xbc83b6137e9afe9e, 0x3fefbf9c1cb6412a,
-0x3c842b94c3a9eb32, 0x3fefc52b376bba97,
-0xbc69fa74878ba7c7, 0x3fefcac948dd7274,
-0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
-0x3c901f3a75ee0efe, 0x3fefd632798844f8,
-0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14,
-0xbc516a9ce6ed84fa, 0x3fefe1d802243c89,
-0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
-0xbc699c7db2effc76, 0x3fefedba3692d514,
-0x3c5305c14160cc89, 0x3feff3c22b8f71f1,
-0x3c64b458677f9840, 0x3feff9d96b2a23d9,
-#elif N == 512
-0x0, 0x3ff0000000000000,
-0xbc75d87ade1f60d5, 0x3feffd8c86da1c0a,
-0xbc84e82fc61851ac, 0x3feffb1afa5abcbf,
-0x3c9bffdaa7ac4bac, 0x3feff8ab5b2cbd11,
-0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335,
-0x3c75c18e5ae0563a, 0x3feff3d1e77170b4,
-0xbc82985dd8521d32, 0x3feff168143b0281,
-0xbc705b1125cf49a5, 0x3fefef003103b10e,
-0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
-0x3c9f879abbff3f87, 0x3fefea363d42b027,
-0x3c651e617061bfbd, 0x3fefe7d42e11bbcc,
-0x3c9b14003824712a, 0x3fefe57411915a8a,
-0xbc905e7a108766d1, 0x3fefe315e86e7f85,
-0x3c61cbf0f38af658, 0x3fefe0b9b35659d8,
-0x3c845fad437fa426, 0x3fefde5f72f654b1,
-0xbc9a3316383dcbc5, 0x3fefdc0727fc1762,
-0x3c8cd2523567f613, 0x3fefd9b0d3158574,
-0x3c9901c9e0e797fd, 0x3fefd75c74f0bec2,
-0xbc954529642b232f, 0x3fefd50a0e3c1f89,
-0xbc89b3236d111646, 0x3fefd2b99fa6407c,
-0xbc8bce8023f98efa, 0x3fefd06b29ddf6de,
-0xbc8cb191be99b1b0, 0x3fefce1ead925493,
-0x3c8293708ef5c32e, 0x3fefcbd42b72a836,
-0xbc9acb71e83765b7, 0x3fefc98ba42e7d30,
-0x3c60f74e61e6c861, 0x3fefc74518759bc8,
-0x3c5cd3e58b03697e, 0x3fefc50088f8093f,
-0xbc95b9280905b2a4, 0x3fefc2bdf66607e0,
-0xbc8bfb07d4755452, 0x3fefc07d61701716,
-0x3c90a3e45b33d399, 0x3fefbe3ecac6f383,
-0x3c8aedeb3e7b14cd, 0x3fefbc02331b9715,
-0x3c84f31f32c4b7e7, 0x3fefb9c79b1f3919,
-0x3c9a8eb1f3d914b4, 0x3fefb78f03834e52,
-0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
-0xbc85b9eb0402507b, 0x3fefb323d833d93f,
-0x3c9407fb30d06420, 0x3fefb0f145e46c85,
-0xbc93f0f225bbf3ee, 0x3fefaec0b6bdae53,
-0x3c8eb51a92fdeffc, 0x3fefac922b7247f7,
-0xbc9c3fe7282d1784, 0x3fefaa65a4b520ba,
-0xbc9a5d04b3b9911b, 0x3fefa83b23395dec,
-0x3c9c8be44bf4cde8, 0x3fefa612a7b26300,
-0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
-0x3c820c5444c93c44, 0x3fefa1c7c55189c6,
-0xbc937a01f0739546, 0x3fef9fa55fdfa9c5,
-0xbc84c6baeb580d7a, 0x3fef9d8503328e6d,
-0xbc6a033489906e0b, 0x3fef9b66affed31b,
-0x3c8657aa1b0d9f83, 0x3fef994a66f951ce,
-0x3c8b8268b04ef0a5, 0x3fef973028d7233e,
-0x3c62f2c7fd6ee145, 0x3fef9517f64d9ef1,
-0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
-0xbc6b0b2789925e90, 0x3fef90edb6db2dc1,
-0xbc9ac46e44a2ebcc, 0x3fef8edbab5e2ab6,
-0xbc93aad17d197fae, 0x3fef8ccbae51a5c8,
-0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc,
-0xbc989c464a07ad70, 0x3fef88b1e264a0e9,
-0xbc65704e90c9f860, 0x3fef86a814f204ab,
-0xbc72c338fce197f4, 0x3fef84a058cbae1e,
-0xbc91c923b9d5f416, 0x3fef829aaea92de0,
-0xbc6dca724cea0eb6, 0x3fef809717425438,
-0xbc897cea57e46280, 0x3fef7e95934f312e,
-0x3c464770b955d34d, 0x3fef7c962388149e,
-0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51,
-0xbc962811c114424f, 0x3fef789d83606e12,
-0x3c56f01429e2b9d2, 0x3fef76a45471c3c2,
-0x3c8ec58e74904dd4, 0x3fef74ad3c92df73,
-0xbc801b15eaa59348, 0x3fef72b83c7d517b,
-0x3c8d63b0ab2d5bbf, 0x3fef70c554eaea89,
-0x3c6e653b2459034b, 0x3fef6ed48695bbc0,
-0xbc9ca9effbeeac92, 0x3fef6ce5d23816c9,
-0xbc8f1ff055de323d, 0x3fef6af9388c8dea,
-0x3c8bda920de0f6e2, 0x3fef690eba4df41f,
-0x3c92cc7ea345b7dc, 0x3fef672658375d2f,
-0xbc9a597f9a5ff71c, 0x3fef654013041dc2,
-0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
-0x3c50835b125aa573, 0x3fef6179e2363cf8,
-0x3c957bfb2876ea9e, 0x3fef5f99f8138a1c,
-0x3c8aaa13d61aec1f, 0x3fef5dbc2dc40bf0,
-0xbc96d99c7611eb26, 0x3fef5be084045cd4,
-0x3c8a4f81aa7110bd, 0x3fef5a06fb91588f,
-0x3c8cdc1873af2155, 0x3fef582f95281c6b,
-0xbc6817fd6a313e3e, 0x3fef565a51860746,
-0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
-0xbc96236af85fd26a, 0x3fef52b6358e15e8,
-0xbc9493684653a131, 0x3fef50e75eb44027,
-0x3c7795eb4523abe7, 0x3fef4f1aad999e82,
-0xbc8fe782cb86389d, 0x3fef4d5022fcd91d,
-0x3c8fe58b91b40095, 0x3fef4b87bf9cda38,
-0xbc98e2899077520a, 0x3fef49c18438ce4d,
-0x3c91ecaa860c614a, 0x3fef47fd7190241e,
-0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
-0xbc3e45c83ba0bbcb, 0x3fef447bc96ffc18,
-0x3c9120fcd4f59273, 0x3fef42be3578a819,
-0xbc29fd3bea07b4ee, 0x3fef4102cd3d09b9,
-0x3c807a05b0e4047d, 0x3fef3f49917ddc96,
-0x3c87f1c7350e256d, 0x3fef3d9282fc1f27,
-0x3c89b788c188c9b8, 0x3fef3bdda27912d1,
-0x3c420dac6c124f4f, 0x3fef3a2af0b63bff,
-0x3c968efde3a8a894, 0x3fef387a6e756238,
-0xbc99501d09bc09fd, 0x3fef36cc1c78903a,
-0x3c877afbca90ef84, 0x3fef351ffb82140a,
-0x3c73baf864dc8675, 0x3fef33760c547f15,
-0x3c875e18f274487d, 0x3fef31ce4fb2a63f,
-0x3c91b0575c1eaf54, 0x3fef3028c65fa1ff,
-0x3c91512f082876ee, 0x3fef2e85711ece75,
-0xbc90364bc9ce33ab, 0x3fef2ce450b3cb82,
-0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
-0xbc7548165d85ed32, 0x3fef29a8b16f0a30,
-0x3c9a02f0c7d75ec6, 0x3fef280e341ddf29,
-0x3c7c3b977a68e32c, 0x3fef2675eeb3ab98,
-0xbc96b87b3f71085e, 0x3fef24dfe1f56381,
-0xbc93a255f697ecfe, 0x3fef234c0ea83f36,
-0xbc803297e78260bf, 0x3fef21ba7591bb70,
-0x3c8d2d19edc1e550, 0x3fef202b17779965,
-0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
-0xbc76b2173113dd8c, 0x3fef1d130f50d65c,
-0xbc95b77e5ccd9fbf, 0x3fef1b8a66d10f13,
-0x3c811aa5f853590b, 0x3fef1a03fc675d1f,
-0xbc3d219b1a6fbffa, 0x3fef187fd0dad990,
-0x3c61d61a34c8aa02, 0x3fef16fde4f2e280,
-0xbc91e75c40b4251e, 0x3fef157e39771b2f,
-0xbc91f892bf6b286d, 0x3fef1400cf2f6c18,
-0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
-0x3c7590c65c20e680, 0x3fef110cc15d5346,
-0x3c98a911f1f7785a, 0x3fef0f961f641589,
-0x3c86fe320b5c1e9d, 0x3fef0e21c1c14833,
-0x3c6e149289cecb8f, 0x3fef0cafa93e2f56,
-0xbc903cd8b2f25790, 0x3fef0b3fd6a454d2,
-0xbc61e7c998db7dbb, 0x3fef09d24abd886b,
-0x3c7b3bf786a54a87, 0x3fef08670653dfe4,
-0x3c834d754db0abb6, 0x3fef06fe0a31b715,
-0x3c74bb6c41732885, 0x3fef05975721b004,
-0x3c85425c11faadf4, 0x3fef0432edeeb2fd,
-0xbc99d7399abb9a8b, 0x3fef02d0cf63eeac,
-0x3c864201e2ac744c, 0x3fef0170fc4cd831,
-0xbc5451d60c6ac9eb, 0x3fef001375752b40,
-0xbc979517a03e2847, 0x3feefeb83ba8ea32,
-0x3c8787a210ceafd9, 0x3feefd5f4fb45e20,
-0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
-0xbc888d1e4629943d, 0x3feefab46484ebb4,
-0xbc800e2a46da4bee, 0x3feef96266e3fa2d,
-0xbc93369c544088b6, 0x3feef812ba4ea77d,
-0xbc86a3803b8e5b04, 0x3feef6c55f929ff1,
-0x3c85373ce4eb6dfb, 0x3feef57a577dd72b,
-0xbc87430803972b34, 0x3feef431a2de883b,
-0x3c83adec8265a67f, 0x3feef2eb428335b4,
-0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
-0xbc835388bcac6bc5, 0x3feef06581d3f669,
-0xbc954de30ae02d94, 0x3feeef26231e754a,
-0x3c727cdb4e4b6640, 0x3feeede91be9c811,
-0xbc9907f81b512d8e, 0x3feeecae6d05d866,
-0x3c86c2696a26af35, 0x3feeeb761742d808,
-0xbc94f2487e1c03ec, 0x3feeea401b7140ef,
-0x3c888f6ff06b979a, 0x3feee90c7a61d55b,
-0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
-0xbc89d5efaabc2030, 0x3feee6ac4bcdf3ea,
-0x3c914a5432fcb2f4, 0x3feee57fbfec6cf4,
-0xbc76b8867f91c9d6, 0x3feee4559212ef89,
-0xbc991919b3ce1b15, 0x3feee32dc313a8e5,
-0x3c94c9c0b5157fe6, 0x3feee20853c10f28,
-0x3c79c3bba5562a2f, 0x3feee0e544ede173,
-0xbc62455345b51c8e, 0x3feedfc4976d27fa,
-0x3c859f48a72a4c6d, 0x3feedea64c123422,
-0xbc93331de45477d0, 0x3feedd8a63b0a09b,
-0xbc85a71612e21658, 0x3feedc70df1c5175,
-0xbc95f84d39b39b16, 0x3feedb59bf29743f,
-0xbc9312607a28698a, 0x3feeda4504ac801c,
-0xbc72ba4dc7c4d562, 0x3feed932b07a35df,
-0x3c86421f6f1d24d6, 0x3feed822c367a024,
-0xbc844f25dc02691f, 0x3feed7153e4a136a,
-0xbc58a78f4817895b, 0x3feed60a21f72e2a,
-0xbc888d328eb9b501, 0x3feed5016f44d8f5,
-0xbc9348a6815fce65, 0x3feed3fb2709468a,
-0x3c7f0bec42ddb15a, 0x3feed2f74a1af3f1,
-0xbc7c2c9b67499a1b, 0x3feed1f5d950a897,
-0xbc615f0a2b9cd452, 0x3feed0f6d5817663,
-0x3c835c43984d9871, 0x3feecffa3f84b9d4,
-0xbc8c2e465a919e1d, 0x3feecf0018321a1a,
-0x3c4363ed60c2ac11, 0x3feece086061892d,
-0xbc865dfd02bd08f1, 0x3feecd1318eb43ec,
-0xbc632afc8d9473a0, 0x3feecc2042a7d232,
-0xbc8e68cec89b1762, 0x3feecb2fde7006f4,
-0x3c9666093b0664ef, 0x3feeca41ed1d0057,
-0xbc48ae858eb682ca, 0x3feec9566f8827d0,
-0xbc95fc5e44de020e, 0x3feec86d668b3237,
-0x3c5dd71277c0915f, 0x3feec786d3001fe5,
-0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
-0x3c92001325ecd7fb, 0x3feec5c10fa920a1,
-0xbc7ea0148327c42f, 0x3feec4e1e192aed2,
-0x3c65ace6e2870332, 0x3feec4052c5916c4,
-0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de,
-0xbc9595c55690ffaf, 0x3feec2532feaada6,
-0xbc7a843ad1a88022, 0x3feec17dea6db7d7,
-0xbc8b401ba9fb5199, 0x3feec0ab213d5283,
-0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
-0x3c6df82bf324cc57, 0x3feebf0d073537ca,
-0x3c892ca3bf144e63, 0x3feebe41b817c114,
-0x3c97cae38641c7bb, 0x3feebd78e8bb586b,
-0x3c931dbdeb54e077, 0x3feebcb299fddd0d,
-0x3c62d80c5c4a2b67, 0x3feebbeeccbd7b2a,
-0xbc902c99b04aa8b0, 0x3feebb2d81d8abff,
-0x3c8f39c10d12eaf0, 0x3feeba6eba2e35f0,
-0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
-0xbc80b582d74a55d9, 0x3feeb8f8b804f127,
-0x3c73e34f67e67118, 0x3feeb8417f4531ee,
-0xbc6b4e327ff434ca, 0x3feeb78ccd3deb0d,
-0xbc87deccdc93a349, 0x3feeb6daa2cf6642,
-0xbc592dca38593e20, 0x3feeb62b00da3b14,
-0xbc75a3b1197ba0f0, 0x3feeb57de83f4eef,
-0xbc85daca9994833e, 0x3feeb4d359dfd53d,
-0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
-0xbc980b4321bc6dae, 0x3feeb385df598d78,
-0x3c81bd2888075068, 0x3feeb2e2f4f6ad27,
-0xbc8390afec5241c5, 0x3feeb24298571b06,
-0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f,
-0x3c8f15cdafe7d586, 0x3feeb1098bed1bdf,
-0xbc896be8ae89ef8f, 0x3feeb070dde910d2,
-0xbc910aa91ae9b67f, 0x3feeafdac1351819,
-0x3c93350518fdd78e, 0x3feeaf4736b527da,
-0x3c957e1b67462375, 0x3feeaeb63f4d854c,
-0xbc88e6ac90348602, 0x3feeae27dbe2c4cf,
-0x3c8124d5051552a7, 0x3feead9c0d59ca07,
-0x3c7b98b72f8a9b05, 0x3feead12d497c7fd,
-0xbc3ca103952ecf1f, 0x3feeac8c32824135,
-0xbc91af7f1365c3ac, 0x3feeac0827ff07cc,
-0x3c773345c02a4fd6, 0x3feeab86b5f43d92,
-0x3c9063e1e21c5409, 0x3feeab07dd485429,
-0xbc909d2a0fce20f2, 0x3feeaa8b9ee20d1e,
-0xbc943a3540d1898a, 0x3feeaa11fba87a03,
-0xbc924f2cb4f81746, 0x3feea99af482fc8f,
-0x3c34c7855019c6ea, 0x3feea9268a5946b7,
-0xbc943592a0a9846b, 0x3feea8b4be135acc,
-0xbc951f58ddaa8090, 0x3feea84590998b93,
-0xbc956bc85d444f4f, 0x3feea7d902d47c65,
-0x3c9432e62b64c035, 0x3feea76f15ad2148,
-0x3c914d1e4218319f, 0x3feea707ca0cbf0f,
-0xbc82e1648e50a17c, 0x3feea6a320dceb71,
-0x3c971c93709313f4, 0x3feea6411b078d26,
-0xbc8ce44a6199769f, 0x3feea5e1b976dc09,
-0x3c7f88303b60d222, 0x3feea584fd15612a,
-0x3c95f30eda98a575, 0x3feea52ae6cdf6f4,
-0x3c70125ca18d4b5b, 0x3feea4d3778bc944,
-0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
-0x3c9592ea73798b11, 0x3feea42c91c56acd,
-0x3c917ecda8a72159, 0x3feea3dd1d1929fd,
-0xbc9371d6d7d75739, 0x3feea390532205d8,
-0xbc845378892be9ae, 0x3feea34634ccc320,
-0xbc8ac05fd996f807, 0x3feea2fec30678b7,
-0xbc9345f3cee1ae6e, 0x3feea2b9febc8fb7,
-0xbc91f5067d03653a, 0x3feea277e8dcc390,
-0xbc93cedd78565858, 0x3feea23882552225,
-0x3c917339c86ce3ad, 0x3feea1fbcc140be7,
-0xbc85c33fdf910406, 0x3feea1c1c70833f6,
-0xbc77e66065ba2500, 0x3feea18a7420a036,
-0x3c5710aa807e1964, 0x3feea155d44ca973,
-0x3c964c827ee6b49a, 0x3feea123e87bfb7a,
-0x3c81079ab5789604, 0x3feea0f4b19e9538,
-0xbc928311a3c73480, 0x3feea0c830a4c8d4,
-0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
-0x3c882c79e185e981, 0x3feea077541ee718,
-0x3c727df161cd7778, 0x3feea052fa75173e,
-0xbc8b48cea80b043b, 0x3feea0315a736c75,
-0xbc6a12ad8734b982, 0x3feea012750bdabf,
-0xbc4f4863bc8e5180, 0x3fee9ff64b30aa09,
-0x3c93f9924a05b767, 0x3fee9fdcddd47645,
-0x3c954835dd4b7548, 0x3fee9fc62dea2f8a,
-0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
-0xbc8bf41f59b59f8a, 0x3fee9fa10a38cee8,
-0xbc87557939a8b5ef, 0x3fee9f9298593ae5,
-0xbc8f652fde52775c, 0x3fee9f86e7ba9fef,
-0xbc80dc3d54e08851, 0x3fee9f7df9519484,
-0xbc7b0300defbcf98, 0x3fee9f77ce1303f6,
-0x3c51ed2f56fa9d1a, 0x3fee9f7466f42e87,
-0xbc89dab646035dc0, 0x3fee9f73c4eaa988,
-0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
-0xbc91f0c230588dde, 0x3fee9f7ad3ef9011,
-0xbc88e67a9006c909, 0x3fee9f8286ead08a,
-0x3c9106450507a28c, 0x3fee9f8d02d50b8f,
-0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174,
-0xbc9129729a10f3a0, 0x3fee9faa5953c849,
-0x3c86597566977ac8, 0x3fee9fbd35d7cbfd,
-0x3c781a70a5124f67, 0x3fee9fd2df29ce7c,
-0xbc8619321e55e68a, 0x3fee9feb564267c9,
-0x3c941626ea62646d, 0x3feea0069c1a861d,
-0x3c92c0b7028a5c3a, 0x3feea024b1ab6e09,
-0xbc940b9f54365b7c, 0x3feea04597eeba8f,
-0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f,
-0x3c873455e0e826c1, 0x3feea08fda749e5d,
-0x3c8a30faf49cc78c, 0x3feea0b938ac1cf6,
-0x3c94f006ad874e3e, 0x3feea0e56b7fcf03,
-0xbc7b32dcb94da51d, 0x3feea11473eb0187,
-0xbc8f6d693d0973bb, 0x3feea14652e958aa,
-0xbc92dad3519d7b5b, 0x3feea17b0976cfdb,
-0x3c58c5ee2b7e7848, 0x3feea1b2988fb9ec,
-0x3c94ecfd5467c06b, 0x3feea1ed0130c132,
-0xbc88b25e045d207b, 0x3feea22a4456e7a3,
-0x3c87d51410fd15c2, 0x3feea26a62ff86f0,
-0xbc69cb3314060ca7, 0x3feea2ad5e2850ac,
-0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
-0x3c87a0b15d19e0bb, 0x3feea33bedf2e1b9,
-0xbc760a3629969871, 0x3feea3878491c491,
-0x3c94aa7212bfa73c, 0x3feea3d5fbab091f,
-0xbc88a1c52fb3cf42, 0x3feea427543e1a12,
-0xbc81e688272a8a12, 0x3feea47b8f4abaa9,
-0x3c8b18c6e3fdef5d, 0x3feea4d2add106d9,
-0x3c4ab7b7112ec9d5, 0x3feea52cb0d1736a,
-0xbc9369b6f13b3734, 0x3feea589994cce13,
-0x3c8a1e274eed4476, 0x3feea5e968443d9a,
-0x3c90ec1ddcb1390a, 0x3feea64c1eb941f7,
-0x3c94a533a59324da, 0x3feea6b1bdadb46d,
-0xbc805e843a19ff1e, 0x3feea71a4623c7ad,
-0x3c7a56d2760d087d, 0x3feea785b91e07f1,
-0xbc522cea4f3afa1e, 0x3feea7f4179f5b21,
-0x3c91682c1c6e8b05, 0x3feea86562ab00ec,
-0xbc94d450d872576e, 0x3feea8d99b4492ed,
-0x3c89ea99cf7a9591, 0x3feea950c27004c2,
-0x3c7c88549b958471, 0x3feea9cad931a436,
-0xbc59e57d8f92ff8e, 0x3feeaa47e08e1957,
-0x3c90ad675b0e8a00, 0x3feeaac7d98a6699,
-0x3c909b176e05a9cd, 0x3feeab4ac52be8f7,
-0x3c931143962f7877, 0x3feeabd0a478580f,
-0x3c711607f1952c95, 0x3feeac597875c644,
-0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
-0x3c869608f0f86431, 0x3feead74029db01e,
-0x3c93e9e96f112479, 0x3feeae05bad61778,
-0xbc7f1ced15c5c5c0, 0x3feeae9a6bdb5598,
-0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c,
-0x3c614b97be3f7b4e, 0x3feeafccbc6c19e6,
-0xbc8dac42a4a38df0, 0x3feeb06a5e0866d9,
-0x3c81c1701c359530, 0x3feeb10afc931857,
-0x3c7bf68359f35f44, 0x3feeb1ae99157736,
-0xbc8edb1bf6809287, 0x3feeb2553499284b,
-0x3c8b99dd98b1ed84, 0x3feeb2fed0282c8a,
-0xbc8ba58ce7a736d3, 0x3feeb3ab6ccce12c,
-0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6,
-0xbc93fc025e1db9ce, 0x3feeb50dad829e70,
-0xbc7885ad50cbb750, 0x3feeb5c353aa2fe2,
-0xbc8d737c7d71382e, 0x3feeb67bff148396,
-0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
-0x3c6ae88c43905293, 0x3feeb7f669e2802b,
-0xbc82d5e85f3e0301, 0x3feeb8b82b5f98e5,
-0xbc93d1f7661fe51b, 0x3feeb97cf65253d1,
-0xbc6c23f97c90b959, 0x3feeba44cbc8520f,
-0x3c651b68797ffc1c, 0x3feebb0faccf9243,
-0xbc51669428996971, 0x3feebbdd9a7670b3,
-0x3c54579c5ceed70b, 0x3feebcae95cba768,
-0xbc92434322f4f9aa, 0x3feebd829fde4e50,
-0x3c87298413381667, 0x3feebe59b9bddb5b,
-0x3c71f2b2c1c4c014, 0x3feebf33e47a22a2,
-0xbc905000be64e965, 0x3feec01121235681,
-0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba,
-0xbc89fb12e3454b73, 0x3feec1d4d47f2598,
-0xbc9294f304f166b6, 0x3feec2bb4d53fe0d,
-0x3c7be2a03697693b, 0x3feec3a4dc5a3dd3,
-0x3c71affc2b91ce27, 0x3feec49182a3f090,
-0x3c90622b15810eea, 0x3feec581414380f2,
-0xbc8a1e58414c07d3, 0x3feec674194bb8d5,
-0x3be9a5ecc875d327, 0x3feec76a0bcfc15e,
-0x3c6dd235e10a73bb, 0x3feec86319e32323,
-0x3c88ea486a3350ef, 0x3feec95f4499c647,
-0xbc79740b58a20091, 0x3feeca5e8d07f29e,
-0xbc7a2ee551d4c40f, 0x3feecb60f4424fcb,
-0xbc87c50422622263, 0x3feecc667b5de565,
-0x3c89c31f7e38028b, 0x3feecd6f23701b15,
-0x3c9165830a2b96c2, 0x3feece7aed8eb8bb,
-0xbc5fac13f4e005a3, 0x3feecf89dacfe68c,
-0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33,
-0x3c7d8aced7162e89, 0x3feed1b1231475f7,
-0xbc903d5cbe27874b, 0x3feed2c980460ad8,
-0xbc848f50cea7269f, 0x3feed3e504f696b1,
-0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
-0x3c821eb9a08a0542, 0x3feed625893523d4,
-0x3c5986178980fce0, 0x3feed74a8af46052,
-0xbc6133a953131cfd, 0x3feed872b8950a73,
-0x3c90cc319cee31d2, 0x3feed99e1330b358,
-0x3c89e95e6f4a0ae4, 0x3feedacc9be14dca,
-0xbc89472975b1f2a5, 0x3feedbfe53c12e59,
-0xbc90260cf07cb311, 0x3feedd333beb0b7e,
-0x3c8469846e735ab3, 0x3feede6b5579fdbf,
-0x3c1bca400a7b939d, 0x3feedfa6a1897fd2,
-0x3c7d8157a34b7e7f, 0x3feee0e521356eba,
-0x3c9140bc34dfc19f, 0x3feee226d59a09ee,
-0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a,
-0xbc8c9b1da461ab87, 0x3feee4b3e100301e,
-0x3c8c8a4e231ebb7d, 0x3feee5ff3a3c2774,
-0x3c8c115f23ebea8e, 0x3feee74dcca5a413,
-0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
-0xbc6dcab99f23f84e, 0x3feee9f4a17a4735,
-0xbc888c8d11a142e5, 0x3feeeb4ce622f2ff,
-0x3c60a43e8b7e4bfe, 0x3feeeca868742ee4,
-0xbc907b8f4ad1d9fa, 0x3feeee07298db666,
-0x3c915b1397075f04, 0x3feeef692a8fa8cd,
-0x3c889c2ea41433c7, 0x3feef0ce6c9a8952,
-0xbc839f7a1f04d2b0, 0x3feef236f0cf3f3a,
-0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
-0xbc86a510f31e13e6, 0x3feef511c43bbd62,
-0xbc7274aedac8ff80, 0x3feef68415b749b1,
-0xbc92887ea88e7340, 0x3feef7f9ade433c6,
-0xbc90a40e3da6f640, 0x3feef9728de5593a,
-0xbc6e57ac604759ba, 0x3feefaeeb6ddfc87,
-0x3c85c620ce76df06, 0x3feefc6e29f1c52a,
-0x3c8e6c6db4f83226, 0x3feefdf0e844bfc6,
-0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
-0xbc8d1bf10460dba0, 0x3fef01004b3a7804,
-0xbc8fda52e1b51e41, 0x3fef028cf22749e4,
-0x3c8e5d80813dddfc, 0x3fef041ce8e77680,
-0xbc91eee26b588a35, 0x3fef05b030a1064a,
-0x3c8caff9640f2dcb, 0x3fef0746ca7a67a7,
-0xbc32141a7b3e2cd8, 0x3fef08e0b79a6f1f,
-0x3c7a77557fd62db3, 0x3fef0a7df9285775,
-0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
-0xbc651ba6128db749, 0x3fef0dc27e2cb5e5,
-0xbc302899507554e5, 0x3fef0f69c3f3a207,
-0xbc7c0ffefdc5e251, 0x3fef111462c95b60,
-0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09,
-0xbc8b6cd058bfd6fa, 0x3fef1473b0468d30,
-0xbc80dda2d4c0010c, 0x3fef16286141b33d,
-0x3c923759b8aca76d, 0x3fef17e06ff301f4,
-0x3c736eae30af0cb3, 0x3fef199bdd85529c,
-0xbc895498a73dac7d, 0x3fef1b5aab23e61e,
-0xbc8a007daadf8d68, 0x3fef1d1cd9fa652c,
-0x3c851de924583108, 0x3fef1ee26b34e065,
-0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a,
-0xbc8c5fe4051ba06c, 0x3fef2277b9881650,
-0x3c836909391181d3, 0x3fef244778fafb22,
-0xbc6d1816c0a9ac07, 0x3fef261a9f8630ad,
-0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
-0xbc7af5c67c4e8235, 0x3fef29cb269e601f,
-0xbc811cd7dbdf9547, 0x3fef2ba88988c933,
-0xbc8304ef0045d575, 0x3fef2d89584661a1,
-0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5,
-0x3c8725f94f910375, 0x3fef31553dfa8313,
-0xbc7ac28b7bef6621, 0x3fef33405751c4db,
-0x3c7b53e99f9191e8, 0x3fef352ee13da7cb,
-0x3c676b2c6c921968, 0x3fef3720dcef9069,
-0xbc810a79e6d7e2b8, 0x3fef39164b994d23,
-0xbc7030587207b9e1, 0x3fef3b0f2e6d1675,
-0x3c840635f6d2a9c0, 0x3fef3d0b869d8f0f,
-0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa,
-0x3c549eeef9ec910c, 0x3fef410e9be12cb9,
-0xbc8cc734592af7fc, 0x3fef43155b5bab74,
-0xbc8335827ffb9dce, 0x3fef451f95018d17,
-0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
-0x3c645563980ef762, 0x3fef493e7ba2c38c,
-0x3c87752a44f587e8, 0x3fef4b532b08c968,
-0xbc8cd0205eb2aab2, 0x3fef4d6b596f948c,
-0xbc900dae3875a949, 0x3fef4f87080d89f2,
-0xbc8aab80ceab2b4a, 0x3fef51a638197a3c,
-0x3c85b66fefeef52e, 0x3fef53c8eacaa1d6,
-0xbc8f870f40a8ba1b, 0x3fef55ef2158a91f,
-0x3c74a385a63d07a7, 0x3fef5818dcfba487,
-0x3c83c119f18464c5, 0x3fef5a461eec14be,
-0x3c5159d9d908a96e, 0x3fef5c76e862e6d3,
-0xbc5a628c2be4e7c7, 0x3fef5eab3a99745b,
-0xbc82919e2040220f, 0x3fef60e316c98398,
-0xbc72550d76be719a, 0x3fef631e7e2d479d,
-0x3c8c254d16117a68, 0x3fef655d71ff6075,
-0xbc82090274667d12, 0x3fef679ff37adb4a,
-0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
-0x3c75f7d28150cac4, 0x3fef6c2fa45c4dfd,
-0xbc8d8c329fbd0e03, 0x3fef6e7cd63a8315,
-0x3c890de9296f4cd1, 0x3fef70cd9ab294e4,
-0x3c843a59ac016b4b, 0x3fef7321f301b460,
-0x3c832ff9978b34bc, 0x3fef7579e065807d,
-0xbc8ea6e6fbd5f2a6, 0x3fef77d5641c0658,
-0xbc7303b63dda1980, 0x3fef7a347f63c159,
-0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
-0xbc81f2ba385f2f95, 0x3fef7efd81a2ece1,
-0xbc63e8e3eab2cbb4, 0x3fef81676b197d17,
-0x3c768d9144ae12fc, 0x3fef83d4f11f8220,
-0xbc892ab93b470dc9, 0x3fef864614f5a129,
-0x3c853687f542403b, 0x3fef88bad7dcee90,
-0xbc8b7966cd0d2cd9, 0x3fef8b333b16ee12,
-0xbc736ed2de40b407, 0x3fef8daf3fe592e8,
-0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
-0xbc614ef56c770f3b, 0x3fef92b2334ac7ee,
-0xbc776caa4c2ff1cf, 0x3fef953924676d76,
-0x3c8df7d1353d8e88, 0x3fef97c3bc24e350,
-0x3c83c5ec519d7271, 0x3fef9a51fbc74c83,
-0xbc850bed64091b8a, 0x3fef9ce3e4933c7e,
-0xbc81d5fc525d9940, 0x3fef9f7977cdb740,
-0x3c89d852381c317f, 0x3fefa212b6bc3181,
-0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
-0x3c68a00e3cca04c4, 0x3fefa7503ccd2be5,
-0x3c855cd8aaea3d21, 0x3fefa9f4867cca6e,
-0xbc5a1f25ce94cae7, 0x3fefac9c80faa594,
-0xbc8dae98e223747d, 0x3fefaf482d8e67f1,
-0xbc6fb5f3ee307976, 0x3fefb1f78d802dc2,
-0x3c8269947c2bed4a, 0x3fefb4aaa2188510,
-0x3c737e8ae802b851, 0x3fefb7616ca06dd6,
-0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
-0x3c875119560e34af, 0x3fefbcda28a52e59,
-0xbc83b6137e9afe9e, 0x3fefbf9c1cb6412a,
-0xbc7431c3840929c6, 0x3fefc261cbdf5be7,
-0x3c842b94c3a9eb32, 0x3fefc52b376bba97,
-0xbc8cb472d2e86b99, 0x3fefc7f860a70c22,
-0xbc69fa74878ba7c7, 0x3fefcac948dd7274,
-0x3c83f5df2fde16a8, 0x3fefcd9df15b82ac,
-0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
-0x3c8eef18336b62e3, 0x3fefd35288633625,
-0x3c901f3a75ee0efe, 0x3fefd632798844f8,
-0x3c80d23f87b50a2a, 0x3fefd916302bd526,
-0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14,
-0x3c8302dee657c8e6, 0x3fefdee8f32a4b45,
-0xbc516a9ce6ed84fa, 0x3fefe1d802243c89,
-0xbc7b0caa080df170, 0x3fefe4cadbdac61d,
-0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
-0x3c7617a9f2fd24e5, 0x3fefeabbf4c0ba54,
-0xbc699c7db2effc76, 0x3fefedba3692d514,
-0x3c75f103b8fd5ca7, 0x3feff0bc4866e8ad,
-0x3c5305c14160cc89, 0x3feff3c22b8f71f1,
-0x3c8e70b094fa075a, 0x3feff6cbe15f6314,
-0x3c64b458677f9840, 0x3feff9d96b2a23d9,
-0xbc72ec9a3e5d680a, 0x3feffceaca4391b6,
-#endif
-},
-};
diff --git a/pl/math/expf.c b/pl/math/expf.c
deleted file mode 100644
index cd3cfa925c644d..00000000000000
--- a/pl/math/expf.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Single-precision e^x function.
- *
- * Copyright (c) 2017-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include <math.h>
-#include <stdint.h>
-#include "math_config.h"
-
-/*
-EXPF_TABLE_BITS = 5
-EXPF_POLY_ORDER = 3
-
-ULP error: 0.502 (nearest rounding.)
-Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.)
-Wrong count: 170635 (all nearest rounding wrong results with fma.)
-Non-nearest ULP error: 1 (rounded ULP error)
-*/
-
-#define N (1 << EXPF_TABLE_BITS)
-#define InvLn2N __expf_data.invln2_scaled
-#define T __expf_data.tab
-#define C __expf_data.poly_scaled
-
-static inline uint32_t
-top12 (float x)
-{
-  return asuint (x) >> 20;
-}
-
-float
-optr_aor_exp_f32 (float x)
-{
-  uint32_t abstop;
-  uint64_t ki, t;
-  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
-  double_t kd, xd, z, r, r2, y, s;
-
-  xd = (double_t) x;
-  abstop = top12 (x) & 0x7ff;
-  if (unlikely (abstop >= top12 (88.0f)))
-    {
-      /* |x| >= 88 or x is nan.  */
-      if (asuint (x) == asuint (-INFINITY))
-	return 0.0f;
-      if (abstop >= top12 (INFINITY))
-	return x + x;
-      if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
-	return __math_oflowf (0);
-      if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
-	return __math_uflowf (0);
-    }
-
-  /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k.  */
-  z = InvLn2N * xd;
-
-  /* Round and convert z to int, the result is in [-150*N, 128*N] and
-     ideally nearest int is used, otherwise the magnitude of r can be
-     bigger which gives larger approximation error.  */
-  kd = round (z);
-  ki = lround (z);
-  r = z - kd;
-
-  /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
-  t = T[ki % N];
-  t += ki << (52 - EXPF_TABLE_BITS);
-  s = asdouble (t);
-  z = C[0] * r + C[1];
-  r2 = r * r;
-  y = C[2] * r + 1;
-  y = z * r2 + y;
-  y = y * s;
-  return eval_as_float (y);
-}
diff --git a/pl/math/expm1_data.c b/pl/math/expm1_data.c
deleted file mode 100644
index ff7426b9013579..00000000000000
--- a/pl/math/expm1_data.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Coefficients for double-precision e^x - 1 function.
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "math_config.h"
-
-/* Generated using fpminimax, see tools/expm1.sollya for details.  */
-const double __expm1_poly[] = {0x1p-1,
-			       0x1.5555555555559p-3,
-			       0x1.555555555554bp-5,
-			       0x1.111111110f663p-7,
-			       0x1.6c16c16c1b5f3p-10,
-			       0x1.a01a01affa35dp-13,
-			       0x1.a01a018b4ecbbp-16,
-			       0x1.71ddf82db5bb4p-19,
-			       0x1.27e517fc0d54bp-22,
-			       0x1.af5eedae67435p-26,
-			       0x1.1f143d060a28ap-29};
diff --git a/pl/math/include/mathlib.h b/pl/math/include/mathlib.h
deleted file mode 100644
index f886e7f8c07a02..00000000000000
--- a/pl/math/include/mathlib.h
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * Public API.
- *
- * Copyright (c) 2015-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#ifndef _MATHLIB_H
-#define _MATHLIB_H
-
-float acosf (float);
-float acoshf (float);
-float asinf (float);
-float asinhf (float);
-float atan2f (float, float);
-float atanf (float);
-float atanhf (float);
-float cbrtf (float);
-float coshf (float);
-float cospif (float);
-float erfcf (float);
-float erff (float);
-float erfinvf (float);
-float exp10f (float);
-float expm1f (float);
-float log10f (float);
-float log1pf (float);
-float sinhf (float);
-float sinpif (float);
-float tanf (float);
-float tanhf (float);
-
-double acos (double);
-double acosh (double);
-double asin (double);
-double asinh (double);
-double atan (double);
-double atan2 (double, double);
-double atanh (double);
-double cbrt (double);
-double cosh (double);
-double cospi (double);
-double erfc (double);
-double erfinv (double);
-double exp10 (double);
-double expm1 (double);
-double log10 (double);
-double log1p (double);
-double sinh (double);
-double sinpi (double);
-double tanh (double);
-
-long double cospil (long double);
-long double erfinvl (long double);
-long double exp10l (long double);
-long double sinpil (long double);
-
-#if __aarch64__
-# if __GNUC__ >= 5
-typedef __Float32x4_t __f32x4_t;
-typedef __Float64x2_t __f64x2_t;
-# elif __clang_major__ * 100 + __clang_minor__ >= 305
-typedef __attribute__ ((__neon_vector_type__ (4))) float __f32x4_t;
-typedef __attribute__ ((__neon_vector_type__ (2))) double __f64x2_t;
-# else
-#  error Unsupported compiler
-# endif
-
-# if __GNUC__ >= 9 || __clang_major__ >= 8
-#  define __vpcs __attribute__ ((__aarch64_vector_pcs__))
-
-typedef struct __f32x4x2_t
-{
-  __f32x4_t val[2];
-} __f32x4x2_t;
-
-typedef struct __f64x2x2_t
-{
-  __f64x2_t val[2];
-} __f64x2x2_t;
-
-/* Vector functions following the vector PCS using ABI names.  */
-__vpcs __f32x4_t _ZGVnN4v_acoshf (__f32x4_t);
-__vpcs __f64x2_t _ZGVnN2v_acosh (__f64x2_t);
-__vpcs __f32x4_t _ZGVnN4v_acosf (__f32x4_t);
-__vpcs __f64x2_t _ZGVnN2v_acos (__f64x2_t);
-__vpcs __f32x4_t _ZGVnN4v_asinf (__f32x4_t);
-__vpcs __f64x2_t _ZGVnN2v_asin (__f64x2_t);
-__vpcs __f32x4_t _ZGVnN4v_asinhf (__f32x4_t);
-__vpcs __f64x2_t _ZGVnN2v_asinh (__f64x2_t);
-__vpcs __f32x4_t _ZGVnN4v_atanf (__f32x4_t);
-__vpcs __f64x2_t _ZGVnN2v_atan (__f64x2_t);
-__vpcs __f32x4_t _ZGVnN4vv_atan2f (__f32x4_t, __f32x4_t);
-__vpcs __f64x2_t _ZGVnN2vv_atan2 (__f64x2_t, __f64x2_t);
-__vpcs __f32x4_t _ZGVnN4v_atanhf (__f32x4_t);
-__vpcs __f64x2_t _ZGVnN2v_atanh (__f64x2_t);
-__vpcs __f32x4_t _ZGVnN4v_cbrtf (__f32x4_t);
-__vpcs __f64x2_t _ZGVnN2v_cbrt (__f64x2_t);
-__vpcs __f32x4x2_t _ZGVnN4v_cexpif (__f32x4_t);
-__vpcs __f64x2x2_t _ZGVnN2v_cexpi (__f64x2_t);
-__vpcs __f32x4_t _ZGVnN4v_coshf (__f32x4_t);
-__vpcs __f64x2_t _ZGVnN2v_cosh (__f64x2_t);
-__vpcs __f32x4_t _ZGVnN4v_cospif (__f32x4_t);
-__vpcs __f64x2_t _ZGVnN2v_cospi (__f64x2_t);
-__vpcs __f32x4_t _ZGVnN4v_erff (__f32x4_t);
-__vpcs __f64x2_t _ZGVnN2v_erf (__f64x2_t);
-__vpcs __f32x4_t _ZGVnN4v_erfcf (__f32x4_t);
-__vpcs __f64x2_t _ZGVnN2v_erfc (__f64x2_t);
-__vpcs __f32x4_t _ZGVnN4v_erfinvf (__f32x4_t);
-__vpcs __f64x2_t _ZGVnN2v_erfinv (__f64x2_t);
-__vpcs __f32x4_t _ZGVnN4v_exp10f (__f32x4_t);
-__vpcs __f64x2_t _ZGVnN2v_exp10 (__f64x2_t);
-__vpcs __f64x2_t _ZGVnN2v_exp2 (__f64x2_t);
-__vpcs __f32x4_t _ZGVnN4v_expm1f (__f32x4_t);
-__vpcs __f64x2_t _ZGVnN2v_expm1 (__f64x2_t);
-__vpcs __f32x4_t _ZGVnN4vv_hypotf (__f32x4_t, __f32x4_t);
-__vpcs __f64x2_t _ZGVnN2vv_hypot (__f64x2_t, __f64x2_t);
-__vpcs __f32x4_t _ZGVnN4v_log10f (__f32x4_t);
-__vpcs __f64x2_t _ZGVnN2v_log10 (__f64x2_t);
-__vpcs __f32x4_t _ZGVnN4v_log1pf (__f32x4_t);
-__vpcs __f64x2_t _ZGVnN2v_log1p (__f64x2_t);
-__vpcs __f32x4_t _ZGVnN4v_log2f (__f32x4_t);
-__vpcs __f64x2_t _ZGVnN2v_log2 (__f64x2_t);
-__vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t);
-__vpcs __f32x4_t _ZGVnN4v_sinhf (__f32x4_t);
-__vpcs __f64x2_t _ZGVnN2v_sinh (__f64x2_t);
-__vpcs __f32x4_t _ZGVnN4v_sinpif (__f32x4_t);
-__vpcs __f64x2_t _ZGVnN2v_sinpi (__f64x2_t);
-__vpcs __f32x4_t _ZGVnN4v_tanf (__f32x4_t);
-__vpcs __f64x2_t _ZGVnN2v_tan (__f64x2_t);
-__vpcs __f32x4_t _ZGVnN4v_tanhf (__f32x4_t);
-__vpcs __f64x2_t _ZGVnN2v_tanh (__f64x2_t);
-__vpcs void _ZGVnN4vl4l4_sincosf (__f32x4_t, __f32x4_t *, __f32x4_t *);
-__vpcs void _ZGVnN2vl8l8_sincos (__f64x2_t, __f64x2_t *, __f64x2_t *);
-
-# endif
-
-# if WANT_SVE_MATH
-#  include <arm_sve.h>
-svfloat32_t _ZGVsMxv_acoshf (svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxv_acosh (svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxv_acosf (svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxv_acos (svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxv_asinhf (svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxv_asinh (svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxv_asinf (svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxv_asin (svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxv_atanhf (svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxv_atanh (svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxvv_atan2f (svfloat32_t, svfloat32_t, svbool_t);
-svfloat32_t _ZGVsMxv_atanf (svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxv_atan (svfloat64_t, svbool_t);
-svfloat64_t _ZGVsMxvv_atan2 (svfloat64_t, svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxv_cbrtf (svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxv_cbrt (svfloat64_t, svbool_t);
-svfloat32x2_t _ZGVsMxv_cexpif (svfloat32_t, svbool_t);
-svfloat64x2_t _ZGVsMxv_cexpi (svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxv_coshf (svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxv_cosh (svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxv_cosf (svfloat32_t, svbool_t);
-svfloat32_t _ZGVsMxv_cospif (svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxv_cos (svfloat64_t, svbool_t);
-svfloat64_t _ZGVsMxv_cospi (svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxv_erff (svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxv_erf (svfloat64_t, svbool_t);
-svfloat64_t _ZGVsMxv_erfc (svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxv_erfcf (svfloat32_t, svbool_t);
-svfloat32_t _ZGVsMxv_expf (svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxv_exp (svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxv_exp10f (svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxv_exp10 (svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxv_exp2f (svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxv_exp2 (svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxv_expm1f (svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxv_expm1 (svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxvv_hypotf (svfloat32_t, svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxvv_hypot (svfloat64_t, svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxv_logf (svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxv_log (svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxv_log10f (svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxv_log10 (svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxv_log1pf (svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxv_log1p (svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxv_log2f (svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxv_log2 (svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxvv_powi (svfloat32_t, svint32_t, svbool_t);
-svfloat64_t _ZGVsMxvv_powk (svfloat64_t, svint64_t, svbool_t);
-svfloat32_t _ZGVsMxvv_powf (svfloat32_t, svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxvv_pow (svfloat64_t, svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxv_sinhf (svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxv_sinh (svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxv_sinf (svfloat32_t, svbool_t);
-svfloat32_t _ZGVsMxv_sinpif (svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxv_sin (svfloat64_t, svbool_t);
-svfloat64_t _ZGVsMxv_sinpi (svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxv_tanhf (svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxv_tanh (svfloat64_t, svbool_t);
-svfloat32_t _ZGVsMxv_tanf (svfloat32_t, svbool_t);
-svfloat64_t _ZGVsMxv_tan (svfloat64_t, svbool_t);
-void _ZGVsMxvl4l4_sincosf (svfloat32_t, float *, float *, svbool_t);
-void _ZGVsMxvl8l8_sincos (svfloat64_t, double *, double *, svbool_t);
-# endif
-
-#endif
-
-#endif
diff --git a/pl/math/include/pl_test.h b/pl/math/include/pl_test.h
deleted file mode 100644
index 3a3407e337b872..00000000000000
--- a/pl/math/include/pl_test.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * PL macros to aid testing. This version of this file is used for building the
- * routine, not the tests. Separate definitions are found in test/pl_test.h
- * which emit test parameters.
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception.
- */
-
-/* Emit max ULP threshold - silenced for building the routine.  */
-#define PL_TEST_ULP(f, l)
-
-/* Emit routine name if e == 1 and f is expected to correctly trigger fenv
-   exceptions. e allows declaration to be emitted conditionally upon certain
-   build flags - defer expansion by one pass to allow those flags to be expanded
-   properly.  */
-#define PL_TEST_EXPECT_FENV(f, e)
-#define PL_TEST_EXPECT_FENV_ALWAYS(f)
-
-#define PL_TEST_INTERVAL(f, lo, hi, n)
-#define PL_TEST_SYM_INTERVAL(f, lo, hi, n)
-#define PL_TEST_INTERVAL_C(f, lo, hi, n, c)
-#define PL_TEST_SYM_INTERVAL_C(f, lo, hi, n, c)
-#define PL_TEST_INTERVAL2(f, xlo, xhi, ylo, yhi, n)
diff --git a/pl/math/log.c b/pl/math/log.c
deleted file mode 100644
index 40b0441d981de3..00000000000000
--- a/pl/math/log.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Double-precision log(x) function.
- *
- * Copyright (c) 2018-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include <float.h>
-#include <math.h>
-#include <stdint.h>
-#include "math_config.h"
-
-#define T __log_data.tab
-#define T2 __log_data.tab2
-#define B __log_data.poly1
-#define A __log_data.poly
-#define Ln2hi __log_data.ln2hi
-#define Ln2lo __log_data.ln2lo
-#define N (1 << LOG_TABLE_BITS)
-#define OFF 0x3fe6000000000000
-
-/* Top 16 bits of a double.  */
-static inline uint32_t
-top16 (double x)
-{
-  return asuint64 (x) >> 48;
-}
-
-double
-optr_aor_log_f64 (double x)
-{
-  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
-  double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
-  uint64_t ix, iz, tmp;
-  uint32_t top;
-  int k, i;
-
-  ix = asuint64 (x);
-  top = top16 (x);
-
-#if LOG_POLY1_ORDER == 10 || LOG_POLY1_ORDER == 11
-#define LO asuint64 (1.0 - 0x1p-5)
-#define HI asuint64 (1.0 + 0x1.1p-5)
-#elif LOG_POLY1_ORDER == 12
-#define LO asuint64 (1.0 - 0x1p-4)
-#define HI asuint64 (1.0 + 0x1.09p-4)
-#endif
-  if (unlikely (ix - LO < HI - LO))
-    {
-      /* Handle close to 1.0 inputs separately.  */
-      /* Fix sign of zero with downward rounding when x==1.  */
-      if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
-	return 0;
-      r = x - 1.0;
-      r2 = r * r;
-      r3 = r * r2;
-#if LOG_POLY1_ORDER == 10
-      /* Worst-case error is around 0.516 ULP.  */
-      y = r3
-	  * (B[1] + r * B[2] + r2 * B[3]
-	     + r3 * (B[4] + r * B[5] + r2 * B[6] + r3 * (B[7] + r * B[8])));
-      w = B[0] * r2; /* B[0] == -0.5.  */
-      hi = r + w;
-      y += r - hi + w;
-      y += hi;
-#elif LOG_POLY1_ORDER == 11
-      /* Worst-case error is around 0.516 ULP.  */
-      y = r3
-	  * (B[1] + r * B[2]
-	     + r2
-		 * (B[3] + r * B[4] + r2 * B[5]
-		    + r3 * (B[6] + r * B[7] + r2 * B[8] + r3 * B[9])));
-      w = B[0] * r2; /* B[0] == -0.5.  */
-      hi = r + w;
-      y += r - hi + w;
-      y += hi;
-#elif LOG_POLY1_ORDER == 12
-      y = r3
-	  * (B[1] + r * B[2] + r2 * B[3]
-	     + r3
-		 * (B[4] + r * B[5] + r2 * B[6]
-		    + r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
-#if N <= 64
-      /* Worst-case error is around 0.532 ULP.  */
-      w = B[0] * r2; /* B[0] == -0.5.  */
-      hi = r + w;
-      y += r - hi + w;
-      y += hi;
-#else
-      /* Worst-case error is around 0.507 ULP.  */
-      w = r * 0x1p27;
-      double_t rhi = r + w - w;
-      double_t rlo = r - rhi;
-      w = rhi * rhi * B[0]; /* B[0] == -0.5.  */
-      hi = r + w;
-      lo = r - hi + w;
-      lo += B[0] * rlo * (rhi + r);
-      y += lo;
-      y += hi;
-#endif
-#endif
-      return eval_as_double (y);
-    }
-  if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010))
-    {
-      /* x < 0x1p-1022 or inf or nan.  */
-      if (ix * 2 == 0)
-	return __math_divzero (1);
-      if (ix == asuint64 (INFINITY)) /* log(inf) == inf.  */
-	return x;
-      if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
-	return __math_invalid (x);
-      /* x is subnormal, normalize it.  */
-      ix = asuint64 (x * 0x1p52);
-      ix -= 52ULL << 52;
-    }
-
-  /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
-     The range is split into N subintervals.
-     The ith subinterval contains z and c is near its center.  */
-  tmp = ix - OFF;
-  i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
-  k = (int64_t) tmp >> 52; /* arithmetic shift */
-  iz = ix - (tmp & 0xfffULL << 52);
-  invc = T[i].invc;
-  logc = T[i].logc;
-  z = asdouble (iz);
-
-  /* log(x) = log1p(z/c-1) + log(c) + k*Ln2.  */
-  /* r ~= z/c - 1, |r| < 1/(2*N).  */
-#if HAVE_FAST_FMA
-  /* rounding error: 0x1p-55/N.  */
-  r = fma (z, invc, -1.0);
-#else
-  /* rounding error: 0x1p-55/N + 0x1p-66.  */
-  r = (z - T2[i].chi - T2[i].clo) * invc;
-#endif
-  kd = (double_t) k;
-
-  /* hi + lo = r + log(c) + k*Ln2.  */
-  w = kd * Ln2hi + logc;
-  hi = w + r;
-  lo = w - hi + r + kd * Ln2lo;
-
-  /* log(x) = lo + (log1p(r) - r) + hi.  */
-  r2 = r * r; /* rounding error: 0x1p-54/N^2.  */
-  /* Worst case error if |y| > 0x1p-5:
-     0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
-     Worst case error if |y| > 0x1p-4:
-     0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma).  */
-#if LOG_POLY_ORDER == 6
-  y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
-#elif LOG_POLY_ORDER == 7
-  y = lo
-      + r2
-	  * (A[0] + r * A[1] + r2 * (A[2] + r * A[3])
-	     + r2 * r2 * (A[4] + r * A[5]))
-      + hi;
-#endif
-  return eval_as_double (y);
-}
diff --git a/pl/math/log1p_data.c b/pl/math/log1p_data.c
deleted file mode 100644
index 6168a0c9a21472..00000000000000
--- a/pl/math/log1p_data.c
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Data used in double-precision log(1+x) function.
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "math_config.h"
-
-/* Polynomial coefficients generated using Remez algorithm, see
-   log1p.sollya for details.  */
-const struct log1p_data __log1p_data = {
-  .coeffs = {-0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2,
-	     0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3,
-	     -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4,
-	     0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4,
-	     -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5,
-	     0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4,
-	     -0x1.cfa7385bdb37ep-6}};
diff --git a/pl/math/log_data.c b/pl/math/log_data.c
deleted file mode 100644
index 34715e5036a39d..00000000000000
--- a/pl/math/log_data.c
+++ /dev/null
@@ -1,511 +0,0 @@
-/*
- * Data for log.
- *
- * Copyright (c) 2018-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "math_config.h"
-
-#define N (1 << LOG_TABLE_BITS)
-
-const struct log_data __log_data = {
-.ln2hi = 0x1.62e42fefa3800p-1,
-.ln2lo = 0x1.ef35793c76730p-45,
-.poly1 = {
-#if LOG_POLY1_ORDER == 10
-// relative error: 0x1.32eccc6p-62
-// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval)
--0x1p-1,
-0x1.55555555554e5p-2,
--0x1.0000000000af2p-2,
-0x1.9999999bbe436p-3,
--0x1.55555537f9cdep-3,
-0x1.24922fc8127cfp-3,
--0x1.0000b7d6bb612p-3,
-0x1.c806ee1ddbcafp-4,
--0x1.972335a9c2d6ep-4,
-#elif LOG_POLY1_ORDER == 11
-// relative error: 0x1.52c8b708p-68
-// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval)
--0x1p-1,
-0x1.5555555555555p-2,
--0x1.ffffffffffea9p-3,
-0x1.999999999c4d4p-3,
--0x1.55555557f5541p-3,
-0x1.249248fbe33e4p-3,
--0x1.ffffc9a3c825bp-4,
-0x1.c71e1f204435dp-4,
--0x1.9a7f26377d06ep-4,
-0x1.71c30cf8f7364p-4,
-#elif LOG_POLY1_ORDER == 12
-// relative error: 0x1.c04d76cp-63
-// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval)
--0x1p-1,
-0x1.5555555555577p-2,
--0x1.ffffffffffdcbp-3,
-0x1.999999995dd0cp-3,
--0x1.55555556745a7p-3,
-0x1.24924a344de3p-3,
--0x1.fffffa4423d65p-4,
-0x1.c7184282ad6cap-4,
--0x1.999eb43b068ffp-4,
-0x1.78182f7afd085p-4,
--0x1.5521375d145cdp-4,
-#endif
-},
-.poly = {
-#if N == 64 && LOG_POLY_ORDER == 7
-// relative error: 0x1.906eb8ap-58
-// abs error: 0x1.d2cad5a8p-67
-// in -0x1.fp-8 0x1.fp-8
--0x1.0000000000027p-1,
-0x1.555555555556ap-2,
--0x1.fffffff0440bap-3,
-0x1.99999991906c3p-3,
--0x1.555c8d7e8201ep-3,
-0x1.24978c59151fap-3,
-#elif N == 128 && LOG_POLY_ORDER == 6
-// relative error: 0x1.926199e8p-56
-// abs error: 0x1.882ff33p-65
-// in -0x1.fp-9 0x1.fp-9
--0x1.0000000000001p-1,
-0x1.555555551305bp-2,
--0x1.fffffffeb459p-3,
-0x1.999b324f10111p-3,
--0x1.55575e506c89fp-3,
-#elif N == 128 && LOG_POLY_ORDER == 7
-// relative error: 0x1.649fc4bp-64
-// abs error: 0x1.c3b5769p-74
-// in -0x1.fp-9 0x1.fp-9
--0x1.0000000000001p-1,
-0x1.5555555555556p-2,
--0x1.fffffffea1a8p-3,
-0x1.99999998e9139p-3,
--0x1.555776801b968p-3,
-0x1.2493c29331a5cp-3,
-#endif
-},
-/* Algorithm:
-
-	x = 2^k z
-	log(x) = k ln2 + log(c) + log(z/c)
-	log(z/c) = poly(z/c - 1)
-
-where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
-into the ith one, then table entries are computed as
-
-	tab[i].invc = 1/c
-	tab[i].logc = (double)log(c)
-	tab2[i].chi = (double)c
-	tab2[i].clo = (double)(c - (double)c)
-
-where c is near the center of the subinterval and is chosen by trying +-2^29
-floating point invc candidates around 1/center and selecting one for which
-
-	1) the rounding error in 0x1.8p9 + logc is 0,
-	2) the rounding error in z - chi - clo is < 0x1p-66 and
-	3) the rounding error in (double)log(c) is minimized (< 0x1p-66).
-
-Note: 1) ensures that k*ln2hi + logc can be computed without rounding error,
-2) ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to
-a single rounding error when there is no fast fma for z*invc - 1, 3) ensures
-that logc + poly(z/c - 1) has small error, however near x == 1 when
-|log(x)| < 0x1p-4, this is not enough so that is special cased.  */
-.tab = {
-#if N == 64
-{0x1.7242886495cd8p+0, -0x1.79e267bdfe000p-2},
-{0x1.6e1f769340dc9p+0, -0x1.6e60ee0ecb000p-2},
-{0x1.6a13ccc8f195cp+0, -0x1.63002fdbf6000p-2},
-{0x1.661ec72e86f3ap+0, -0x1.57bf76c597000p-2},
-{0x1.623fa6c447b16p+0, -0x1.4c9e07f0d2000p-2},
-{0x1.5e75bbca31702p+0, -0x1.419b42f027000p-2},
-{0x1.5ac05655adb10p+0, -0x1.36b67660e6000p-2},
-{0x1.571ed3e940191p+0, -0x1.2bef0839e4800p-2},
-{0x1.539094ac0fbbfp+0, -0x1.21445727cb000p-2},
-{0x1.5015007e7fc42p+0, -0x1.16b5ca3c3d000p-2},
-{0x1.4cab877c31cf9p+0, -0x1.0c42d3805f800p-2},
-{0x1.49539e76a88d3p+0, -0x1.01eae61b60800p-2},
-{0x1.460cbc12211dap+0, -0x1.ef5adb9fb0000p-3},
-{0x1.42d6624debe3ap+0, -0x1.db13daab99000p-3},
-{0x1.3fb0144f0d462p+0, -0x1.c6ffbe896e000p-3},
-{0x1.3c995a1f9a9b4p+0, -0x1.b31d84722d000p-3},
-{0x1.3991c23952500p+0, -0x1.9f6c3cf6eb000p-3},
-{0x1.3698df35eaa14p+0, -0x1.8beafe7f13000p-3},
-{0x1.33ae463091760p+0, -0x1.7898db878d000p-3},
-{0x1.30d190aae3d72p+0, -0x1.6574efe4ec000p-3},
-{0x1.2e025c9203c89p+0, -0x1.527e620845000p-3},
-{0x1.2b404a7244988p+0, -0x1.3fb457d798000p-3},
-{0x1.288b01dc19544p+0, -0x1.2d1615a077000p-3},
-{0x1.25e2268085f69p+0, -0x1.1aa2b431e5000p-3},
-{0x1.23456812abb74p+0, -0x1.08598f1d2b000p-3},
-{0x1.20b4703174157p+0, -0x1.ec738fee40000p-4},
-{0x1.1e2ef308b4e9bp+0, -0x1.c885768862000p-4},
-{0x1.1bb4a36b70a3fp+0, -0x1.a4e75b6a46000p-4},
-{0x1.194538e960658p+0, -0x1.8197efba9a000p-4},
-{0x1.16e0692a10ac8p+0, -0x1.5e95ad734e000p-4},
-{0x1.1485f1ba1568bp+0, -0x1.3bdf67117c000p-4},
-{0x1.12358e123ed6fp+0, -0x1.1973b744f0000p-4},
-{0x1.0fef01de37c8dp+0, -0x1.eea33446bc000p-5},
-{0x1.0db20b82be414p+0, -0x1.aaef4ab304000p-5},
-{0x1.0b7e6f67f69b3p+0, -0x1.67c962fd2c000p-5},
-{0x1.0953f342fc108p+0, -0x1.252f29acf8000p-5},
-{0x1.0732604ec956bp+0, -0x1.c63d19e9c0000p-6},
-{0x1.051980117f9b0p+0, -0x1.432ab6a388000p-6},
-{0x1.03091aa6810f1p+0, -0x1.8244357f50000p-7},
-{0x1.01010152cf066p+0, -0x1.0080a711c0000p-8},
-{0x1.fc07ef6b6e30bp-1, 0x1.fe03018e80000p-8},
-{0x1.f4465aa1024afp-1, 0x1.7b91986450000p-6},
-{0x1.ecc07a8fd3f5ep-1, 0x1.39e88608c8000p-5},
-{0x1.e573ad856b537p-1, 0x1.b42dc6e624000p-5},
-{0x1.de5d6dc7b8057p-1, 0x1.165372ec20000p-4},
-{0x1.d77b6498bddf7p-1, 0x1.51b07a0170000p-4},
-{0x1.d0cb580315c0fp-1, 0x1.8c3465c7ea000p-4},
-{0x1.ca4b30d1cf449p-1, 0x1.c5e544a290000p-4},
-{0x1.c3f8ef4810d8ep-1, 0x1.fec91aa0a6000p-4},
-{0x1.bdd2b8b311f44p-1, 0x1.1b72acdc5c000p-3},
-{0x1.b7d6c2eeac054p-1, 0x1.371fc65a98000p-3},
-{0x1.b20363474c8f5p-1, 0x1.526e61c1aa000p-3},
-{0x1.ac570165eeab1p-1, 0x1.6d60ffc240000p-3},
-{0x1.a6d019f331df4p-1, 0x1.87fa08a013000p-3},
-{0x1.a16d3ebc9e3c3p-1, 0x1.a23bc630c3000p-3},
-{0x1.9c2d14567ef45p-1, 0x1.bc286a3512000p-3},
-{0x1.970e4efae9169p-1, 0x1.d5c2195697000p-3},
-{0x1.920fb3bd0b802p-1, 0x1.ef0ae132d3000p-3},
-{0x1.8d3018b58699ap-1, 0x1.040259974e000p-2},
-{0x1.886e5ff170ee6p-1, 0x1.1058bd40e2000p-2},
-{0x1.83c977ad35d27p-1, 0x1.1c898c1137800p-2},
-{0x1.7f405ed16c520p-1, 0x1.2895a3e65b000p-2},
-{0x1.7ad220d0335c4p-1, 0x1.347dd8f6bd000p-2},
-{0x1.767dce53474fdp-1, 0x1.4043083cb3800p-2},
-#elif N == 128
-{0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
-{0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
-{0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
-{0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2},
-{0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2},
-{0x1.69147332f0cbap+0, -0x1.602d076180000p-2},
-{0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2},
-{0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2},
-{0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2},
-{0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2},
-{0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2},
-{0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2},
-{0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2},
-{0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2},
-{0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2},
-{0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2},
-{0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2},
-{0x1.52aff42064583p+0, -0x1.1e9e129279000p-2},
-{0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2},
-{0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2},
-{0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2},
-{0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2},
-{0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2},
-{0x1.4880524d48434p+0, -0x1.feb224586f000p-3},
-{0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3},
-{0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3},
-{0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3},
-{0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3},
-{0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3},
-{0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3},
-{0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3},
-{0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3},
-{0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3},
-{0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3},
-{0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3},
-{0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3},
-{0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3},
-{0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3},
-{0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3},
-{0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3},
-{0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3},
-{0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3},
-{0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3},
-{0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3},
-{0x1.293726014b530p+0, -0x1.31b996b490000p-3},
-{0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3},
-{0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3},
-{0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3},
-{0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3},
-{0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3},
-{0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4},
-{0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4},
-{0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4},
-{0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4},
-{0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4},
-{0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4},
-{0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4},
-{0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4},
-{0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4},
-{0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4},
-{0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4},
-{0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4},
-{0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4},
-{0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4},
-{0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5},
-{0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5},
-{0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5},
-{0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5},
-{0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5},
-{0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5},
-{0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5},
-{0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5},
-{0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6},
-{0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6},
-{0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6},
-{0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6},
-{0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7},
-{0x1.02865137932a9p+0, -0x1.419355daa0000p-7},
-{0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8},
-{0x1.008040614b195p+0, -0x1.0040979240000p-9},
-{0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9},
-{0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7},
-{0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6},
-{0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6},
-{0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5},
-{0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5},
-{0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5},
-{0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5},
-{0x1.e01e009609a56p-1, 0x1.07598e598c000p-4},
-{0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4},
-{0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4},
-{0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4},
-{0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4},
-{0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4},
-{0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4},
-{0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4},
-{0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4},
-{0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3},
-{0x1.bf583eeece73fp-1, 0x1.147858292b000p-3},
-{0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3},
-{0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3},
-{0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3},
-{0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3},
-{0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3},
-{0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3},
-{0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3},
-{0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3},
-{0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3},
-{0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3},
-{0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3},
-{0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3},
-{0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3},
-{0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3},
-{0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3},
-{0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3},
-{0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3},
-{0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2},
-{0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2},
-{0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2},
-{0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2},
-{0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2},
-{0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2},
-{0x1.8060195f40260p-1, 0x1.2595fd7636800p-2},
-{0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2},
-{0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2},
-{0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
-{0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
-{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
-#endif
-},
-#if !HAVE_FAST_FMA
-.tab2 = {
-#if N == 64
-{0x1.61ffff94c4fecp-1, -0x1.9fe4fc998f325p-56},
-{0x1.66000020377ddp-1, 0x1.e804c7a9519f2p-55},
-{0x1.6a00004c41678p-1, 0x1.902c675d9ecfep-55},
-{0x1.6dffff7384f87p-1, -0x1.2fd6b95e55043p-56},
-{0x1.720000b37216ep-1, 0x1.802bc8d437043p-55},
-{0x1.75ffffbeb3c9dp-1, 0x1.6047ad0a0d4e4p-57},
-{0x1.7a0000628daep-1, -0x1.e00434b49313dp-56},
-{0x1.7dffffd7abd1ap-1, -0x1.6015f8a083576p-56},
-{0x1.81ffffdf40c54p-1, 0x1.7f54bf76a42c9p-57},
-{0x1.860000f334e11p-1, 0x1.60054cb5344d7p-56},
-{0x1.8a0001238aca7p-1, 0x1.c03c9bd132f55p-57},
-{0x1.8dffffb81d212p-1, -0x1.001e519f2764fp-55},
-{0x1.92000086adc7cp-1, 0x1.1fe40f88f49c6p-55},
-{0x1.960000135d8eap-1, -0x1.f832268dc3095p-55},
-{0x1.99ffff9435acp-1, 0x1.7031d8b835edcp-56},
-{0x1.9e00003478565p-1, -0x1.0030b221ce3eep-58},
-{0x1.a20000b592948p-1, 0x1.8fd2f1dbd4639p-55},
-{0x1.a600000ad0bcfp-1, 0x1.901d6a974e6bep-55},
-{0x1.a9ffff55953a5p-1, 0x1.a07556192db98p-57},
-{0x1.adffff29ce03dp-1, -0x1.fff0717ec71c2p-56},
-{0x1.b1ffff34f3ac8p-1, 0x1.8005573de89d1p-57},
-{0x1.b60000894c55bp-1, -0x1.ff2fb51b044c7p-57},
-{0x1.b9fffef45ec7dp-1, -0x1.9ff7c4e8730fp-56},
-{0x1.be0000cda7b2ap-1, 0x1.57d058dbf3c1dp-55},
-{0x1.c1ffff2c57917p-1, 0x1.7e66d7e48dbc9p-58},
-{0x1.c60000ea5b82ap-1, -0x1.47f5e132ed4bep-55},
-{0x1.ca0001121ae98p-1, -0x1.40958c8d5e00ap-58},
-{0x1.ce0000f9241cbp-1, -0x1.7da063caa81c8p-59},
-{0x1.d1fffe8be95a4p-1, -0x1.82e3a411afcd9p-59},
-{0x1.d5ffff035932bp-1, -0x1.00f901b3fe87dp-58},
-{0x1.d9fffe8b54ba7p-1, 0x1.ffef55d6e3a4p-55},
-{0x1.de0000ad95d19p-1, 0x1.5feb2efd4c7c7p-55},
-{0x1.e1fffe925ce47p-1, 0x1.c8085484eaf08p-55},
-{0x1.e5fffe3ddf853p-1, -0x1.fd5ed02c5cadp-60},
-{0x1.e9fffed0a0e5fp-1, -0x1.a80aaef411586p-55},
-{0x1.ee00008f82eep-1, -0x1.b000aeaf97276p-55},
-{0x1.f20000a22d2f4p-1, -0x1.8f8906e13eba3p-56},
-{0x1.f5fffee35b57dp-1, 0x1.1fdd33b2d3714p-57},
-{0x1.fa00014eec3a6p-1, -0x1.3ee0b7a18c1a5p-58},
-{0x1.fdffff5daa89fp-1, -0x1.c1e24c8e3b503p-58},
-{0x1.0200005b93349p+0, -0x1.50197fe6bedcap-54},
-{0x1.05ffff9d597acp+0, 0x1.20160d062d0dcp-55},
-{0x1.0a00005687a63p+0, -0x1.27f3f9307696ep-54},
-{0x1.0dffff779164ep+0, 0x1.b7eb40bb9c4f4p-54},
-{0x1.12000044a0aa8p+0, 0x1.efbc914d512c4p-55},
-{0x1.16000069685bcp+0, -0x1.c0bea3eb2d82cp-57},
-{0x1.1a000093f0d78p+0, 0x1.1fecbf1e8c52p-54},
-{0x1.1dffffb2b1457p+0, -0x1.3fc91365637d6p-55},
-{0x1.2200008824a1p+0, -0x1.dff7e9feb578ap-54},
-{0x1.25ffffeef953p+0, -0x1.b00a61ec912f7p-55},
-{0x1.2a0000a1e7783p+0, 0x1.60048318b0483p-56},
-{0x1.2e0000853d4c7p+0, -0x1.77fbedf2c8cf3p-54},
-{0x1.320000324c55bp+0, 0x1.f81983997354fp-54},
-{0x1.360000594f796p+0, -0x1.cfe4beff900a9p-54},
-{0x1.3a0000a4c1c0fp+0, 0x1.07dbb2e268d0ep-54},
-{0x1.3e0000751c61bp+0, 0x1.80583ed1c566ep-56},
-{0x1.42000069e8a9fp+0, 0x1.f01f1edf82045p-54},
-{0x1.460000b5a1e34p+0, -0x1.dfdf0cf45c14ap-55},
-{0x1.4a0000187e513p+0, 0x1.401306b83a98dp-55},
-{0x1.4dffff3ba420bp+0, 0x1.9fc6539a6454ep-56},
-{0x1.51fffffe391c9p+0, -0x1.601ef3353ac83p-54},
-{0x1.560000e342455p+0, 0x1.3fb7fac8ac151p-55},
-{0x1.59ffffc39676fp+0, 0x1.4fe7dd6659cc2p-55},
-{0x1.5dfffff10ef42p+0, -0x1.48154cb592bcbp-54},
-#elif N == 128
-{0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
-{0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
-{0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
-{0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57},
-{0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56},
-{0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55},
-{0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55},
-{0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56},
-{0x1.710000e86978p-1, 0x1.bff6671097952p-56},
-{0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55},
-{0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57},
-{0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57},
-{0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55},
-{0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56},
-{0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55},
-{0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55},
-{0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55},
-{0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55},
-{0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55},
-{0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55},
-{0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55},
-{0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56},
-{0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55},
-{0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55},
-{0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55},
-{0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56},
-{0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55},
-{0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56},
-{0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55},
-{0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55},
-{0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60},
-{0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55},
-{0x1.a10001145b006p-1, 0x1.4ff489958da56p-56},
-{0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55},
-{0x1.a500010971d79p-1, 0x1.8fecadd78793p-55},
-{0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55},
-{0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55},
-{0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57},
-{0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55},
-{0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57},
-{0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58},
-{0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56},
-{0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56},
-{0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55},
-{0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56},
-{0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57},
-{0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57},
-{0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55},
-{0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55},
-{0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57},
-{0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55},
-{0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55},
-{0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56},
-{0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57},
-{0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55},
-{0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55},
-{0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56},
-{0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55},
-{0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58},
-{0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56},
-{0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56},
-{0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55},
-{0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55},
-{0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57},
-{0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56},
-{0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56},
-{0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56},
-{0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58},
-{0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55},
-{0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56},
-{0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58},
-{0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55},
-{0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59},
-{0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55},
-{0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55},
-{0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57},
-{0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56},
-{0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57},
-{0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56},
-{0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57},
-{0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55},
-{0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54},
-{0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54},
-{0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55},
-{0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57},
-{0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54},
-{0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55},
-{0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56},
-{0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55},
-{0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54},
-{0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54},
-{0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55},
-{0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54},
-{0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54},
-{0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57},
-{0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54},
-{0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54},
-{0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54},
-{0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56},
-{0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56},
-{0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56},
-{0x1.2b00014556313p+0, -0x1.2808233f21f02p-54},
-{0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55},
-{0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55},
-{0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55},
-{0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54},
-{0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54},
-{0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55},
-{0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54},
-{0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55},
-{0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56},
-{0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54},
-{0x1.410001532aff4p+0, 0x1.7f8375f198524p-57},
-{0x1.4300017478b29p+0, 0x1.301e672dc5143p-55},
-{0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55},
-{0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54},
-{0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54},
-{0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54},
-{0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54},
-{0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54},
-{0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57},
-{0x1.530001605277ap+0, -0x1.6bfcece233209p-54},
-{0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55},
-{0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54},
-{0x1.5900017e61012p+0, 0x1.87ec581afef9p-55},
-{0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
-{0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
-{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
-#endif
-},
-#endif /* !HAVE_FAST_FMA */
-};
diff --git a/pl/math/logf.c b/pl/math/logf.c
deleted file mode 100644
index 17a74ed6d28f10..00000000000000
--- a/pl/math/logf.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Single-precision log function.
- *
- * Copyright (c) 2017-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include <math.h>
-#include <stdint.h>
-#include "math_config.h"
-
-/*
-LOGF_TABLE_BITS = 4
-LOGF_POLY_ORDER = 4
-
-ULP error: 0.818 (nearest rounding.)
-Relative error: 1.957 * 2^-26 (before rounding.)
-*/
-
-#define T __logf_data.tab
-#define A __logf_data.poly
-#define Ln2 __logf_data.ln2
-#define N (1 << LOGF_TABLE_BITS)
-#define OFF 0x3f330000
-
-float
-optr_aor_log_f32 (float x)
-{
-  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
-  double_t z, r, r2, y, y0, invc, logc;
-  uint32_t ix, iz, tmp;
-  int k, i;
-
-  ix = asuint (x);
-#if WANT_ROUNDING
-  /* Fix sign of zero with downward rounding when x==1.  */
-  if (unlikely (ix == 0x3f800000))
-    return 0;
-#endif
-  if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
-    {
-      /* x < 0x1p-126 or inf or nan.  */
-      if (ix * 2 == 0)
-	return __math_divzerof (1);
-      if (ix == 0x7f800000) /* log(inf) == inf.  */
-	return x;
-      if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
-	return __math_invalidf (x);
-      /* x is subnormal, normalize it.  */
-      ix = asuint (x * 0x1p23f);
-      ix -= 23 << 23;
-    }
-
-  /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
-     The range is split into N subintervals.
-     The ith subinterval contains z and c is near its center.  */
-  tmp = ix - OFF;
-  i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
-  k = (int32_t) tmp >> 23; /* arithmetic shift */
-  iz = ix - (tmp & 0x1ff << 23);
-  invc = T[i].invc;
-  logc = T[i].logc;
-  z = (double_t) asfloat (iz);
-
-  /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
-  r = z * invc - 1;
-  y0 = logc + (double_t) k * Ln2;
-
-  /* Pipelined polynomial evaluation to approximate log1p(r).  */
-  r2 = r * r;
-  y = A[1] * r + A[2];
-  y = A[0] * r2 + y;
-  y = y * r2 + (y0 + r);
-  return eval_as_float (y);
-}
diff --git a/pl/math/logf_data.c b/pl/math/logf_data.c
deleted file mode 100644
index 97d9eb8d009779..00000000000000
--- a/pl/math/logf_data.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Data definition for logf and log10f.
- *
- * Copyright (c) 2017-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "math_config.h"
-
-const struct logf_data __logf_data = {
-    .tab =
-        {
-            {0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2},
-            {0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2},
-            {0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2},
-            {0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3},
-            {0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3},
-            {0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3},
-            {0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4},
-            {0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4},
-            {0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5},
-            {0x1p+0, 0x0p+0},
-            {0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5},
-            {0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4},
-            {0x1.b2036576afce6p-1, 0x1.526e57720db08p-3},
-            {0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3},
-            {0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2},
-            {0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2},
-        },
-    .ln2 = 0x1.62e42fefa39efp-1,
-    .invln10 = 0x1.bcb7b1526e50ep-2,
-    .poly = {
-        -0x1.00ea348b88334p-2,
-        0x1.5575b0be00b6ap-2,
-        -0x1.ffffef20a4123p-2,
-    }};
diff --git a/pl/math/math_config.h b/pl/math/math_config.h
deleted file mode 100644
index c3dd8f2db8c7b0..00000000000000
--- a/pl/math/math_config.h
+++ /dev/null
@@ -1,624 +0,0 @@
-/*
- * Configuration for math routines.
- *
- * Copyright (c) 2017-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#ifndef _MATH_CONFIG_H
-#define _MATH_CONFIG_H
-
-#include <math.h>
-#include <stdint.h>
-
-#ifndef WANT_ROUNDING
-/* If defined to 1, return correct results for special cases in non-nearest
-   rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than
-   -0.0f). This may be set to 0 if there is no fenv support or if math
-   functions only get called in round to nearest mode.  */
-# define WANT_ROUNDING 1
-#endif
-#ifndef WANT_ERRNO
-/* If defined to 1, set errno in math functions according to ISO C.  Many math
-   libraries do not set errno, so this is 0 by default.  It may need to be
-   set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0.  */
-# define WANT_ERRNO 0
-#endif
-#ifndef WANT_SIMD_EXCEPT
-/* If defined to 1, trigger fp exceptions in vector routines, consistently with
-   behaviour expected from the corresponding scalar routine.  */
-# define WANT_SIMD_EXCEPT 0
-#endif
-
-/* Compiler can inline round as a single instruction.  */
-#ifndef HAVE_FAST_ROUND
-# if __aarch64__
-#  define HAVE_FAST_ROUND 1
-# else
-#  define HAVE_FAST_ROUND 0
-# endif
-#endif
-
-/* Compiler can inline lround, but not (long)round(x).  */
-#ifndef HAVE_FAST_LROUND
-# if __aarch64__ && (100 * __GNUC__ + __GNUC_MINOR__) >= 408                 \
-      && __NO_MATH_ERRNO__
-#  define HAVE_FAST_LROUND 1
-# else
-#  define HAVE_FAST_LROUND 0
-# endif
-#endif
-
-/* Compiler can inline fma as a single instruction.  */
-#ifndef HAVE_FAST_FMA
-# if defined FP_FAST_FMA || __aarch64__
-#  define HAVE_FAST_FMA 1
-# else
-#  define HAVE_FAST_FMA 0
-# endif
-#endif
-
-/* Provide *_finite symbols and some of the glibc hidden symbols
-   so libmathlib can be used with binaries compiled against glibc
-   to interpose math functions with both static and dynamic linking.  */
-#ifndef USE_GLIBC_ABI
-# if __GNUC__
-#  define USE_GLIBC_ABI 1
-# else
-#  define USE_GLIBC_ABI 0
-# endif
-#endif
-
-/* Optionally used extensions.  */
-#ifdef __GNUC__
-# define HIDDEN __attribute__ ((__visibility__ ("hidden")))
-# define NOINLINE __attribute__ ((noinline))
-# define UNUSED __attribute__ ((unused))
-# define likely(x) __builtin_expect (!!(x), 1)
-# define unlikely(x) __builtin_expect (x, 0)
-# if __GNUC__ >= 9
-#  define attribute_copy(f) __attribute__ ((copy (f)))
-# else
-#  define attribute_copy(f)
-# endif
-# define strong_alias(f, a)                                                   \
-    extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
-# define hidden_alias(f, a)                                                   \
-    extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
-	attribute_copy (f);
-#else
-# define HIDDEN
-# define NOINLINE
-# define UNUSED
-# define likely(x) (x)
-# define unlikely(x) (x)
-#endif
-
-/* Return ptr but hide its value from the compiler so accesses through it
-   cannot be optimized based on the contents.  */
-#define ptr_barrier(ptr)                                                      \
-  ({                                                                          \
-    __typeof (ptr) __ptr = (ptr);                                             \
-    __asm("" : "+r"(__ptr));                                                  \
-    __ptr;                                                                    \
-  })
-
-/* Symbol renames to avoid libc conflicts.  */
-#define __math_oflowf arm_math_oflowf
-#define __math_uflowf arm_math_uflowf
-#define __math_may_uflowf arm_math_may_uflowf
-#define __math_divzerof arm_math_divzerof
-#define __math_oflow arm_math_oflow
-#define __math_uflow arm_math_uflow
-#define __math_may_uflow arm_math_may_uflow
-#define __math_divzero arm_math_divzero
-#define __math_invalidf arm_math_invalidf
-#define __math_invalid arm_math_invalid
-#define __math_check_oflow arm_math_check_oflow
-#define __math_check_uflow arm_math_check_uflow
-#define __math_check_oflowf arm_math_check_oflowf
-#define __math_check_uflowf arm_math_check_uflowf
-
-#if HAVE_FAST_ROUND
-/* When set, the roundtoint and converttoint functions are provided with
-   the semantics documented below.  */
-# define TOINT_INTRINSICS 1
-
-/* Round x to nearest int in all rounding modes, ties have to be rounded
-   consistently with converttoint so the results match.  If the result
-   would be outside of [-2^31, 2^31-1] then the semantics is unspecified.  */
-static inline double_t
-roundtoint (double_t x)
-{
-  return round (x);
-}
-
-/* Convert x to nearest int in all rounding modes, ties have to be rounded
-   consistently with roundtoint.  If the result is not representible in an
-   int32_t then the semantics is unspecified.  */
-static inline int32_t
-converttoint (double_t x)
-{
-# if HAVE_FAST_LROUND
-  return lround (x);
-# else
-  return (long) round (x);
-# endif
-}
-#endif
-
-static inline uint32_t
-asuint (float f)
-{
-  union
-  {
-    float f;
-    uint32_t i;
-  } u = { f };
-  return u.i;
-}
-
-static inline float
-asfloat (uint32_t i)
-{
-  union
-  {
-    uint32_t i;
-    float f;
-  } u = { i };
-  return u.f;
-}
-
-static inline uint64_t
-asuint64 (double f)
-{
-  union
-  {
-    double f;
-    uint64_t i;
-  } u = { f };
-  return u.i;
-}
-
-static inline double
-asdouble (uint64_t i)
-{
-  union
-  {
-    uint64_t i;
-    double f;
-  } u = { i };
-  return u.f;
-}
-
-#ifndef IEEE_754_2008_SNAN
-# define IEEE_754_2008_SNAN 1
-#endif
-static inline int
-issignalingf_inline (float x)
-{
-  uint32_t ix = asuint (x);
-  if (!IEEE_754_2008_SNAN)
-    return (ix & 0x7fc00000) == 0x7fc00000;
-  return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000;
-}
-
-static inline int
-issignaling_inline (double x)
-{
-  uint64_t ix = asuint64 (x);
-  if (!IEEE_754_2008_SNAN)
-    return (ix & 0x7ff8000000000000) == 0x7ff8000000000000;
-  return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
-}
-
-#if __aarch64__ && __GNUC__
-/* Prevent the optimization of a floating-point expression.  */
-static inline float
-opt_barrier_float (float x)
-{
-  __asm__ __volatile__ ("" : "+w" (x));
-  return x;
-}
-static inline double
-opt_barrier_double (double x)
-{
-  __asm__ __volatile__ ("" : "+w" (x));
-  return x;
-}
-/* Force the evaluation of a floating-point expression for its side-effect.  */
-static inline void
-force_eval_float (float x)
-{
-  __asm__ __volatile__ ("" : "+w" (x));
-}
-static inline void
-force_eval_double (double x)
-{
-  __asm__ __volatile__ ("" : "+w" (x));
-}
-#else
-static inline float
-opt_barrier_float (float x)
-{
-  volatile float y = x;
-  return y;
-}
-static inline double
-opt_barrier_double (double x)
-{
-  volatile double y = x;
-  return y;
-}
-static inline void
-force_eval_float (float x)
-{
-  volatile float y UNUSED = x;
-}
-static inline void
-force_eval_double (double x)
-{
-  volatile double y UNUSED = x;
-}
-#endif
-
-/* Evaluate an expression as the specified type, normally a type
-   cast should be enough, but compilers implement non-standard
-   excess-precision handling, so when FLT_EVAL_METHOD != 0 then
-   these functions may need to be customized.  */
-static inline float
-eval_as_float (float x)
-{
-  return x;
-}
-static inline double
-eval_as_double (double x)
-{
-  return x;
-}
-
-/* Error handling tail calls for special cases, with a sign argument.
-   The sign of the return value is set if the argument is non-zero.  */
-
-/* The result overflows.  */
-HIDDEN float __math_oflowf (uint32_t);
-/* The result underflows to 0 in nearest rounding mode.  */
-HIDDEN float __math_uflowf (uint32_t);
-/* The result underflows to 0 in some directed rounding mode only.  */
-HIDDEN float __math_may_uflowf (uint32_t);
-/* Division by zero.  */
-HIDDEN float __math_divzerof (uint32_t);
-/* The result overflows.  */
-HIDDEN double __math_oflow (uint32_t);
-/* The result underflows to 0 in nearest rounding mode.  */
-HIDDEN double __math_uflow (uint32_t);
-/* The result underflows to 0 in some directed rounding mode only.  */
-HIDDEN double __math_may_uflow (uint32_t);
-/* Division by zero.  */
-HIDDEN double __math_divzero (uint32_t);
-
-/* Error handling using input checking.  */
-
-/* Invalid input unless it is a quiet NaN.  */
-HIDDEN float __math_invalidf (float);
-/* Invalid input unless it is a quiet NaN.  */
-HIDDEN double __math_invalid (double);
-
-/* Error handling using output checking, only for errno setting.  */
-
-/* Check if the result overflowed to infinity.  */
-HIDDEN double __math_check_oflow (double);
-/* Check if the result underflowed to 0.  */
-HIDDEN double __math_check_uflow (double);
-
-/* Check if the result overflowed to infinity.  */
-static inline double
-check_oflow (double x)
-{
-  return WANT_ERRNO ? __math_check_oflow (x) : x;
-}
-
-/* Check if the result underflowed to 0.  */
-static inline double
-check_uflow (double x)
-{
-  return WANT_ERRNO ? __math_check_uflow (x) : x;
-}
-
-/* Check if the result overflowed to infinity.  */
-HIDDEN float __math_check_oflowf (float);
-/* Check if the result underflowed to 0.  */
-HIDDEN float __math_check_uflowf (float);
-
-/* Check if the result overflowed to infinity.  */
-static inline float
-check_oflowf (float x)
-{
-  return WANT_ERRNO ? __math_check_oflowf (x) : x;
-}
-
-/* Check if the result underflowed to 0.  */
-static inline float
-check_uflowf (float x)
-{
-  return WANT_ERRNO ? __math_check_uflowf (x) : x;
-}
-
-extern const struct erff_data
-{
-  struct
-  {
-    float erf, scale;
-  } tab[513];
-} __erff_data HIDDEN;
-
-extern const struct sv_erff_data
-{
-  float erf[513];
-  float scale[513];
-} __sv_erff_data HIDDEN;
-
-extern const struct erfcf_data
-{
-  struct
-  {
-    float erfc, scale;
-  } tab[645];
-} __erfcf_data HIDDEN;
-
-/* Data for logf and log10f.  */
-#define LOGF_TABLE_BITS 4
-#define LOGF_POLY_ORDER 4
-extern const struct logf_data
-{
-  struct
-  {
-    double invc, logc;
-  } tab[1 << LOGF_TABLE_BITS];
-  double ln2;
-  double invln10;
-  double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1.  */
-} __logf_data HIDDEN;
-
-/* Data for low accuracy log10 (with 1/ln(10) included in coefficients).  */
-#define LOG10_TABLE_BITS 7
-#define LOG10_POLY_ORDER 6
-#define LOG10_POLY1_ORDER 12
-extern const struct log10_data
-{
-  double ln2hi;
-  double ln2lo;
-  double invln10;
-  double poly[LOG10_POLY_ORDER - 1]; /* First coefficient is 1/log(10).  */
-  double poly1[LOG10_POLY1_ORDER - 1];
-  struct
-  {
-    double invc, logc;
-  } tab[1 << LOG10_TABLE_BITS];
-#if !HAVE_FAST_FMA
-  struct
-  {
-    double chi, clo;
-  } tab2[1 << LOG10_TABLE_BITS];
-#endif
-} __log10_data HIDDEN;
-
-#define EXP_TABLE_BITS 7
-#define EXP_POLY_ORDER 5
-/* Use polynomial that is optimized for a wider input range.  This may be
-   needed for good precision in non-nearest rounding and !TOINT_INTRINSICS.  */
-#define EXP_POLY_WIDE 0
-/* Use close to nearest rounding toint when !TOINT_INTRINSICS.  This may be
-   needed for good precision in non-nearest rouning and !EXP_POLY_WIDE.  */
-#define EXP_USE_TOINT_NARROW 0
-#define EXP2_POLY_ORDER 5
-#define EXP2_POLY_WIDE 0
-extern const struct exp_data
-{
-  double invln2N;
-  double shift;
-  double negln2hiN;
-  double negln2loN;
-  double poly[4]; /* Last four coefficients.  */
-  double exp2_shift;
-  double exp2_poly[EXP2_POLY_ORDER];
-  uint64_t tab[2 * (1 << EXP_TABLE_BITS)];
-} __exp_data HIDDEN;
-
-/* Copied from math/v_exp.h for use in vector exp_tail.  */
-#define V_EXP_TAIL_TABLE_BITS 8
-extern const uint64_t __v_exp_tail_data[1 << V_EXP_TAIL_TABLE_BITS] HIDDEN;
-
-/* Copied from math/v_exp.h for use in vector exp2.  */
-#define V_EXP_TABLE_BITS 7
-extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN;
-
-extern const struct erf_data
-{
-  struct
-  {
-    double erf, scale;
-  } tab[769];
-} __erf_data HIDDEN;
-
-extern const struct sv_erf_data
-{
-  double erf[769];
-  double scale[769];
-} __sv_erf_data HIDDEN;
-
-extern const struct erfc_data
-{
-  struct
-  {
-    double erfc, scale;
-  } tab[3488];
-} __erfc_data HIDDEN;
-
-#define ATAN_POLY_NCOEFFS 20
-extern const struct atan_poly_data
-{
-  double poly[ATAN_POLY_NCOEFFS];
-} __atan_poly_data HIDDEN;
-
-#define ATANF_POLY_NCOEFFS 8
-extern const struct atanf_poly_data
-{
-  float poly[ATANF_POLY_NCOEFFS];
-} __atanf_poly_data HIDDEN;
-
-#define ASINHF_NCOEFFS 8
-extern const struct asinhf_data
-{
-  float coeffs[ASINHF_NCOEFFS];
-} __asinhf_data HIDDEN;
-
-#define LOG_TABLE_BITS 7
-#define LOG_POLY_ORDER 6
-#define LOG_POLY1_ORDER 12
-extern const struct log_data
-{
-  double ln2hi;
-  double ln2lo;
-  double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
-  double poly1[LOG_POLY1_ORDER - 1];
-  struct
-  {
-    double invc, logc;
-  } tab[1 << LOG_TABLE_BITS];
-#if !HAVE_FAST_FMA
-  struct
-  {
-    double chi, clo;
-  } tab2[1 << LOG_TABLE_BITS];
-#endif
-} __log_data HIDDEN;
-
-#define ASINH_NCOEFFS 18
-extern const struct asinh_data
-{
-  double poly[ASINH_NCOEFFS];
-} __asinh_data HIDDEN;
-
-#define LOG1P_NCOEFFS 19
-extern const struct log1p_data
-{
-  double coeffs[LOG1P_NCOEFFS];
-} __log1p_data HIDDEN;
-
-#define LOG1PF_2U5
-#define LOG1PF_NCOEFFS 9
-extern const struct log1pf_data
-{
-  float coeffs[LOG1PF_NCOEFFS];
-} __log1pf_data HIDDEN;
-
-#define TANF_P_POLY_NCOEFFS 6
-/* cotan approach needs order 3 on [0, pi/4] to reach <3.5ulps.  */
-#define TANF_Q_POLY_NCOEFFS 4
-extern const struct tanf_poly_data
-{
-  float poly_tan[TANF_P_POLY_NCOEFFS];
-  float poly_cotan[TANF_Q_POLY_NCOEFFS];
-} __tanf_poly_data HIDDEN;
-
-#define V_LOG2_TABLE_BITS 7
-extern const struct v_log2_data
-{
-  double poly[5];
-  double invln2;
-  struct
-  {
-    double invc, log2c;
-  } table[1 << V_LOG2_TABLE_BITS];
-} __v_log2_data HIDDEN;
-
-#define V_LOG10_TABLE_BITS 7
-extern const struct v_log10_data
-{
-  double poly[5];
-  double invln10, log10_2;
-  struct
-  {
-    double invc, log10c;
-  } table[1 << V_LOG10_TABLE_BITS];
-} __v_log10_data HIDDEN;
-
-/* Some data for SVE powf's internal exp and log.  */
-#define V_POWF_EXP2_TABLE_BITS 5
-#define V_POWF_EXP2_N (1 << V_POWF_EXP2_TABLE_BITS)
-#define V_POWF_LOG2_TABLE_BITS 5
-#define V_POWF_LOG2_N (1 << V_POWF_LOG2_TABLE_BITS)
-extern const struct v_powf_data
-{
-  double invc[V_POWF_LOG2_N];
-  double logc[V_POWF_LOG2_N];
-  uint64_t scale[V_POWF_EXP2_N];
-} __v_powf_data HIDDEN;
-
-#define V_LOG_POLY_ORDER 6
-#define V_LOG_TABLE_BITS 7
-extern const struct v_log_data
-{
-  /* Shared data for vector log and log-derived routines (e.g. asinh).  */
-  double poly[V_LOG_POLY_ORDER - 1];
-  double ln2;
-  struct
-  {
-    double invc, logc;
-  } table[1 << V_LOG_TABLE_BITS];
-} __v_log_data HIDDEN;
-
-#define EXPM1F_POLY_ORDER 5
-extern const float __expm1f_poly[EXPM1F_POLY_ORDER] HIDDEN;
-
-#define EXPF_TABLE_BITS 5
-#define EXPF_POLY_ORDER 3
-extern const struct expf_data
-{
-  uint64_t tab[1 << EXPF_TABLE_BITS];
-  double invln2_scaled;
-  double poly_scaled[EXPF_POLY_ORDER];
-} __expf_data HIDDEN;
-
-#define EXPM1_POLY_ORDER 11
-extern const double __expm1_poly[EXPM1_POLY_ORDER] HIDDEN;
-
-extern const struct cbrtf_data
-{
-  float poly[4];
-  float table[5];
-} __cbrtf_data HIDDEN;
-
-extern const struct cbrt_data
-{
-  double poly[4];
-  double table[5];
-} __cbrt_data HIDDEN;
-
-#define ASINF_POLY_ORDER 4
-extern const float __asinf_poly[ASINF_POLY_ORDER + 1] HIDDEN;
-
-#define ASIN_POLY_ORDER 11
-extern const double __asin_poly[ASIN_POLY_ORDER + 1] HIDDEN;
-
-/* Some data for AdvSIMD and SVE pow's internal exp and log.  */
-#define V_POW_EXP_TABLE_BITS 8
-extern const struct v_pow_exp_data
-{
-  double poly[3];
-  double n_over_ln2, ln2_over_n_hi, ln2_over_n_lo, shift;
-  uint64_t sbits[1 << V_POW_EXP_TABLE_BITS];
-} __v_pow_exp_data HIDDEN;
-
-#define V_POW_LOG_TABLE_BITS 7
-extern const struct v_pow_log_data
-{
-  double poly[7]; /* First coefficient is 1.  */
-  double ln2_hi, ln2_lo;
-  double invc[1 << V_POW_LOG_TABLE_BITS];
-  double logc[1 << V_POW_LOG_TABLE_BITS];
-  double logctail[1 << V_POW_LOG_TABLE_BITS];
-} __v_pow_log_data HIDDEN;
-
-#endif
diff --git a/pl/math/math_err.c b/pl/math/math_err.c
deleted file mode 100644
index 74db54a5b2cd16..00000000000000
--- a/pl/math/math_err.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Double-precision math error handling.
- *
- * Copyright (c) 2018-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "math_config.h"
-
-#if WANT_ERRNO
-# include <errno.h>
-/* NOINLINE reduces code size and avoids making math functions non-leaf
-   when the error handling is inlined.  */
-NOINLINE static double
-with_errno (double y, int e)
-{
-  errno = e;
-  return y;
-}
-#else
-# define with_errno(x, e) (x)
-#endif
-
-/* NOINLINE reduces code size.  */
-NOINLINE static double
-xflow (uint32_t sign, double y)
-{
-  y = eval_as_double (opt_barrier_double (sign ? -y : y) * y);
-  return with_errno (y, ERANGE);
-}
-
-HIDDEN double
-__math_uflow (uint32_t sign)
-{
-  return xflow (sign, 0x1p-767);
-}
-
-/* Underflows to zero in some non-nearest rounding mode, setting errno
-   is valid even if the result is non-zero, but in the subnormal range.  */
-HIDDEN double
-__math_may_uflow (uint32_t sign)
-{
-  return xflow (sign, 0x1.8p-538);
-}
-
-HIDDEN double
-__math_oflow (uint32_t sign)
-{
-  return xflow (sign, 0x1p769);
-}
-
-HIDDEN double
-__math_divzero (uint32_t sign)
-{
-  double y = opt_barrier_double (sign ? -1.0 : 1.0) / 0.0;
-  return with_errno (y, ERANGE);
-}
-
-HIDDEN double
-__math_invalid (double x)
-{
-  double y = (x - x) / (x - x);
-  return isnan (x) ? y : with_errno (y, EDOM);
-}
-
-/* Check result and set errno if necessary.  */
-
-HIDDEN double
-__math_check_uflow (double y)
-{
-  return y == 0.0 ? with_errno (y, ERANGE) : y;
-}
-
-HIDDEN double
-__math_check_oflow (double y)
-{
-  return isinf (y) ? with_errno (y, ERANGE) : y;
-}
diff --git a/pl/math/math_errf.c b/pl/math/math_errf.c
deleted file mode 100644
index 2b8c6bd25753b6..00000000000000
--- a/pl/math/math_errf.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Single-precision math error handling.
- *
- * Copyright (c) 2017-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "math_config.h"
-
-#if WANT_ERRNO
-# include <errno.h>
-/* NOINLINE reduces code size and avoids making math functions non-leaf
-   when the error handling is inlined.  */
-NOINLINE static float
-with_errnof (float y, int e)
-{
-  errno = e;
-  return y;
-}
-#else
-# define with_errnof(x, e) (x)
-#endif
-
-/* NOINLINE reduces code size.  */
-NOINLINE static float
-xflowf (uint32_t sign, float y)
-{
-  y = eval_as_float (opt_barrier_float (sign ? -y : y) * y);
-  return with_errnof (y, ERANGE);
-}
-
-HIDDEN float
-__math_uflowf (uint32_t sign)
-{
-  return xflowf (sign, 0x1p-95f);
-}
-
-/* Underflows to zero in some non-nearest rounding mode, setting errno
-   is valid even if the result is non-zero, but in the subnormal range.  */
-HIDDEN float
-__math_may_uflowf (uint32_t sign)
-{
-  return xflowf (sign, 0x1.4p-75f);
-}
-
-HIDDEN float
-__math_oflowf (uint32_t sign)
-{
-  return xflowf (sign, 0x1p97f);
-}
-
-HIDDEN float
-__math_divzerof (uint32_t sign)
-{
-  float y = opt_barrier_float (sign ? -1.0f : 1.0f) / 0.0f;
-  return with_errnof (y, ERANGE);
-}
-
-HIDDEN float
-__math_invalidf (float x)
-{
-  float y = (x - x) / (x - x);
-  return isnan (x) ? y : with_errnof (y, EDOM);
-}
-
-/* Check result and set errno if necessary.  */
-
-HIDDEN float
-__math_check_uflowf (float y)
-{
-  return y == 0.0f ? with_errnof (y, ERANGE) : y;
-}
-
-HIDDEN float
-__math_check_oflowf (float y)
-{
-  return isinf (y) ? with_errnof (y, ERANGE) : y;
-}
diff --git a/pl/math/pl_sig.h b/pl/math/pl_sig.h
deleted file mode 100644
index 52d988f0e1ce6e..00000000000000
--- a/pl/math/pl_sig.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * PL macros for emitting various ulp/bench entries based on function signature
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception.
- */
-
-#define V_NAME_F1(fun) _ZGVnN4v_##fun##f
-#define V_NAME_D1(fun) _ZGVnN2v_##fun
-#define V_NAME_F2(fun) _ZGVnN4vv_##fun##f
-#define V_NAME_D2(fun) _ZGVnN2vv_##fun
-
-#define SV_NAME_F1(fun) _ZGVsMxv_##fun##f
-#define SV_NAME_D1(fun) _ZGVsMxv_##fun
-#define SV_NAME_F2(fun) _ZGVsMxvv_##fun##f
-#define SV_NAME_D2(fun) _ZGVsMxvv_##fun
-
-#define PL_DECL_SF1(fun) float fun##f (float);
-#define PL_DECL_SF2(fun) float fun##f (float, float);
-#define PL_DECL_SD1(fun) double fun (double);
-#define PL_DECL_SD2(fun) double fun (double, double);
-
-#if WANT_VMATH
-# define PL_DECL_VF1(fun)                                                    \
-    VPCS_ATTR float32x4_t V_NAME_F1 (fun##f) (float32x4_t);
-# define PL_DECL_VF2(fun)                                                    \
-    VPCS_ATTR float32x4_t V_NAME_F2 (fun##f) (float32x4_t, float32x4_t);
-# define PL_DECL_VD1(fun) VPCS_ATTR float64x2_t V_NAME_D1 (fun) (float64x2_t);
-# define PL_DECL_VD2(fun)                                                    \
-    VPCS_ATTR float64x2_t V_NAME_D2 (fun) (float64x2_t, float64x2_t);
-#else
-# define PL_DECL_VF1(fun)
-# define PL_DECL_VF2(fun)
-# define PL_DECL_VD1(fun)
-# define PL_DECL_VD2(fun)
-#endif
-
-#if WANT_SVE_MATH
-# define PL_DECL_SVF1(fun)                                                   \
-    svfloat32_t SV_NAME_F1 (fun) (svfloat32_t, svbool_t);
-# define PL_DECL_SVF2(fun)                                                   \
-    svfloat32_t SV_NAME_F2 (fun) (svfloat32_t, svfloat32_t, svbool_t);
-# define PL_DECL_SVD1(fun)                                                   \
-    svfloat64_t SV_NAME_D1 (fun) (svfloat64_t, svbool_t);
-# define PL_DECL_SVD2(fun)                                                   \
-    svfloat64_t SV_NAME_D2 (fun) (svfloat64_t, svfloat64_t, svbool_t);
-#else
-# define PL_DECL_SVF1(fun)
-# define PL_DECL_SVF2(fun)
-# define PL_DECL_SVD1(fun)
-# define PL_DECL_SVD2(fun)
-#endif
-
-/* For building the routines, emit function prototype from PL_SIG. This
-   ensures that the correct signature has been chosen (wrong one will be a
-   compile error). PL_SIG is defined differently by various components of the
-   build system to emit entries in the wrappers and entries for mathbench and
-   ulp.  */
-#define PL_SIG(v, t, a, f, ...) PL_DECL_##v##t##a (f)
diff --git a/pl/math/sv_acosh_3u5.c b/pl/math/sv_acosh_3u5.c
deleted file mode 100644
index faf3513314641a..00000000000000
--- a/pl/math/sv_acosh_3u5.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Double-precision SVE acosh(x) function.
- * Copyright (c) 2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
-#define WANT_SV_LOG1P_K0_SHORTCUT 1
-#include "sv_log1p_inline.h"
-
-#define BigBoundTop 0x5fe /* top12 (asuint64 (0x1p511)).  */
-#define OneTop 0x3ff
-
-static NOINLINE svfloat64_t
-special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
-{
-  return sv_call_f64 (acosh, x, y, special);
-}
-
-/* SVE approximation for double-precision acosh, based on log1p.
-   The largest observed error is 3.19 ULP in the region where the
-   argument to log1p falls in the k=0 interval, i.e. x close to 1:
-   SV_NAME_D1 (acosh)(0x1.1e4388d4ca821p+0) got 0x1.ed23399f5137p-2
-					   want 0x1.ed23399f51373p-2.  */
-svfloat64_t SV_NAME_D1 (acosh) (svfloat64_t x, const svbool_t pg)
-{
-  svuint64_t itop = svlsr_x (pg, svreinterpret_u64 (x), 52);
-  /* (itop - OneTop) >= (BigBoundTop - OneTop).  */
-  svbool_t special = svcmpge (pg, svsub_x (pg, itop, OneTop), sv_u64 (0x1ff));
-
-  svfloat64_t xm1 = svsub_x (pg, x, 1);
-  svfloat64_t u = svmul_x (pg, xm1, svadd_x (pg, x, 1));
-  svfloat64_t y = sv_log1p_inline (svadd_x (pg, xm1, svsqrt_x (pg, u)), pg);
-
-  /* Fall back to scalar routine for special lanes.  */
-  if (unlikely (svptest_any (pg, special)))
-    return special_case (x, y, special);
-
-  return y;
-}
-
-PL_SIG (SV, D, 1, acosh, 1.0, 10.0)
-PL_TEST_ULP (SV_NAME_D1 (acosh), 2.69)
-PL_TEST_INTERVAL (SV_NAME_D1 (acosh), 1, 0x1p511, 90000)
-PL_TEST_INTERVAL (SV_NAME_D1 (acosh), 0x1p511, inf, 10000)
-PL_TEST_INTERVAL (SV_NAME_D1 (acosh), 0, 1, 1000)
-PL_TEST_INTERVAL (SV_NAME_D1 (acosh), -0, -inf, 10000)
diff --git a/pl/math/sv_acoshf_2u8.c b/pl/math/sv_acoshf_2u8.c
deleted file mode 100644
index f527083af40a22..00000000000000
--- a/pl/math/sv_acoshf_2u8.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Single-precision SVE acosh(x) function.
- * Copyright (c) 2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
-#define One 0x3f800000
-#define Thres 0x20000000 /* asuint(0x1p64) - One.  */
-
-#include "sv_log1pf_inline.h"
-
-static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
-{
-  return sv_call_f32 (acoshf, x, y, special);
-}
-
-/* Single-precision SVE acosh(x) routine. Implements the same algorithm as
-   vector acoshf and log1p.
-
-   Maximum error is 2.78 ULPs:
-   SV_NAME_F1 (acosh) (0x1.01e996p+0) got 0x1.f45b42p-4
-				     want 0x1.f45b3cp-4.  */
-svfloat32_t SV_NAME_F1 (acosh) (svfloat32_t x, const svbool_t pg)
-{
-  svuint32_t ix = svreinterpret_u32 (x);
-  svbool_t special = svcmpge (pg, svsub_x (pg, ix, One), Thres);
-
-  svfloat32_t xm1 = svsub_x (pg, x, 1.0f);
-  svfloat32_t u = svmul_x (pg, xm1, svadd_x (pg, x, 1.0f));
-  svfloat32_t y = sv_log1pf_inline (svadd_x (pg, xm1, svsqrt_x (pg, u)), pg);
-
-  if (unlikely (svptest_any (pg, special)))
-    return special_case (x, y, special);
-  return y;
-}
-
-PL_SIG (SV, F, 1, acosh, 1.0, 10.0)
-PL_TEST_ULP (SV_NAME_F1 (acosh), 2.29)
-PL_TEST_INTERVAL (SV_NAME_F1 (acosh), 0, 1, 500)
-PL_TEST_INTERVAL (SV_NAME_F1 (acosh), 1, 0x1p64, 100000)
-PL_TEST_INTERVAL (SV_NAME_F1 (acosh), 0x1p64, inf, 1000)
-PL_TEST_INTERVAL (SV_NAME_F1 (acosh), -0, -inf, 1000)
diff --git a/pl/math/sv_asinh_3u0.c b/pl/math/sv_asinh_3u0.c
deleted file mode 100644
index 711f0dfdbedc66..00000000000000
--- a/pl/math/sv_asinh_3u0.c
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Double-precision SVE asinh(x) function.
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "sv_math.h"
-#include "poly_sve_f64.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
-#define OneTop sv_u64 (0x3ff)	 /* top12(asuint64(1.0f)).  */
-#define HugeBound sv_u64 (0x5fe) /* top12(asuint64(0x1p511)).  */
-#define TinyBound (0x3e5)	 /* top12(asuint64(0x1p-26)).  */
-#define SignMask (0x8000000000000000)
-
-/* Constants & data for log.  */
-#define A(i) __v_log_data.poly[i]
-#define Ln2 (0x1.62e42fefa39efp-1)
-#define N (1 << V_LOG_TABLE_BITS)
-#define OFF (0x3fe6900900000000)
-
-static svfloat64_t NOINLINE
-special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
-{
-  return sv_call_f64 (asinh, x, y, special);
-}
-
-static inline svfloat64_t
-__sv_log_inline (svfloat64_t x, const svbool_t pg)
-{
-  /* Double-precision SVE log, copied from pl/math/sv_log_2u5.c with some
-     cosmetic modification and special-cases removed. See that file for details
-     of the algorithm used.  */
-  svuint64_t ix = svreinterpret_u64 (x);
-  svuint64_t tmp = svsub_x (pg, ix, OFF);
-  svuint64_t i
-      = svand_x (pg, svlsr_x (pg, tmp, (51 - V_LOG_TABLE_BITS)), (N - 1) << 1);
-  svint64_t k = svasr_x (pg, svreinterpret_s64 (tmp), 52);
-  svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52));
-  svfloat64_t z = svreinterpret_f64 (iz);
-  svfloat64_t invc = svld1_gather_index (pg, &__v_log_data.table[0].invc, i);
-  svfloat64_t logc = svld1_gather_index (pg, &__v_log_data.table[0].logc, i);
-  svfloat64_t r = svmla_x (pg, sv_f64 (-1.0), invc, z);
-  svfloat64_t kd = svcvt_f64_x (pg, k);
-  svfloat64_t hi = svmla_x (pg, svadd_x (pg, logc, r), kd, Ln2);
-  svfloat64_t r2 = svmul_x (pg, r, r);
-  svfloat64_t y = svmla_x (pg, sv_f64 (A (2)), r, A (3));
-  svfloat64_t p = svmla_x (pg, sv_f64 (A (0)), r, A (1));
-  y = svmla_x (pg, y, r2, A (4));
-  y = svmla_x (pg, p, r2, y);
-  y = svmla_x (pg, hi, r2, y);
-  return y;
-}
-
-/* Double-precision implementation of SVE asinh(x).
-   asinh is very sensitive around 1, so it is impractical to devise a single
-   low-cost algorithm which is sufficiently accurate on a wide range of input.
-   Instead we use two different algorithms:
-   asinh(x) = sign(x) * log(|x| + sqrt(x^2 + 1)      if |x| >= 1
-	    = sign(x) * (|x| + |x|^3 * P(x^2))       otherwise
-   where log(x) is an optimized log approximation, and P(x) is a polynomial
-   shared with the scalar routine. The greatest observed error 2.51 ULP, in
-   |x| >= 1:
-   _ZGVsMxv_asinh(0x1.170469d024505p+0) got 0x1.e3181c43b0f36p-1
-				       want 0x1.e3181c43b0f39p-1.  */
-svfloat64_t SV_NAME_D1 (asinh) (svfloat64_t x, const svbool_t pg)
-{
-  svuint64_t ix = svreinterpret_u64 (x);
-  svuint64_t iax = svbic_x (pg, ix, SignMask);
-  svuint64_t sign = svand_x (pg, ix, SignMask);
-  svfloat64_t ax = svreinterpret_f64 (iax);
-  svuint64_t top12 = svlsr_x (pg, iax, 52);
-
-  svbool_t ge1 = svcmpge (pg, top12, OneTop);
-  svbool_t special = svcmpge (pg, top12, HugeBound);
-
-  /* Option 1: |x| >= 1.
-     Compute asinh(x) according by asinh(x) = log(x + sqrt(x^2 + 1)).  */
-  svfloat64_t option_1 = sv_f64 (0);
-  if (likely (svptest_any (pg, ge1)))
-    {
-      svfloat64_t axax = svmul_x (pg, ax, ax);
-      option_1 = __sv_log_inline (
-	  svadd_x (pg, ax, svsqrt_x (pg, svadd_x (pg, axax, 1))), pg);
-    }
-
-  /* Option 2: |x| < 1.
-     Compute asinh(x) using a polynomial.
-     The largest observed error in this region is 1.51 ULPs:
-     _ZGVsMxv_asinh(0x1.fe12bf8c616a2p-1) got 0x1.c1e649ee2681bp-1
-					 want 0x1.c1e649ee2681dp-1.  */
-  svfloat64_t option_2 = sv_f64 (0);
-  if (likely (svptest_any (pg, svnot_z (pg, ge1))))
-    {
-      svfloat64_t x2 = svmul_x (pg, ax, ax);
-      svfloat64_t z2 = svmul_x (pg, x2, x2);
-      svfloat64_t z4 = svmul_x (pg, z2, z2);
-      svfloat64_t z8 = svmul_x (pg, z4, z4);
-      svfloat64_t z16 = svmul_x (pg, z8, z8);
-      svfloat64_t p
-	  = sv_estrin_17_f64_x (pg, x2, z2, z4, z8, z16, __asinh_data.poly);
-      option_2 = svmla_x (pg, ax, p, svmul_x (pg, x2, ax));
-    }
-
-  /* Choose the right option for each lane.  */
-  svfloat64_t y = svsel (ge1, option_1, option_2);
-
-  /* Apply sign of x to y.  */
-  y = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign));
-
-  if (unlikely (svptest_any (pg, special)))
-    return special_case (x, y, special);
-  return y;
-}
-
-PL_SIG (SV, D, 1, asinh, -10.0, 10.0)
-PL_TEST_ULP (SV_NAME_D1 (asinh), 2.52)
-/* Test vector asinh 3 times, with control lane < 1, > 1 and special.
-   Ensures the svsel is choosing the right option in all cases.  */
-#define SV_ASINH_INTERVAL(lo, hi, n)                                          \
-  PL_TEST_SYM_INTERVAL_C (SV_NAME_D1 (asinh), lo, hi, n, 0.5)                 \
-  PL_TEST_SYM_INTERVAL_C (SV_NAME_D1 (asinh), lo, hi, n, 2)                   \
-  PL_TEST_SYM_INTERVAL_C (SV_NAME_D1 (asinh), lo, hi, n, 0x1p600)
-SV_ASINH_INTERVAL (0, 0x1p-26, 50000)
-SV_ASINH_INTERVAL (0x1p-26, 1, 50000)
-SV_ASINH_INTERVAL (1, 0x1p511, 50000)
-SV_ASINH_INTERVAL (0x1p511, inf, 40000)
diff --git a/pl/math/sv_coshf_2u.c b/pl/math/sv_coshf_2u.c
deleted file mode 100644
index 81680fef318e84..00000000000000
--- a/pl/math/sv_coshf_2u.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Single-precision SVE cosh(x) function.
- *
- * Copyright (c) 2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
-#include "sv_expf_inline.h"
-
-static const struct data
-{
-  struct sv_expf_data expf_consts;
-  uint32_t special_bound;
-} data = {
-  .expf_consts = SV_EXPF_DATA,
-  /* 0x1.5a92d8p+6: expf overflows above this, so have to use special case.  */
-  .special_bound = 0x42ad496c,
-};
-
-static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t pg)
-{
-  return sv_call_f32 (coshf, x, y, pg);
-}
-
-/* Single-precision vector cosh, using vector expf.
-   Maximum error is 1.89 ULP:
-   _ZGVsMxv_coshf (-0x1.65898cp+6) got 0x1.f00aep+127
-				  want 0x1.f00adcp+127.  */
-svfloat32_t SV_NAME_F1 (cosh) (svfloat32_t x, svbool_t pg)
-{
-  const struct data *d = ptr_barrier (&data);
-
-  svfloat32_t ax = svabs_x (pg, x);
-  svbool_t special = svcmpge (pg, svreinterpret_u32 (ax), d->special_bound);
-
-  /* Calculate cosh by exp(x) / 2 + exp(-x) / 2.  */
-  svfloat32_t t = expf_inline (ax, pg, &d->expf_consts);
-  svfloat32_t half_t = svmul_x (pg, t, 0.5);
-  svfloat32_t half_over_t = svdivr_x (pg, t, 0.5);
-
-  if (unlikely (svptest_any (pg, special)))
-    return special_case (x, svadd_x (pg, half_t, half_over_t), special);
-
-  return svadd_x (pg, half_t, half_over_t);
-}
-
-PL_SIG (SV, F, 1, cosh, -10.0, 10.0)
-PL_TEST_ULP (SV_NAME_F1 (cosh), 1.39)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cosh), 0, 0x1p-63, 100)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cosh), 0, 0x1.5a92d8p+6, 80000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cosh), 0x1.5a92d8p+6, inf, 2000)
diff --git a/pl/math/sv_erf_data.c b/pl/math/sv_erf_data.c
deleted file mode 100644
index 7244aceda5a5be..00000000000000
--- a/pl/math/sv_erf_data.c
+++ /dev/null
@@ -1,1558 +0,0 @@
-/*
- * Data for approximation of erf.
- *
- * Copyright (c) 2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "math_config.h"
-
-/* Lookup table used in vector erf.
-   For each possible rounded input r (multiples of 1/128), between
-   r = 0.0 and r = 6.0 (769 values):
-   - the first entry __erf_data.tab.erf contains the values of erf(r),
-   - the second entry __erf_data.tab.scale contains the values of
-   2/sqrt(pi)*exp(-r^2). Note that indices 0 and 1 are never hit by the
-   algorithm, since lookup is performed only for x >= 1/64-1/512.  */
-const struct sv_erf_data __sv_erf_data = {
-  .erf = { 0x0.0000000000000p+0,
-	   0x1.20dbf3deb1340p-7,
-	   0x1.20d77083f17a0p-6,
-	   0x1.b137e0cf584dcp-6,
-	   0x1.20c5645dd2538p-5,
-	   0x1.68e5d3bbc9526p-5,
-	   0x1.b0fafef135745p-5,
-	   0x1.f902a77bd3821p-5,
-	   0x1.207d480e90658p-4,
-	   0x1.44703e87e8593p-4,
-	   0x1.68591a1e83b5dp-4,
-	   0x1.8c36beb8a8d23p-4,
-	   0x1.b0081148a873ap-4,
-	   0x1.d3cbf7e70a4b3p-4,
-	   0x1.f78159ec8bb50p-4,
-	   0x1.0d939005f65e5p-3,
-	   0x1.1f5e1a35c3b89p-3,
-	   0x1.311fc15f56d14p-3,
-	   0x1.42d7fc2f64959p-3,
-	   0x1.548642321d7c6p-3,
-	   0x1.662a0bdf7a89fp-3,
-	   0x1.77c2d2a765f9ep-3,
-	   0x1.895010fdbdbfdp-3,
-	   0x1.9ad142662e14dp-3,
-	   0x1.ac45e37fe2526p-3,
-	   0x1.bdad72110a648p-3,
-	   0x1.cf076d1233237p-3,
-	   0x1.e05354b96ff36p-3,
-	   0x1.f190aa85540e2p-3,
-	   0x1.015f78a3dcf3dp-2,
-	   0x1.09eed6982b948p-2,
-	   0x1.127631eb8de32p-2,
-	   0x1.1af54e232d609p-2,
-	   0x1.236bef825d9a2p-2,
-	   0x1.2bd9db0f7827fp-2,
-	   0x1.343ed6989b7d9p-2,
-	   0x1.3c9aa8b84bedap-2,
-	   0x1.44ed18d9f6462p-2,
-	   0x1.4d35ef3e5372ep-2,
-	   0x1.5574f4ffac98ep-2,
-	   0x1.5da9f415ff23fp-2,
-	   0x1.65d4b75b00471p-2,
-	   0x1.6df50a8dff772p-2,
-	   0x1.760aba57a76bfp-2,
-	   0x1.7e15944d9d3e4p-2,
-	   0x1.861566f5fd3c0p-2,
-	   0x1.8e0a01cab516bp-2,
-	   0x1.95f3353cbb146p-2,
-	   0x1.9dd0d2b721f39p-2,
-	   0x1.a5a2aca209394p-2,
-	   0x1.ad68966569a87p-2,
-	   0x1.b522646bbda68p-2,
-	   0x1.bccfec24855b8p-2,
-	   0x1.c4710406a65fcp-2,
-	   0x1.cc058392a6d2dp-2,
-	   0x1.d38d4354c3bd0p-2,
-	   0x1.db081ce6e2a48p-2,
-	   0x1.e275eaf25e458p-2,
-	   0x1.e9d68931ae650p-2,
-	   0x1.f129d471eabb1p-2,
-	   0x1.f86faa9428f9dp-2,
-	   0x1.ffa7ea8eb5fd0p-2,
-	   0x1.03693a371519cp-1,
-	   0x1.06f794ab2cae7p-1,
-	   0x1.0a7ef5c18edd2p-1,
-	   0x1.0dff4f247f6c6p-1,
-	   0x1.1178930ada115p-1,
-	   0x1.14eab43841b55p-1,
-	   0x1.1855a5fd3dd50p-1,
-	   0x1.1bb95c3746199p-1,
-	   0x1.1f15cb50bc4dep-1,
-	   0x1.226ae840d4d70p-1,
-	   0x1.25b8a88b6dd7fp-1,
-	   0x1.28ff0240d52cdp-1,
-	   0x1.2c3debfd7d6c1p-1,
-	   0x1.2f755ce9a21f4p-1,
-	   0x1.32a54cb8db67bp-1,
-	   0x1.35cdb3a9a144dp-1,
-	   0x1.38ee8a84beb71p-1,
-	   0x1.3c07ca9cb4f9ep-1,
-	   0x1.3f196dcd0f135p-1,
-	   0x1.42236e79a5fa6p-1,
-	   0x1.4525c78dd5966p-1,
-	   0x1.4820747ba2dc2p-1,
-	   0x1.4b13713ad3513p-1,
-	   0x1.4dfeba47f63ccp-1,
-	   0x1.50e24ca35fd2cp-1,
-	   0x1.53be25d016a4fp-1,
-	   0x1.569243d2b3a9bp-1,
-	   0x1.595ea53035283p-1,
-	   0x1.5c2348ecc4dc3p-1,
-	   0x1.5ee02e8a71a53p-1,
-	   0x1.61955607dd15dp-1,
-	   0x1.6442bfdedd397p-1,
-	   0x1.66e86d0312e82p-1,
-	   0x1.69865ee075011p-1,
-	   0x1.6c1c9759d0e5fp-1,
-	   0x1.6eab18c74091bp-1,
-	   0x1.7131e5f496a5ap-1,
-	   0x1.73b1021fc0cb8p-1,
-	   0x1.762870f720c6fp-1,
-	   0x1.78983697dc96fp-1,
-	   0x1.7b00578c26037p-1,
-	   0x1.7d60d8c979f7bp-1,
-	   0x1.7fb9bfaed8078p-1,
-	   0x1.820b1202f27fbp-1,
-	   0x1.8454d5f25760dp-1,
-	   0x1.8697120d92a4ap-1,
-	   0x1.88d1cd474a2e0p-1,
-	   0x1.8b050ef253c37p-1,
-	   0x1.8d30debfc572ep-1,
-	   0x1.8f5544bd00c04p-1,
-	   0x1.91724951b8fc6p-1,
-	   0x1.9387f53df5238p-1,
-	   0x1.959651980da31p-1,
-	   0x1.979d67caa6631p-1,
-	   0x1.999d4192a5715p-1,
-	   0x1.9b95e8fd26abap-1,
-	   0x1.9d8768656cc42p-1,
-	   0x1.9f71ca72cffb6p-1,
-	   0x1.a1551a16aaeafp-1,
-	   0x1.a331628a45b92p-1,
-	   0x1.a506af4cc00f4p-1,
-	   0x1.a6d50c20fa293p-1,
-	   0x1.a89c850b7d54dp-1,
-	   0x1.aa5d265064366p-1,
-	   0x1.ac16fc7143263p-1,
-	   0x1.adca142b10f98p-1,
-	   0x1.af767a741088bp-1,
-	   0x1.b11c3c79bb424p-1,
-	   0x1.b2bb679ead19cp-1,
-	   0x1.b4540978921eep-1,
-	   0x1.b5e62fce16095p-1,
-	   0x1.b771e894d602ep-1,
-	   0x1.b8f741ef54f83p-1,
-	   0x1.ba764a2af2b78p-1,
-	   0x1.bbef0fbde6221p-1,
-	   0x1.bd61a1453ab44p-1,
-	   0x1.bece0d82d1a5cp-1,
-	   0x1.c034635b66e23p-1,
-	   0x1.c194b1d49a184p-1,
-	   0x1.c2ef0812fc1bdp-1,
-	   0x1.c443755820d64p-1,
-	   0x1.c5920900b5fd1p-1,
-	   0x1.c6dad2829ec62p-1,
-	   0x1.c81de16b14cefp-1,
-	   0x1.c95b455cce69dp-1,
-	   0x1.ca930e0e2a825p-1,
-	   0x1.cbc54b476248dp-1,
-	   0x1.ccf20ce0c0d27p-1,
-	   0x1.ce1962c0e0d8bp-1,
-	   0x1.cf3b5cdaf0c39p-1,
-	   0x1.d0580b2cfd249p-1,
-	   0x1.d16f7dbe41ca0p-1,
-	   0x1.d281c49d818d0p-1,
-	   0x1.d38eefdf64fddp-1,
-	   0x1.d4970f9ce00d9p-1,
-	   0x1.d59a33f19ed42p-1,
-	   0x1.d6986cfa798e7p-1,
-	   0x1.d791cad3eff01p-1,
-	   0x1.d8865d98abe01p-1,
-	   0x1.d97635600bb89p-1,
-	   0x1.da61623cb41e0p-1,
-	   0x1.db47f43b2980dp-1,
-	   0x1.dc29fb60715afp-1,
-	   0x1.dd0787a8bb39dp-1,
-	   0x1.dde0a90611a0dp-1,
-	   0x1.deb56f5f12d28p-1,
-	   0x1.df85ea8db188ep-1,
-	   0x1.e0522a5dfda73p-1,
-	   0x1.e11a3e8cf4eb8p-1,
-	   0x1.e1de36c75ba58p-1,
-	   0x1.e29e22a89d766p-1,
-	   0x1.e35a11b9b61cep-1,
-	   0x1.e4121370224ccp-1,
-	   0x1.e4c6372cd8927p-1,
-	   0x1.e5768c3b4a3fcp-1,
-	   0x1.e62321d06c5e0p-1,
-	   0x1.e6cc0709c8a0dp-1,
-	   0x1.e7714aec96534p-1,
-	   0x1.e812fc64db369p-1,
-	   0x1.e8b12a44944a8p-1,
-	   0x1.e94be342e6743p-1,
-	   0x1.e9e335fb56f87p-1,
-	   0x1.ea7730ed0bbb9p-1,
-	   0x1.eb07e27a133aap-1,
-	   0x1.eb9558e6b42cep-1,
-	   0x1.ec1fa258c4beap-1,
-	   0x1.eca6ccd709544p-1,
-	   0x1.ed2ae6489ac1ep-1,
-	   0x1.edabfc7453e63p-1,
-	   0x1.ee2a1d004692cp-1,
-	   0x1.eea5557137ae0p-1,
-	   0x1.ef1db32a2277cp-1,
-	   0x1.ef93436bc2daap-1,
-	   0x1.f006135426b26p-1,
-	   0x1.f0762fde45ee6p-1,
-	   0x1.f0e3a5e1a1788p-1,
-	   0x1.f14e8211e8c55p-1,
-	   0x1.f1b6d0fea5f4dp-1,
-	   0x1.f21c9f12f0677p-1,
-	   0x1.f27ff89525acfp-1,
-	   0x1.f2e0e9a6a8b09p-1,
-	   0x1.f33f7e43a706bp-1,
-	   0x1.f39bc242e43e6p-1,
-	   0x1.f3f5c1558b19ep-1,
-	   0x1.f44d870704911p-1,
-	   0x1.f4a31ebcd47dfp-1,
-	   0x1.f4f693b67bd77p-1,
-	   0x1.f547f10d60597p-1,
-	   0x1.f59741b4b97cfp-1,
-	   0x1.f5e4907982a07p-1,
-	   0x1.f62fe80272419p-1,
-	   0x1.f67952cff6282p-1,
-	   0x1.f6c0db3c34641p-1,
-	   0x1.f7068b7b10fd9p-1,
-	   0x1.f74a6d9a38383p-1,
-	   0x1.f78c8b812d498p-1,
-	   0x1.f7cceef15d631p-1,
-	   0x1.f80ba18636f07p-1,
-	   0x1.f848acb544e95p-1,
-	   0x1.f88419ce4e184p-1,
-	   0x1.f8bdf1fb78370p-1,
-	   0x1.f8f63e416ebffp-1,
-	   0x1.f92d077f8d56dp-1,
-	   0x1.f96256700da8ep-1,
-	   0x1.f99633a838a57p-1,
-	   0x1.f9c8a7989af0dp-1,
-	   0x1.f9f9ba8d3c733p-1,
-	   0x1.fa2974addae45p-1,
-	   0x1.fa57ddfe27376p-1,
-	   0x1.fa84fe5e05c8dp-1,
-	   0x1.fab0dd89d1309p-1,
-	   0x1.fadb831a9f9c3p-1,
-	   0x1.fb04f6868a944p-1,
-	   0x1.fb2d3f20f9101p-1,
-	   0x1.fb54641aebbc9p-1,
-	   0x1.fb7a6c834b5a2p-1,
-	   0x1.fb9f5f4739170p-1,
-	   0x1.fbc3433260ca5p-1,
-	   0x1.fbe61eef4cf6ap-1,
-	   0x1.fc07f907bc794p-1,
-	   0x1.fc28d7e4f9cd0p-1,
-	   0x1.fc48c1d033c7ap-1,
-	   0x1.fc67bcf2d7b8fp-1,
-	   0x1.fc85cf56ecd38p-1,
-	   0x1.fca2fee770c79p-1,
-	   0x1.fcbf5170b578bp-1,
-	   0x1.fcdacca0bfb73p-1,
-	   0x1.fcf57607a6e7cp-1,
-	   0x1.fd0f5317f582fp-1,
-	   0x1.fd2869270a56fp-1,
-	   0x1.fd40bd6d7a785p-1,
-	   0x1.fd58550773cb5p-1,
-	   0x1.fd6f34f52013ap-1,
-	   0x1.fd85621b0876dp-1,
-	   0x1.fd9ae142795e3p-1,
-	   0x1.fdafb719e6a69p-1,
-	   0x1.fdc3e835500b3p-1,
-	   0x1.fdd7790ea5bc0p-1,
-	   0x1.fdea6e062d0c9p-1,
-	   0x1.fdfccb62e52d3p-1,
-	   0x1.fe0e9552ebdd6p-1,
-	   0x1.fe1fcfebe2083p-1,
-	   0x1.fe307f2b503d0p-1,
-	   0x1.fe40a6f70af4bp-1,
-	   0x1.fe504b1d9696cp-1,
-	   0x1.fe5f6f568b301p-1,
-	   0x1.fe6e1742f7cf6p-1,
-	   0x1.fe7c466dc57a1p-1,
-	   0x1.fe8a004c19ae6p-1,
-	   0x1.fe97483db8670p-1,
-	   0x1.fea4218d6594ap-1,
-	   0x1.feb08f7146046p-1,
-	   0x1.febc950b3fa75p-1,
-	   0x1.fec835695932ep-1,
-	   0x1.fed37386190fbp-1,
-	   0x1.fede5248e38f4p-1,
-	   0x1.fee8d486585eep-1,
-	   0x1.fef2fd00af31ap-1,
-	   0x1.fefcce6813974p-1,
-	   0x1.ff064b5afffbep-1,
-	   0x1.ff0f766697c76p-1,
-	   0x1.ff18520700971p-1,
-	   0x1.ff20e0a7ba8c2p-1,
-	   0x1.ff2924a3f7a83p-1,
-	   0x1.ff312046f2339p-1,
-	   0x1.ff38d5cc4227fp-1,
-	   0x1.ff404760319b4p-1,
-	   0x1.ff47772010262p-1,
-	   0x1.ff4e671a85425p-1,
-	   0x1.ff55194fe19dfp-1,
-	   0x1.ff5b8fb26f5f6p-1,
-	   0x1.ff61cc26c1578p-1,
-	   0x1.ff67d08401202p-1,
-	   0x1.ff6d9e943c231p-1,
-	   0x1.ff733814af88cp-1,
-	   0x1.ff789eb6130c9p-1,
-	   0x1.ff7dd41ce2b4dp-1,
-	   0x1.ff82d9e1a76d8p-1,
-	   0x1.ff87b1913e853p-1,
-	   0x1.ff8c5cad200a5p-1,
-	   0x1.ff90dcaba4096p-1,
-	   0x1.ff9532f846ab0p-1,
-	   0x1.ff9960f3eb327p-1,
-	   0x1.ff9d67f51ddbap-1,
-	   0x1.ffa14948549a7p-1,
-	   0x1.ffa506302ebaep-1,
-	   0x1.ffa89fe5b3625p-1,
-	   0x1.ffac17988ef4bp-1,
-	   0x1.ffaf6e6f4f5c0p-1,
-	   0x1.ffb2a5879f35ep-1,
-	   0x1.ffb5bdf67fe6fp-1,
-	   0x1.ffb8b8c88295fp-1,
-	   0x1.ffbb970200110p-1,
-	   0x1.ffbe599f4f9d9p-1,
-	   0x1.ffc10194fcb64p-1,
-	   0x1.ffc38fcffbb7cp-1,
-	   0x1.ffc60535dd7f5p-1,
-	   0x1.ffc862a501fd7p-1,
-	   0x1.ffcaa8f4c9beap-1,
-	   0x1.ffccd8f5c66d1p-1,
-	   0x1.ffcef371ea4d7p-1,
-	   0x1.ffd0f92cb6ba7p-1,
-	   0x1.ffd2eae369a07p-1,
-	   0x1.ffd4c94d29fdbp-1,
-	   0x1.ffd6951b33686p-1,
-	   0x1.ffd84ef9009eep-1,
-	   0x1.ffd9f78c7524ap-1,
-	   0x1.ffdb8f7605ee7p-1,
-	   0x1.ffdd1750e1220p-1,
-	   0x1.ffde8fb314ebfp-1,
-	   0x1.ffdff92db56e5p-1,
-	   0x1.ffe1544d01ccbp-1,
-	   0x1.ffe2a1988857cp-1,
-	   0x1.ffe3e19349dc7p-1,
-	   0x1.ffe514bbdc197p-1,
-	   0x1.ffe63b8c8b5f7p-1,
-	   0x1.ffe7567b7b5e1p-1,
-	   0x1.ffe865fac722bp-1,
-	   0x1.ffe96a78a04a9p-1,
-	   0x1.ffea645f6d6dap-1,
-	   0x1.ffeb5415e7c44p-1,
-	   0x1.ffec39ff380b9p-1,
-	   0x1.ffed167b12ac2p-1,
-	   0x1.ffede9e5d3262p-1,
-	   0x1.ffeeb49896c6dp-1,
-	   0x1.ffef76e956a9fp-1,
-	   0x1.fff0312b010b5p-1,
-	   0x1.fff0e3ad91ec2p-1,
-	   0x1.fff18ebe2b0e1p-1,
-	   0x1.fff232a72b48ep-1,
-	   0x1.fff2cfb0453d9p-1,
-	   0x1.fff3661e9569dp-1,
-	   0x1.fff3f634b79f9p-1,
-	   0x1.fff48032dbe40p-1,
-	   0x1.fff50456dab8cp-1,
-	   0x1.fff582dc48d30p-1,
-	   0x1.fff5fbfc8a439p-1,
-	   0x1.fff66feee5129p-1,
-	   0x1.fff6dee89352ep-1,
-	   0x1.fff7491cd4af6p-1,
-	   0x1.fff7aebcff755p-1,
-	   0x1.fff80ff8911fdp-1,
-	   0x1.fff86cfd3e657p-1,
-	   0x1.fff8c5f702ccfp-1,
-	   0x1.fff91b102fca8p-1,
-	   0x1.fff96c717b695p-1,
-	   0x1.fff9ba420e834p-1,
-	   0x1.fffa04a7928b1p-1,
-	   0x1.fffa4bc63ee9ap-1,
-	   0x1.fffa8fc0e5f33p-1,
-	   0x1.fffad0b901755p-1,
-	   0x1.fffb0ecebee1bp-1,
-	   0x1.fffb4a210b172p-1,
-	   0x1.fffb82cd9dcbfp-1,
-	   0x1.fffbb8f1049c6p-1,
-	   0x1.fffbeca6adbe9p-1,
-	   0x1.fffc1e08f25f5p-1,
-	   0x1.fffc4d3120aa1p-1,
-	   0x1.fffc7a37857d2p-1,
-	   0x1.fffca53375ce3p-1,
-	   0x1.fffcce3b57bffp-1,
-	   0x1.fffcf564ab6b7p-1,
-	   0x1.fffd1ac4135f9p-1,
-	   0x1.fffd3e6d5cd87p-1,
-	   0x1.fffd607387b07p-1,
-	   0x1.fffd80e8ce0dap-1,
-	   0x1.fffd9fdeabccep-1,
-	   0x1.fffdbd65e5ad0p-1,
-	   0x1.fffdd98e903b2p-1,
-	   0x1.fffdf46816833p-1,
-	   0x1.fffe0e0140857p-1,
-	   0x1.fffe26683972ap-1,
-	   0x1.fffe3daa95b18p-1,
-	   0x1.fffe53d558ae9p-1,
-	   0x1.fffe68f4fa777p-1,
-	   0x1.fffe7d156d244p-1,
-	   0x1.fffe904222101p-1,
-	   0x1.fffea2860ee1ep-1,
-	   0x1.fffeb3ebb267bp-1,
-	   0x1.fffec47d19457p-1,
-	   0x1.fffed443e2787p-1,
-	   0x1.fffee34943b15p-1,
-	   0x1.fffef1960d85dp-1,
-	   0x1.fffeff32af7afp-1,
-	   0x1.ffff0c273bea2p-1,
-	   0x1.ffff187b6bc0ep-1,
-	   0x1.ffff2436a21dcp-1,
-	   0x1.ffff2f5fefcaap-1,
-	   0x1.ffff39fe16963p-1,
-	   0x1.ffff44178c8d2p-1,
-	   0x1.ffff4db27f146p-1,
-	   0x1.ffff56d4d5e5ep-1,
-	   0x1.ffff5f8435efcp-1,
-	   0x1.ffff67c604180p-1,
-	   0x1.ffff6f9f67e55p-1,
-	   0x1.ffff77154e0d6p-1,
-	   0x1.ffff7e2c6aea2p-1,
-	   0x1.ffff84e93cd75p-1,
-	   0x1.ffff8b500e77cp-1,
-	   0x1.ffff9164f8e46p-1,
-	   0x1.ffff972be5c59p-1,
-	   0x1.ffff9ca891572p-1,
-	   0x1.ffffa1de8c582p-1,
-	   0x1.ffffa6d13de73p-1,
-	   0x1.ffffab83e54b8p-1,
-	   0x1.ffffaff99bac4p-1,
-	   0x1.ffffb43555b5fp-1,
-	   0x1.ffffb839e52f3p-1,
-	   0x1.ffffbc09fa7cdp-1,
-	   0x1.ffffbfa82616bp-1,
-	   0x1.ffffc316d9ed0p-1,
-	   0x1.ffffc6586abf6p-1,
-	   0x1.ffffc96f1165ep-1,
-	   0x1.ffffcc5cec0c1p-1,
-	   0x1.ffffcf23ff5fcp-1,
-	   0x1.ffffd1c637b2bp-1,
-	   0x1.ffffd4456a10dp-1,
-	   0x1.ffffd6a3554a1p-1,
-	   0x1.ffffd8e1a2f22p-1,
-	   0x1.ffffdb01e8546p-1,
-	   0x1.ffffdd05a75eap-1,
-	   0x1.ffffdeee4f810p-1,
-	   0x1.ffffe0bd3e852p-1,
-	   0x1.ffffe273c15b7p-1,
-	   0x1.ffffe41314e06p-1,
-	   0x1.ffffe59c6698bp-1,
-	   0x1.ffffe710d565ep-1,
-	   0x1.ffffe8717232dp-1,
-	   0x1.ffffe9bf4098cp-1,
-	   0x1.ffffeafb377d5p-1,
-	   0x1.ffffec2641a9ep-1,
-	   0x1.ffffed413e5b7p-1,
-	   0x1.ffffee4d01cd6p-1,
-	   0x1.ffffef4a55bd4p-1,
-	   0x1.fffff039f9e8fp-1,
-	   0x1.fffff11ca4876p-1,
-	   0x1.fffff1f302bc1p-1,
-	   0x1.fffff2bdb904dp-1,
-	   0x1.fffff37d63a36p-1,
-	   0x1.fffff43297019p-1,
-	   0x1.fffff4dde0118p-1,
-	   0x1.fffff57fc4a95p-1,
-	   0x1.fffff618c3da6p-1,
-	   0x1.fffff6a956450p-1,
-	   0x1.fffff731ee681p-1,
-	   0x1.fffff7b2f8ed6p-1,
-	   0x1.fffff82cdcf1bp-1,
-	   0x1.fffff89ffc4aap-1,
-	   0x1.fffff90cb3c81p-1,
-	   0x1.fffff9735b73bp-1,
-	   0x1.fffff9d446cccp-1,
-	   0x1.fffffa2fc5015p-1,
-	   0x1.fffffa8621251p-1,
-	   0x1.fffffad7a2652p-1,
-	   0x1.fffffb248c39dp-1,
-	   0x1.fffffb6d1e95dp-1,
-	   0x1.fffffbb196132p-1,
-	   0x1.fffffbf22c1e2p-1,
-	   0x1.fffffc2f171e3p-1,
-	   0x1.fffffc688a9cfp-1,
-	   0x1.fffffc9eb76acp-1,
-	   0x1.fffffcd1cbc28p-1,
-	   0x1.fffffd01f36afp-1,
-	   0x1.fffffd2f57d68p-1,
-	   0x1.fffffd5a2041fp-1,
-	   0x1.fffffd8271d12p-1,
-	   0x1.fffffda86faa9p-1,
-	   0x1.fffffdcc3b117p-1,
-	   0x1.fffffdedf37edp-1,
-	   0x1.fffffe0db6b91p-1,
-	   0x1.fffffe2ba0ea5p-1,
-	   0x1.fffffe47ccb60p-1,
-	   0x1.fffffe62534d4p-1,
-	   0x1.fffffe7b4c81ep-1,
-	   0x1.fffffe92ced93p-1,
-	   0x1.fffffea8ef9cfp-1,
-	   0x1.fffffebdc2ec6p-1,
-	   0x1.fffffed15bcbap-1,
-	   0x1.fffffee3cc32cp-1,
-	   0x1.fffffef5251c2p-1,
-	   0x1.ffffff0576917p-1,
-	   0x1.ffffff14cfb92p-1,
-	   0x1.ffffff233ee1dp-1,
-	   0x1.ffffff30d18e8p-1,
-	   0x1.ffffff3d9480fp-1,
-	   0x1.ffffff4993c46p-1,
-	   0x1.ffffff54dab72p-1,
-	   0x1.ffffff5f74141p-1,
-	   0x1.ffffff6969fb8p-1,
-	   0x1.ffffff72c5fb6p-1,
-	   0x1.ffffff7b91176p-1,
-	   0x1.ffffff83d3d07p-1,
-	   0x1.ffffff8b962bep-1,
-	   0x1.ffffff92dfba2p-1,
-	   0x1.ffffff99b79d2p-1,
-	   0x1.ffffffa0248e8p-1,
-	   0x1.ffffffa62ce54p-1,
-	   0x1.ffffffabd69b4p-1,
-	   0x1.ffffffb127525p-1,
-	   0x1.ffffffb624592p-1,
-	   0x1.ffffffbad2affp-1,
-	   0x1.ffffffbf370cdp-1,
-	   0x1.ffffffc355dfdp-1,
-	   0x1.ffffffc733572p-1,
-	   0x1.ffffffcad3626p-1,
-	   0x1.ffffffce39b67p-1,
-	   0x1.ffffffd169d0cp-1,
-	   0x1.ffffffd466fa5p-1,
-	   0x1.ffffffd7344aap-1,
-	   0x1.ffffffd9d4aabp-1,
-	   0x1.ffffffdc4ad7ap-1,
-	   0x1.ffffffde9964ep-1,
-	   0x1.ffffffe0c2bf0p-1,
-	   0x1.ffffffe2c92dbp-1,
-	   0x1.ffffffe4aed5ep-1,
-	   0x1.ffffffe675bbdp-1,
-	   0x1.ffffffe81fc4ep-1,
-	   0x1.ffffffe9aeb97p-1,
-	   0x1.ffffffeb24467p-1,
-	   0x1.ffffffec81ff2p-1,
-	   0x1.ffffffedc95e7p-1,
-	   0x1.ffffffeefbc85p-1,
-	   0x1.fffffff01a8b6p-1,
-	   0x1.fffffff126e1ep-1,
-	   0x1.fffffff221f30p-1,
-	   0x1.fffffff30cd3fp-1,
-	   0x1.fffffff3e8892p-1,
-	   0x1.fffffff4b606fp-1,
-	   0x1.fffffff57632dp-1,
-	   0x1.fffffff629e44p-1,
-	   0x1.fffffff6d1e56p-1,
-	   0x1.fffffff76ef3fp-1,
-	   0x1.fffffff801c1fp-1,
-	   0x1.fffffff88af67p-1,
-	   0x1.fffffff90b2e3p-1,
-	   0x1.fffffff982fc1p-1,
-	   0x1.fffffff9f2e9fp-1,
-	   0x1.fffffffa5b790p-1,
-	   0x1.fffffffabd229p-1,
-	   0x1.fffffffb18582p-1,
-	   0x1.fffffffb6d844p-1,
-	   0x1.fffffffbbd0aap-1,
-	   0x1.fffffffc0748fp-1,
-	   0x1.fffffffc4c96cp-1,
-	   0x1.fffffffc8d462p-1,
-	   0x1.fffffffcc9a41p-1,
-	   0x1.fffffffd01f89p-1,
-	   0x1.fffffffd36871p-1,
-	   0x1.fffffffd678edp-1,
-	   0x1.fffffffd954aep-1,
-	   0x1.fffffffdbff2ap-1,
-	   0x1.fffffffde7ba0p-1,
-	   0x1.fffffffe0cd16p-1,
-	   0x1.fffffffe2f664p-1,
-	   0x1.fffffffe4fa30p-1,
-	   0x1.fffffffe6daf7p-1,
-	   0x1.fffffffe89b0cp-1,
-	   0x1.fffffffea3c9ap-1,
-	   0x1.fffffffebc1a9p-1,
-	   0x1.fffffffed2c21p-1,
-	   0x1.fffffffee7dc8p-1,
-	   0x1.fffffffefb847p-1,
-	   0x1.ffffffff0dd2bp-1,
-	   0x1.ffffffff1ede9p-1,
-	   0x1.ffffffff2ebdap-1,
-	   0x1.ffffffff3d843p-1,
-	   0x1.ffffffff4b453p-1,
-	   0x1.ffffffff58126p-1,
-	   0x1.ffffffff63fc3p-1,
-	   0x1.ffffffff6f121p-1,
-	   0x1.ffffffff79626p-1,
-	   0x1.ffffffff82fabp-1,
-	   0x1.ffffffff8be77p-1,
-	   0x1.ffffffff94346p-1,
-	   0x1.ffffffff9bec8p-1,
-	   0x1.ffffffffa319fp-1,
-	   0x1.ffffffffa9c63p-1,
-	   0x1.ffffffffaffa4p-1,
-	   0x1.ffffffffb5be5p-1,
-	   0x1.ffffffffbb1a2p-1,
-	   0x1.ffffffffc014ep-1,
-	   0x1.ffffffffc4b56p-1,
-	   0x1.ffffffffc901cp-1,
-	   0x1.ffffffffccfffp-1,
-	   0x1.ffffffffd0b56p-1,
-	   0x1.ffffffffd4271p-1,
-	   0x1.ffffffffd759dp-1,
-	   0x1.ffffffffda520p-1,
-	   0x1.ffffffffdd13cp-1,
-	   0x1.ffffffffdfa2dp-1,
-	   0x1.ffffffffe202dp-1,
-	   0x1.ffffffffe4371p-1,
-	   0x1.ffffffffe642ap-1,
-	   0x1.ffffffffe8286p-1,
-	   0x1.ffffffffe9eb0p-1,
-	   0x1.ffffffffeb8d0p-1,
-	   0x1.ffffffffed10ap-1,
-	   0x1.ffffffffee782p-1,
-	   0x1.ffffffffefc57p-1,
-	   0x1.fffffffff0fa7p-1,
-	   0x1.fffffffff218fp-1,
-	   0x1.fffffffff3227p-1,
-	   0x1.fffffffff4188p-1,
-	   0x1.fffffffff4fc9p-1,
-	   0x1.fffffffff5cfdp-1,
-	   0x1.fffffffff6939p-1,
-	   0x1.fffffffff748ep-1,
-	   0x1.fffffffff7f0dp-1,
-	   0x1.fffffffff88c5p-1,
-	   0x1.fffffffff91c6p-1,
-	   0x1.fffffffff9a1bp-1,
-	   0x1.fffffffffa1d2p-1,
-	   0x1.fffffffffa8f6p-1,
-	   0x1.fffffffffaf92p-1,
-	   0x1.fffffffffb5b0p-1,
-	   0x1.fffffffffbb58p-1,
-	   0x1.fffffffffc095p-1,
-	   0x1.fffffffffc56dp-1,
-	   0x1.fffffffffc9e8p-1,
-	   0x1.fffffffffce0dp-1,
-	   0x1.fffffffffd1e1p-1,
-	   0x1.fffffffffd56cp-1,
-	   0x1.fffffffffd8b3p-1,
-	   0x1.fffffffffdbbap-1,
-	   0x1.fffffffffde86p-1,
-	   0x1.fffffffffe11dp-1,
-	   0x1.fffffffffe380p-1,
-	   0x1.fffffffffe5b6p-1,
-	   0x1.fffffffffe7c0p-1,
-	   0x1.fffffffffe9a2p-1,
-	   0x1.fffffffffeb60p-1,
-	   0x1.fffffffffecfbp-1,
-	   0x1.fffffffffee77p-1,
-	   0x1.fffffffffefd6p-1,
-	   0x1.ffffffffff11ap-1,
-	   0x1.ffffffffff245p-1,
-	   0x1.ffffffffff359p-1,
-	   0x1.ffffffffff457p-1,
-	   0x1.ffffffffff542p-1,
-	   0x1.ffffffffff61bp-1,
-	   0x1.ffffffffff6e3p-1,
-	   0x1.ffffffffff79bp-1,
-	   0x1.ffffffffff845p-1,
-	   0x1.ffffffffff8e2p-1,
-	   0x1.ffffffffff973p-1,
-	   0x1.ffffffffff9f8p-1,
-	   0x1.ffffffffffa73p-1,
-	   0x1.ffffffffffae4p-1,
-	   0x1.ffffffffffb4cp-1,
-	   0x1.ffffffffffbadp-1,
-	   0x1.ffffffffffc05p-1,
-	   0x1.ffffffffffc57p-1,
-	   0x1.ffffffffffca2p-1,
-	   0x1.ffffffffffce7p-1,
-	   0x1.ffffffffffd27p-1,
-	   0x1.ffffffffffd62p-1,
-	   0x1.ffffffffffd98p-1,
-	   0x1.ffffffffffdcap-1,
-	   0x1.ffffffffffdf8p-1,
-	   0x1.ffffffffffe22p-1,
-	   0x1.ffffffffffe49p-1,
-	   0x1.ffffffffffe6cp-1,
-	   0x1.ffffffffffe8dp-1,
-	   0x1.ffffffffffeabp-1,
-	   0x1.ffffffffffec7p-1,
-	   0x1.ffffffffffee1p-1,
-	   0x1.ffffffffffef8p-1,
-	   0x1.fffffffffff0ep-1,
-	   0x1.fffffffffff22p-1,
-	   0x1.fffffffffff34p-1,
-	   0x1.fffffffffff45p-1,
-	   0x1.fffffffffff54p-1,
-	   0x1.fffffffffff62p-1,
-	   0x1.fffffffffff6fp-1,
-	   0x1.fffffffffff7bp-1,
-	   0x1.fffffffffff86p-1,
-	   0x1.fffffffffff90p-1,
-	   0x1.fffffffffff9ap-1,
-	   0x1.fffffffffffa2p-1,
-	   0x1.fffffffffffaap-1,
-	   0x1.fffffffffffb1p-1,
-	   0x1.fffffffffffb8p-1,
-	   0x1.fffffffffffbep-1,
-	   0x1.fffffffffffc3p-1,
-	   0x1.fffffffffffc8p-1,
-	   0x1.fffffffffffcdp-1,
-	   0x1.fffffffffffd1p-1,
-	   0x1.fffffffffffd5p-1,
-	   0x1.fffffffffffd9p-1,
-	   0x1.fffffffffffdcp-1,
-	   0x1.fffffffffffdfp-1,
-	   0x1.fffffffffffe2p-1,
-	   0x1.fffffffffffe4p-1,
-	   0x1.fffffffffffe7p-1,
-	   0x1.fffffffffffe9p-1,
-	   0x1.fffffffffffebp-1,
-	   0x1.fffffffffffedp-1,
-	   0x1.fffffffffffeep-1,
-	   0x1.ffffffffffff0p-1,
-	   0x1.ffffffffffff1p-1,
-	   0x1.ffffffffffff3p-1,
-	   0x1.ffffffffffff4p-1,
-	   0x1.ffffffffffff5p-1,
-	   0x1.ffffffffffff6p-1,
-	   0x1.ffffffffffff7p-1,
-	   0x1.ffffffffffff7p-1,
-	   0x1.ffffffffffff8p-1,
-	   0x1.ffffffffffff9p-1,
-	   0x1.ffffffffffff9p-1,
-	   0x1.ffffffffffffap-1,
-	   0x1.ffffffffffffbp-1,
-	   0x1.ffffffffffffbp-1,
-	   0x1.ffffffffffffbp-1,
-	   0x1.ffffffffffffcp-1,
-	   0x1.ffffffffffffcp-1,
-	   0x1.ffffffffffffdp-1,
-	   0x1.ffffffffffffdp-1,
-	   0x1.ffffffffffffdp-1,
-	   0x1.ffffffffffffdp-1,
-	   0x1.ffffffffffffep-1,
-	   0x1.ffffffffffffep-1,
-	   0x1.ffffffffffffep-1,
-	   0x1.ffffffffffffep-1,
-	   0x1.ffffffffffffep-1,
-	   0x1.ffffffffffffep-1,
-	   0x1.fffffffffffffp-1,
-	   0x1.fffffffffffffp-1,
-	   0x1.fffffffffffffp-1,
-	   0x1.fffffffffffffp-1,
-	   0x1.fffffffffffffp-1,
-	   0x1.fffffffffffffp-1,
-	   0x1.fffffffffffffp-1,
-	   0x1.fffffffffffffp-1,
-	   0x1.fffffffffffffp-1,
-	   0x1.fffffffffffffp-1,
-	   0x1.fffffffffffffp-1,
-	   0x1.0000000000000p+0,
-	   0x1.0000000000000p+0,
-	   0x1.0000000000000p+0,
-	   0x1.0000000000000p+0,
-	   0x1.0000000000000p+0,
-	   0x1.0000000000000p+0,
-	   0x1.0000000000000p+0,
-	   0x1.0000000000000p+0,
-	   0x1.0000000000000p+0,
-	   0x1.0000000000000p+0,
-	   0x1.0000000000000p+0,
-  },
-  .scale = { 0x1.20dd750429b6dp+0,
-	     0x1.20d8f1975c85dp+0,
-	     0x1.20cb67bd452c7p+0,
-	     0x1.20b4d8bac36c1p+0,
-	     0x1.209546ad13ccfp+0,
-	     0x1.206cb4897b148p+0,
-	     0x1.203b261cd0052p+0,
-	     0x1.2000a00ae3804p+0,
-	     0x1.1fbd27cdc72d3p+0,
-	     0x1.1f70c3b4f2cc7p+0,
-	     0x1.1f1b7ae44867fp+0,
-	     0x1.1ebd5552f795bp+0,
-	     0x1.1e565bca400d4p+0,
-	     0x1.1de697e413d28p+0,
-	     0x1.1d6e14099944ap+0,
-	     0x1.1cecdb718d61cp+0,
-	     0x1.1c62fa1e869b6p+0,
-	     0x1.1bd07cdd189acp+0,
-	     0x1.1b357141d95d5p+0,
-	     0x1.1a91e5a748165p+0,
-	     0x1.19e5e92b964abp+0,
-	     0x1.19318bae53a04p+0,
-	     0x1.1874ddcdfce24p+0,
-	     0x1.17aff0e56ec10p+0,
-	     0x1.16e2d7093cd8cp+0,
-	     0x1.160da304ed92fp+0,
-	     0x1.153068581b781p+0,
-	     0x1.144b3b337c90cp+0,
-	     0x1.135e3075d076bp+0,
-	     0x1.12695da8b5bdep+0,
-	     0x1.116cd8fd67618p+0,
-	     0x1.1068b94962e5ep+0,
-	     0x1.0f5d1602f7e41p+0,
-	     0x1.0e4a073dc1b91p+0,
-	     0x1.0d2fa5a70c168p+0,
-	     0x1.0c0e0a8223359p+0,
-	     0x1.0ae54fa490722p+0,
-	     0x1.09b58f724416bp+0,
-	     0x1.087ee4d9ad247p+0,
-	     0x1.07416b4fbfe7cp+0,
-	     0x1.05fd3ecbec297p+0,
-	     0x1.04b27bc403d30p+0,
-	     0x1.03613f2812dafp+0,
-	     0x1.0209a65e29545p+0,
-	     0x1.00abcf3e187a9p+0,
-	     0x1.fe8fb01a47307p-1,
-	     0x1.fbbbbef34b4b2p-1,
-	     0x1.f8dc092d58ff8p-1,
-	     0x1.f5f0cdaf15313p-1,
-	     0x1.f2fa4c16c0019p-1,
-	     0x1.eff8c4b1375dbp-1,
-	     0x1.ecec7870ebca7p-1,
-	     0x1.e9d5a8e4c934ep-1,
-	     0x1.e6b4982f158b9p-1,
-	     0x1.e38988fc46e72p-1,
-	     0x1.e054be79d3042p-1,
-	     0x1.dd167c4cf9d2ap-1,
-	     0x1.d9cf06898cdafp-1,
-	     0x1.d67ea1a8b5368p-1,
-	     0x1.d325927fb9d89p-1,
-	     0x1.cfc41e36c7df9p-1,
-	     0x1.cc5a8a3fbea40p-1,
-	     0x1.c8e91c4d01368p-1,
-	     0x1.c5701a484ef9dp-1,
-	     0x1.c1efca49a5011p-1,
-	     0x1.be68728e29d5dp-1,
-	     0x1.bada596f25436p-1,
-	     0x1.b745c55905bf8p-1,
-	     0x1.b3aafcc27502ep-1,
-	     0x1.b00a46237d5bep-1,
-	     0x1.ac63e7ecc1411p-1,
-	     0x1.a8b8287ec6a09p-1,
-	     0x1.a5074e2157620p-1,
-	     0x1.a1519efaf889ep-1,
-	     0x1.9d97610879642p-1,
-	     0x1.99d8da149c13fp-1,
-	     0x1.96164fafd8de3p-1,
-	     0x1.925007283d7aap-1,
-	     0x1.8e86458169af8p-1,
-	     0x1.8ab94f6caa71dp-1,
-	     0x1.86e9694134b9ep-1,
-	     0x1.8316d6f48133dp-1,
-	     0x1.7f41dc12c9e89p-1,
-	     0x1.7b6abbb7aaf19p-1,
-	     0x1.7791b886e7403p-1,
-	     0x1.73b714a552763p-1,
-	     0x1.6fdb11b1e0c34p-1,
-	     0x1.6bfdf0beddaf5p-1,
-	     0x1.681ff24b4ab04p-1,
-	     0x1.6441563c665d4p-1,
-	     0x1.60625bd75d07bp-1,
-	     0x1.5c8341bb23767p-1,
-	     0x1.58a445da7c74cp-1,
-	     0x1.54c5a57629db0p-1,
-	     0x1.50e79d1749ac9p-1,
-	     0x1.4d0a6889dfd9fp-1,
-	     0x1.492e42d78d2c5p-1,
-	     0x1.4553664273d24p-1,
-	     0x1.417a0c4049fd0p-1,
-	     0x1.3da26d759aef5p-1,
-	     0x1.39ccc1b136d5ap-1,
-	     0x1.35f93fe7d1b3dp-1,
-	     0x1.32281e2fd1a92p-1,
-	     0x1.2e5991bd4cbfcp-1,
-	     0x1.2a8dcede3673bp-1,
-	     0x1.26c508f6bd0ffp-1,
-	     0x1.22ff727dd6f7bp-1,
-	     0x1.1f3d3cf9ffe5ap-1,
-	     0x1.1b7e98fe26217p-1,
-	     0x1.17c3b626c7a11p-1,
-	     0x1.140cc3173f007p-1,
-	     0x1.1059ed7740313p-1,
-	     0x1.0cab61f084b93p-1,
-	     0x1.09014c2ca74dap-1,
-	     0x1.055bd6d32e8d7p-1,
-	     0x1.01bb2b87c6968p-1,
-	     0x1.fc3ee5d1524b0p-2,
-	     0x1.f511a91a67d2ap-2,
-	     0x1.edeeee0959518p-2,
-	     0x1.e6d6ffaa65a25p-2,
-	     0x1.dfca26f5bbf88p-2,
-	     0x1.d8c8aace11e63p-2,
-	     0x1.d1d2cfff91594p-2,
-	     0x1.cae8d93f1d7b6p-2,
-	     0x1.c40b0729ed547p-2,
-	     0x1.bd3998457afdap-2,
-	     0x1.b674c8ffc6283p-2,
-	     0x1.afbcd3afe8ab6p-2,
-	     0x1.a911f096fbc26p-2,
-	     0x1.a27455e14c93cp-2,
-	     0x1.9be437a7de946p-2,
-	     0x1.9561c7f23a47bp-2,
-	     0x1.8eed36b886d93p-2,
-	     0x1.8886b1e5ecfd1p-2,
-	     0x1.822e655b417e6p-2,
-	     0x1.7be47af1f5d89p-2,
-	     0x1.75a91a7f4d2edp-2,
-	     0x1.6f7c69d7d3ef8p-2,
-	     0x1.695e8cd31867ep-2,
-	     0x1.634fa54fa285fp-2,
-	     0x1.5d4fd33729015p-2,
-	     0x1.575f3483021c3p-2,
-	     0x1.517de540ce2a3p-2,
-	     0x1.4babff975a04cp-2,
-	     0x1.45e99bcbb7915p-2,
-	     0x1.4036d0468a7a2p-2,
-	     0x1.3a93b1998736cp-2,
-	     0x1.35005285227f1p-2,
-	     0x1.2f7cc3fe6f423p-2,
-	     0x1.2a09153529381p-2,
-	     0x1.24a55399ea239p-2,
-	     0x1.1f518ae487dc8p-2,
-	     0x1.1a0dc51a9934dp-2,
-	     0x1.14da0a961fd14p-2,
-	     0x1.0fb6620c550afp-2,
-	     0x1.0aa2d09497f2bp-2,
-	     0x1.059f59af7a906p-2,
-	     0x1.00abff4dec7a3p-2,
-	     0x1.f79183b101c5bp-3,
-	     0x1.edeb406d9c824p-3,
-	     0x1.e4652fadcb6b2p-3,
-	     0x1.daff4969c0b04p-3,
-	     0x1.d1b982c501370p-3,
-	     0x1.c893ce1dcbef7p-3,
-	     0x1.bf8e1b1ca2279p-3,
-	     0x1.b6a856c3ed54fp-3,
-	     0x1.ade26b7fbed95p-3,
-	     0x1.a53c4135a6526p-3,
-	     0x1.9cb5bd549b111p-3,
-	     0x1.944ec2e4f5630p-3,
-	     0x1.8c07329874652p-3,
-	     0x1.83deeada4d25ap-3,
-	     0x1.7bd5c7df3fe9cp-3,
-	     0x1.73eba3b5b07b7p-3,
-	     0x1.6c205655be71fp-3,
-	     0x1.6473b5b15a7a1p-3,
-	     0x1.5ce595c455b0ap-3,
-	     0x1.5575c8a468361p-3,
-	     0x1.4e241e912c305p-3,
-	     0x1.46f066040a832p-3,
-	     0x1.3fda6bc016994p-3,
-	     0x1.38e1fae1d6a9dp-3,
-	     0x1.3206dceef5f87p-3,
-	     0x1.2b48d9e5dea1cp-3,
-	     0x1.24a7b84d38971p-3,
-	     0x1.1e233d434b813p-3,
-	     0x1.17bb2c8d41535p-3,
-	     0x1.116f48a6476ccp-3,
-	     0x1.0b3f52ce8c383p-3,
-	     0x1.052b0b1a174eap-3,
-	     0x1.fe6460fef4680p-4,
-	     0x1.f2a901ccafb37p-4,
-	     0x1.e723726b824a9p-4,
-	     0x1.dbd32ac4c99b0p-4,
-	     0x1.d0b7a0f921e7cp-4,
-	     0x1.c5d0497c09e74p-4,
-	     0x1.bb1c972f23e50p-4,
-	     0x1.b09bfb7d11a83p-4,
-	     0x1.a64de673e8837p-4,
-	     0x1.9c31c6df3b1b8p-4,
-	     0x1.92470a61b6965p-4,
-	     0x1.888d1d8e510a3p-4,
-	     0x1.7f036c0107294p-4,
-	     0x1.75a96077274bap-4,
-	     0x1.6c7e64e7281cbp-4,
-	     0x1.6381e2980956bp-4,
-	     0x1.5ab342383d177p-4,
-	     0x1.5211ebf41880bp-4,
-	     0x1.499d478bca735p-4,
-	     0x1.4154bc68d75c3p-4,
-	     0x1.3937b1b319259p-4,
-	     0x1.31458e6542847p-4,
-	     0x1.297db960e4f63p-4,
-	     0x1.21df9981f8e53p-4,
-	     0x1.1a6a95b1e786fp-4,
-	     0x1.131e14fa1625dp-4,
-	     0x1.0bf97e95f2a64p-4,
-	     0x1.04fc3a0481321p-4,
-	     0x1.fc4b5e32d6259p-5,
-	     0x1.eeea8c1b1db93p-5,
-	     0x1.e1d4cf1e2450ap-5,
-	     0x1.d508f9a1ea64ep-5,
-	     0x1.c885df3451a07p-5,
-	     0x1.bc4a54a84e834p-5,
-	     0x1.b055303221015p-5,
-	     0x1.a4a549829587ep-5,
-	     0x1.993979e14fffdp-5,
-	     0x1.8e109c4622913p-5,
-	     0x1.83298d717210ep-5,
-	     0x1.78832c03aa2b1p-5,
-	     0x1.6e1c5893c380bp-5,
-	     0x1.63f3f5c4de13bp-5,
-	     0x1.5a08e85af27e0p-5,
-	     0x1.505a174e9c929p-5,
-	     0x1.46e66be002240p-5,
-	     0x1.3dacd1a8d8ccdp-5,
-	     0x1.34ac36ad8dafep-5,
-	     0x1.2be38b6d92415p-5,
-	     0x1.2351c2f2d1449p-5,
-	     0x1.1af5d2e04f3f6p-5,
-	     0x1.12ceb37ff9bc3p-5,
-	     0x1.0adb5fcfa8c75p-5,
-	     0x1.031ad58d56279p-5,
-	     0x1.f7182a851bca2p-6,
-	     0x1.e85c449e377f2p-6,
-	     0x1.da0005e5f28dfp-6,
-	     0x1.cc0180af00a8bp-6,
-	     0x1.be5ecd2fcb5f9p-6,
-	     0x1.b1160991ff737p-6,
-	     0x1.a4255a00b9f03p-6,
-	     0x1.978ae8b55ce1bp-6,
-	     0x1.8b44e6031383ep-6,
-	     0x1.7f5188610ddc8p-6,
-	     0x1.73af0c737bb45p-6,
-	     0x1.685bb5134ef13p-6,
-	     0x1.5d55cb54cd53ap-6,
-	     0x1.529b9e8cf9a1ep-6,
-	     0x1.482b8455dc491p-6,
-	     0x1.3e03d891b37dep-6,
-	     0x1.3422fd6d12e2bp-6,
-	     0x1.2a875b5ffab56p-6,
-	     0x1.212f612dee7fbp-6,
-	     0x1.181983e5133ddp-6,
-	     0x1.0f443edc5ce49p-6,
-	     0x1.06ae13b0d3255p-6,
-	     0x1.fcab1483ea7fcp-7,
-	     0x1.ec72615a894c4p-7,
-	     0x1.dcaf3691fc448p-7,
-	     0x1.cd5ec93c12431p-7,
-	     0x1.be7e5ac24963bp-7,
-	     0x1.b00b38d6b3575p-7,
-	     0x1.a202bd6372dcep-7,
-	     0x1.94624e78e0fafp-7,
-	     0x1.87275e3a6869dp-7,
-	     0x1.7a4f6aca256cbp-7,
-	     0x1.6dd7fe3358230p-7,
-	     0x1.61beae53b72b7p-7,
-	     0x1.56011cc3b036dp-7,
-	     0x1.4a9cf6bda3f4cp-7,
-	     0x1.3f8ff5042a88ep-7,
-	     0x1.34d7dbc76d7e5p-7,
-	     0x1.2a727a89a3f14p-7,
-	     0x1.205dac02bd6b9p-7,
-	     0x1.1697560347b25p-7,
-	     0x1.0d1d69569b82dp-7,
-	     0x1.03ede1a45bfeep-7,
-	     0x1.f60d8aa2a88f2p-8,
-	     0x1.e4cc4abf7d065p-8,
-	     0x1.d4143a9dfe965p-8,
-	     0x1.c3e1a5f5c077cp-8,
-	     0x1.b430ecf4a83a8p-8,
-	     0x1.a4fe83fb9db25p-8,
-	     0x1.9646f35a76623p-8,
-	     0x1.8806d70b2fc36p-8,
-	     0x1.7a3ade6c8b3e4p-8,
-	     0x1.6cdfcbfc1e263p-8,
-	     0x1.5ff2750fe7820p-8,
-	     0x1.536fc18f7ce5cp-8,
-	     0x1.4754abacdf1dcp-8,
-	     0x1.3b9e3f9d06e3fp-8,
-	     0x1.30499b503957fp-8,
-	     0x1.2553ee2a336bfp-8,
-	     0x1.1aba78ba3af89p-8,
-	     0x1.107a8c7323a6ep-8,
-	     0x1.06918b6355624p-8,
-	     0x1.f9f9cfd9c3035p-9,
-	     0x1.e77448fb66bb9p-9,
-	     0x1.d58da68fd1170p-9,
-	     0x1.c4412bf4b8f0bp-9,
-	     0x1.b38a3af2e55b4p-9,
-	     0x1.a3645330550ffp-9,
-	     0x1.93cb11a30d765p-9,
-	     0x1.84ba3004a50d0p-9,
-	     0x1.762d84469c18fp-9,
-	     0x1.6821000795a03p-9,
-	     0x1.5a90b00981d93p-9,
-	     0x1.4d78bba8ca5fdp-9,
-	     0x1.40d564548fad7p-9,
-	     0x1.34a305080681fp-9,
-	     0x1.28de11c5031ebp-9,
-	     0x1.1d83170fbf6fbp-9,
-	     0x1.128eb96be8798p-9,
-	     0x1.07fdb4dafea5fp-9,
-	     0x1.fb99b8b8279e1p-10,
-	     0x1.e7f232d9e2630p-10,
-	     0x1.d4fed7195d7e8p-10,
-	     0x1.c2b9cf7f893bfp-10,
-	     0x1.b11d702b3deb1p-10,
-	     0x1.a024365f771bdp-10,
-	     0x1.8fc8c794b03b5p-10,
-	     0x1.8005f08d6f1efp-10,
-	     0x1.70d6a46e07ddap-10,
-	     0x1.6235fbd7a4345p-10,
-	     0x1.541f340697987p-10,
-	     0x1.468dadf4080abp-10,
-	     0x1.397ced7af2b15p-10,
-	     0x1.2ce898809244ep-10,
-	     0x1.20cc76202c5fap-10,
-	     0x1.15246dda49d47p-10,
-	     0x1.09ec86c75d497p-10,
-	     0x1.fe41cd9bb4eeep-11,
-	     0x1.e97ba3b77f306p-11,
-	     0x1.d57f524723822p-11,
-	     0x1.c245d4b998479p-11,
-	     0x1.afc85e0f82e12p-11,
-	     0x1.9e005769dbc1dp-11,
-	     0x1.8ce75e9f6f8a0p-11,
-	     0x1.7c7744d9378f7p-11,
-	     0x1.6caa0d3582fe9p-11,
-	     0x1.5d79eb71e893bp-11,
-	     0x1.4ee1429bf7cc0p-11,
-	     0x1.40daa3c89f5b6p-11,
-	     0x1.3360ccd23db3ap-11,
-	     0x1.266ea71d4f71ap-11,
-	     0x1.19ff4663ae9dfp-11,
-	     0x1.0e0de78654d1ep-11,
-	     0x1.0295ef6591848p-11,
-	     0x1.ef25d37f49fe1p-12,
-	     0x1.da01102b5f851p-12,
-	     0x1.c5b5412dcafadp-12,
-	     0x1.b23a5a23e4210p-12,
-	     0x1.9f8893d8fd1c1p-12,
-	     0x1.8d986a4187285p-12,
-	     0x1.7c629a822bc9ep-12,
-	     0x1.6be02102b3520p-12,
-	     0x1.5c0a378c90bcap-12,
-	     0x1.4cda5374ea275p-12,
-	     0x1.3e4a23d1f4702p-12,
-	     0x1.30538fbb77ecdp-12,
-	     0x1.22f0b496539bdp-12,
-	     0x1.161be46ad3b50p-12,
-	     0x1.09cfa445b00ffp-12,
-	     0x1.fc0d55470cf51p-13,
-	     0x1.e577bbcd49935p-13,
-	     0x1.cfd4a5adec5bfp-13,
-	     0x1.bb1a9657ce465p-13,
-	     0x1.a740684026555p-13,
-	     0x1.943d4a1d1ed39p-13,
-	     0x1.8208bc334a6a5p-13,
-	     0x1.709a8db59f25cp-13,
-	     0x1.5feada379d8b7p-13,
-	     0x1.4ff207314a102p-13,
-	     0x1.40a8c1949f75ep-13,
-	     0x1.3207fb7420eb9p-13,
-	     0x1.2408e9ba3327fp-13,
-	     0x1.16a501f0e42cap-13,
-	     0x1.09d5f819c9e29p-13,
-	     0x1.fb2b792b40a22p-14,
-	     0x1.e3bcf436a1a95p-14,
-	     0x1.cd55277c18d05p-14,
-	     0x1.b7e94604479dcp-14,
-	     0x1.a36eec00926ddp-14,
-	     0x1.8fdc1b2dcf7b9p-14,
-	     0x1.7d2737527c3f9p-14,
-	     0x1.6b4702d7d5849p-14,
-	     0x1.5a329b7d30748p-14,
-	     0x1.49e17724f4d41p-14,
-	     0x1.3a4b60ba9aa4dp-14,
-	     0x1.2b6875310f785p-14,
-	     0x1.1d312098e9dbap-14,
-	     0x1.0f9e1b4dd36dfp-14,
-	     0x1.02a8673a94691p-14,
-	     0x1.ec929a665b449p-15,
-	     0x1.d4f4b4c8e09edp-15,
-	     0x1.be6abbb10a5aap-15,
-	     0x1.a8e8cc1fadef6p-15,
-	     0x1.94637d5bacfdbp-15,
-	     0x1.80cfdc72220cfp-15,
-	     0x1.6e2367dc27f95p-15,
-	     0x1.5c540b4936fd2p-15,
-	     0x1.4b581b8d170fcp-15,
-	     0x1.3b2652b06c2b2p-15,
-	     0x1.2bb5cc22e5db6p-15,
-	     0x1.1cfe010e2052dp-15,
-	     0x1.0ef6c4c84a0fep-15,
-	     0x1.01984165a5f36p-15,
-	     0x1.e9b5e8d00ce76p-16,
-	     0x1.d16f5716c6c1ap-16,
-	     0x1.ba4f035d60e02p-16,
-	     0x1.a447b7b03f045p-16,
-	     0x1.8f4ccca7fc90dp-16,
-	     0x1.7b5223dac7336p-16,
-	     0x1.684c227fcacefp-16,
-	     0x1.562fac4329b48p-16,
-	     0x1.44f21e49054f2p-16,
-	     0x1.34894a5e24657p-16,
-	     0x1.24eb7254ccf83p-16,
-	     0x1.160f438c70913p-16,
-	     0x1.07ebd2a2d2844p-16,
-	     0x1.f4f12e9ab070ap-17,
-	     0x1.db5ad0b27805cp-17,
-	     0x1.c304efa2c6f4ep-17,
-	     0x1.abe09e9144b5ep-17,
-	     0x1.95df988e76644p-17,
-	     0x1.80f439b4ee04bp-17,
-	     0x1.6d11788a69c64p-17,
-	     0x1.5a2adfa0b4bc4p-17,
-	     0x1.4834877429b8fp-17,
-	     0x1.37231085c7d9ap-17,
-	     0x1.26eb9daed6f7ep-17,
-	     0x1.1783ceac28910p-17,
-	     0x1.08e1badf0fcedp-17,
-	     0x1.f5f7d88472604p-18,
-	     0x1.db92b5212fb8dp-18,
-	     0x1.c282cd3957edap-18,
-	     0x1.aab7abace48dcp-18,
-	     0x1.94219bfcb4928p-18,
-	     0x1.7eb1a2075864dp-18,
-	     0x1.6a597219a93d9p-18,
-	     0x1.570b69502f313p-18,
-	     0x1.44ba864670882p-18,
-	     0x1.335a62115bce2p-18,
-	     0x1.22df298214423p-18,
-	     0x1.133d96ae7e0ddp-18,
-	     0x1.046aeabcfcdecp-18,
-	     0x1.ecb9cfe1d8642p-19,
-	     0x1.d21397ead99cbp-19,
-	     0x1.b8d094c86d374p-19,
-	     0x1.a0df0f0c626dcp-19,
-	     0x1.8a2e269750a39p-19,
-	     0x1.74adc8f4064d3p-19,
-	     0x1.604ea819f007cp-19,
-	     0x1.4d0231928c6f9p-19,
-	     0x1.3aba85fe22e1fp-19,
-	     0x1.296a70f414053p-19,
-	     0x1.1905613b3abf2p-19,
-	     0x1.097f6156f32c5p-19,
-	     0x1.f59a20caf6695p-20,
-	     0x1.d9c73698fb1dcp-20,
-	     0x1.bf716c6168baep-20,
-	     0x1.a6852c6b58392p-20,
-	     0x1.8eefd70594a88p-20,
-	     0x1.789fb715aae95p-20,
-	     0x1.6383f726a8e04p-20,
-	     0x1.4f8c96f26a26ap-20,
-	     0x1.3caa61607f920p-20,
-	     0x1.2acee2f5ecdb8p-20,
-	     0x1.19ec60b1242edp-20,
-	     0x1.09f5cf4dd2877p-20,
-	     0x1.f5bd95d8730d8p-21,
-	     0x1.d9371e2ff7c35p-21,
-	     0x1.be41de54d155ap-21,
-	     0x1.a4c89e08ef4f3p-21,
-	     0x1.8cb738399b12cp-21,
-	     0x1.75fa8dbc84becp-21,
-	     0x1.608078a70dcbcp-21,
-	     0x1.4c37c0394d094p-21,
-	     0x1.39100d5687bfep-21,
-	     0x1.26f9df8519bd6p-21,
-	     0x1.15e6827001f18p-21,
-	     0x1.05c803e4831c1p-21,
-	     0x1.ed22548cffd35p-22,
-	     0x1.d06ad6ecdf971p-22,
-	     0x1.b551c847fbc96p-22,
-	     0x1.9bc09f112b494p-22,
-	     0x1.83a1ff0aa239dp-22,
-	     0x1.6ce1aa3fd7bddp-22,
-	     0x1.576c72b514859p-22,
-	     0x1.43302cc4a0da8p-22,
-	     0x1.301ba221dc9bbp-22,
-	     0x1.1e1e857adc568p-22,
-	     0x1.0d2966b1746f7p-22,
-	     0x1.fa5b4f49cc6b2p-23,
-	     0x1.dc3ae30b55c16p-23,
-	     0x1.bfd7555a3bd68p-23,
-	     0x1.a517d9e61628ap-23,
-	     0x1.8be4f8f6c951fp-23,
-	     0x1.74287ded49339p-23,
-	     0x1.5dcd669f2cd34p-23,
-	     0x1.48bfd38302870p-23,
-	     0x1.34ecf8a3c124ap-23,
-	     0x1.22430f521cbcfp-23,
-	     0x1.10b1488aeb235p-23,
-	     0x1.0027c00a263a6p-23,
-	     0x1.e12ee004efc37p-24,
-	     0x1.c3e44ae32b16bp-24,
-	     0x1.a854ea14102a8p-24,
-	     0x1.8e6761569f45dp-24,
-	     0x1.7603bac345f65p-24,
-	     0x1.5f1353cdad001p-24,
-	     0x1.4980cb3c80949p-24,
-	     0x1.3537f00b6ad4dp-24,
-	     0x1.2225b12bffc68p-24,
-	     0x1.10380e1adb7e9p-24,
-	     0x1.febc107d5efaap-25,
-	     0x1.df0f2a0ee6946p-25,
-	     0x1.c14b2188bcee4p-25,
-	     0x1.a553644f7f07dp-25,
-	     0x1.8b0cfce0579dfp-25,
-	     0x1.725e7c5dd20f7p-25,
-	     0x1.5b2fe547a1340p-25,
-	     0x1.456a974e92e93p-25,
-	     0x1.30f93c3699078p-25,
-	     0x1.1dc7b5b978cf8p-25,
-	     0x1.0bc30c5d52f15p-25,
-	     0x1.f5b2be65a0c7fp-26,
-	     0x1.d5f3a8dea7357p-26,
-	     0x1.b82915b03515bp-26,
-	     0x1.9c3517e789488p-26,
-	     0x1.81fb7df06136ep-26,
-	     0x1.6961b8d641d06p-26,
-	     0x1.524ec4d916caep-26,
-	     0x1.3cab1343d18d1p-26,
-	     0x1.2860757487a01p-26,
-	     0x1.155a09065d4f7p-26,
-	     0x1.0384250e4c9fcp-26,
-	     0x1.e59890b926c78p-27,
-	     0x1.c642116a8a9e3p-27,
-	     0x1.a8e405e651ab6p-27,
-	     0x1.8d5f98114f872p-27,
-	     0x1.7397c5a66e307p-27,
-	     0x1.5b71456c5a4c4p-27,
-	     0x1.44d26de513197p-27,
-	     0x1.2fa31d6371537p-27,
-	     0x1.1bcca373b7b43p-27,
-	     0x1.0939ab853339fp-27,
-	     0x1.efac5187b2863p-28,
-	     0x1.cf1e86235d0e6p-28,
-	     0x1.b0a68a2128babp-28,
-	     0x1.9423165bc4444p-28,
-	     0x1.7974e743dea3cp-28,
-	     0x1.607e9eacd1050p-28,
-	     0x1.4924a74dec728p-28,
-	     0x1.334d19e0c2160p-28,
-	     0x1.1edfa3c5f5ccap-28,
-	     0x1.0bc56f1b54701p-28,
-	     0x1.f3d2185e047d9p-29,
-	     0x1.d26cb87945e87p-29,
-	     0x1.b334fac4b9f99p-29,
-	     0x1.96076f7918d1cp-29,
-	     0x1.7ac2d72fc2c63p-29,
-	     0x1.614801550319ep-29,
-	     0x1.4979ac8b28926p-29,
-	     0x1.333c68e2d0548p-29,
-	     0x1.1e767bce37dd7p-29,
-	     0x1.0b0fc5b6d05a0p-29,
-	     0x1.f1e3523b41d7dp-30,
-	     0x1.d00de6608effep-30,
-	     0x1.b0778b7b3301ap-30,
-	     0x1.92fb04ec0f6cfp-30,
-	     0x1.77756ec9f78fap-30,
-	     0x1.5dc61922d5a06p-30,
-	     0x1.45ce65699ff6dp-30,
-	     0x1.2f71a5f159970p-30,
-	     0x1.1a94ff571654fp-30,
-	     0x1.071f4bbea09ecp-30,
-	     0x1.e9f1ff8ddd774p-31,
-	     0x1.c818223a202c7p-31,
-	     0x1.a887bd2b4404dp-31,
-	     0x1.8b1a336c5eb6bp-31,
-	     0x1.6fab63324088ap-31,
-	     0x1.56197e30205bap-31,
-	     0x1.3e44e45301b92p-31,
-	     0x1.281000bfe4c3fp-31,
-	     0x1.135f28f2d50b4p-31,
-	     0x1.00187dded5975p-31,
-	     0x1.dc479de0ef001p-32,
-	     0x1.bad4fdad3caa1p-32,
-	     0x1.9baed3ed27ab8p-32,
-	     0x1.7ead9ce4285bbp-32,
-	     0x1.63ac6b4edc88ep-32,
-	     0x1.4a88be2a6390cp-32,
-	     0x1.332259185f1a0p-32,
-	     0x1.1d5b1f3793044p-32,
-	     0x1.0916f04b6e18bp-32,
-	     0x1.ec77101de6926p-33,
-	     0x1.c960bf23153e0p-33,
-	     0x1.a8bd20fc65ef7p-33,
-	     0x1.8a61745ec7d1dp-33,
-	     0x1.6e25d0e756261p-33,
-	     0x1.53e4f7d1666cbp-33,
-	     0x1.3b7c27a7ddb0ep-33,
-	     0x1.24caf2c32af14p-33,
-	     0x1.0fb3186804d0fp-33,
-	     0x1.f830c0bb41fd7p-34,
-	     0x1.d3c0f1a91c846p-34,
-	     0x1.b1e5acf351d87p-34,
-	     0x1.92712d259ce66p-34,
-	     0x1.7538c60a04476p-34,
-	     0x1.5a14b04b47879p-34,
-	     0x1.40dfd87456f4cp-34,
-	     0x1.2977b1172b9d5p-34,
-	     0x1.13bc07e891491p-34,
-	     0x1.ff1dbb4300811p-35,
-	     0x1.d9a880f306bd8p-35,
-	     0x1.b6e45220b55e0p-35,
-	     0x1.96a0b33f2c4dap-35,
-	     0x1.78b07e9e924acp-35,
-	     0x1.5ce9ab1670dd2p-35,
-	     0x1.4325167006bb0p-35,
-	     0x1.2b3e53538ff3fp-35,
-	     0x1.15137a7f44864p-35,
-	     0x1.0084ff125639dp-35,
-	     0x1.daeb0b7311ec7p-36,
-	     0x1.b7937d1c40c52p-36,
-	     0x1.96d082f59ab06p-36,
-	     0x1.7872d9fa10aadp-36,
-	     0x1.5c4e8e37bc7d0p-36,
-	     0x1.423ac0df49a40p-36,
-	     0x1.2a117230ad284p-36,
-	     0x1.13af4f04f9998p-36,
-	     0x1.fde703724e560p-37,
-	     0x1.d77f0c82e7641p-37,
-	     0x1.b3ee02611d7ddp-37,
-	     0x1.92ff33023d5bdp-37,
-	     0x1.7481a9e69f53fp-37,
-	     0x1.5847eda620959p-37,
-	     0x1.3e27c1fcc74bdp-37,
-	     0x1.25f9ee0b923dcp-37,
-	     0x1.0f9a0686531ffp-37,
-	     0x1.f5cc7718082afp-38,
-	     0x1.cf7e53d6a2ca5p-38,
-	     0x1.ac0f5f3229372p-38,
-	     0x1.8b498644847eap-38,
-	     0x1.6cfa9bcca59dcp-38,
-	     0x1.50f411d4fd2cdp-38,
-	     0x1.370ab8327af5ep-38,
-	     0x1.1f167f88c6b6ep-38,
-	     0x1.08f24085d4597p-38,
-	     0x1.e8f70e181d619p-39,
-	     0x1.c324c20e337dcp-39,
-	     0x1.a03261574b54ep-39,
-	     0x1.7fe903cdf5855p-39,
-	     0x1.6215c58da3450p-39,
-	     0x1.46897d4b69fc6p-39,
-	     0x1.2d1877d731b7bp-39,
-	     0x1.159a386b11517p-39,
-	     0x1.ffd27ae9393cep-40,
-	     0x1.d7c593130dd0bp-40,
-	     0x1.b2cd607c79bcfp-40,
-	     0x1.90ae4d3405651p-40,
-	     0x1.71312dd1759e2p-40,
-	     0x1.5422ef5d8949dp-40,
-	     0x1.39544b0ecc957p-40,
-	     0x1.20997f73e73ddp-40,
-	     0x1.09ca0eaacd277p-40,
-	     0x1.e9810295890ecp-41,
-	     0x1.c2b45b5aa4a1dp-41,
-	     0x1.9eee068fa7596p-41,
-	     0x1.7df2b399c10a8p-41,
-	     0x1.5f8b87a31bd85p-41,
-	     0x1.4385c96e9a2d9p-41,
-	     0x1.29b2933ef4cbcp-41,
-	     0x1.11e68a6378f8ap-41,
-	     0x1.f7f338086a86bp-42,
-	     0x1.cf8d7d9ce040ap-42,
-	     0x1.aa577251ae484p-42,
-	     0x1.8811d739efb5ep-42,
-	     0x1.68823e52970bep-42,
-	     0x1.4b72ae68e8b4cp-42,
-	     0x1.30b14dbe876bcp-42,
-	     0x1.181012ef86610p-42,
-	     0x1.01647ba798744p-42,
-	     0x1.d90e917701675p-43,
-	     0x1.b2a87e86d0c8ap-43,
-	     0x1.8f53dcb377293p-43,
-	     0x1.6ed2f2515e933p-43,
-	     0x1.50ecc9ed47f19p-43,
-	     0x1.356cd5ce7799ep-43,
-	     0x1.1c229a587ab78p-43,
-	     0x1.04e15ecc7f3f6p-43,
-	     0x1.deffc7e6a6017p-44,
-	     0x1.b7b040832f310p-44,
-	     0x1.938e021f36d76p-44,
-	     0x1.7258610b3b233p-44,
-	     0x1.53d3bfc82a909p-44,
-	     0x1.37c92babdc2fdp-44,
-	     0x1.1e06010120f6ap-44,
-	     0x1.065b9616170d4p-44,
-	     0x1.e13dd96b3753ap-45,
-	     0x1.b950d32467392p-45,
-	     0x1.94a72263259a5p-45,
-	     0x1.72fd93e036cdcp-45,
-	     0x1.54164576929abp-45,
-	     0x1.37b83c521fe96p-45,
-	     0x1.1daf033182e96p-45,
-	     0x1.05ca50205d26ap-45,
-	     0x1.dfbb6235639fap-46,
-	     0x1.b7807e294781fp-46,
-	     0x1.9298add70a734p-46,
-	     0x1.70beaf9c7ffb6p-46,
-	     0x1.51b2cd6709222p-46,
-	     0x1.353a6cf7f7fffp-46,
-	     0x1.1b1fa8cbe84a7p-46,
-	     0x1.0330f0fd69921p-46,
-	     0x1.da81670f96f9bp-47,
-	     0x1.b24a16b4d09aap-47,
-	     0x1.8d6eeb6efdbd6p-47,
-	     0x1.6ba91ac734785p-47,
-	     0x1.4cb7966770ab5p-47,
-	     0x1.305e9721d0981p-47,
-	     0x1.1667311fff70ap-47,
-	     0x1.fd3de10d62855p-48,
-	     0x1.d1aefbcd48d0cp-48,
-	     0x1.a9cc93c25aca9p-48,
-	     0x1.85487ee3ea735p-48,
-	     0x1.63daf8b4b1e0cp-48,
-	     0x1.45421e69a6ca1p-48,
-	     0x1.294175802d99ap-48,
-	     0x1.0fa17bf41068fp-48,
-	     0x1.f05e82aae2bb9p-49,
-	     0x1.c578101b29058p-49,
-	     0x1.9e39dc5dd2f7cp-49,
-	     0x1.7a553a728bbf2p-49,
-	     0x1.5982008db1304p-49,
-	     0x1.3b7e00422e51bp-49,
-	     0x1.200c898d9ee3ep-49,
-	     0x1.06f5f7eb65a56p-49,
-	     0x1.e00e9148a1d25p-50,
-	     0x1.b623734024e92p-50,
-	     0x1.8fd4e01891bf8p-50,
-	     0x1.6cd44c7470d89p-50,
-	     0x1.4cd9c04158cd7p-50,
-	     0x1.2fa34bf5c8344p-50,
-	     0x1.14f4890ff2461p-50,
-	     0x1.f92c49dfa4df5p-51,
-	     0x1.ccaaea71ab0dfp-51,
-	     0x1.a40829f001197p-51,
-	     0x1.7eef13b59e96cp-51,
-	     0x1.5d11e1a252bf5p-51,
-	     0x1.3e296303b2297p-51,
-	     0x1.21f47009f43cep-51,
-	     0x1.083768c5e4541p-51,
-	     0x1.e1777d831265ep-52,
-	     0x1.b69f10b0191b5p-52,
-	     0x1.8f8a3a05b5b52p-52,
-	     0x1.6be573c40c8e7p-52,
-	     0x1.4b645ba991fdbp-52,
-	     0x1.2dc119095729fp-52,
-  },
-};
diff --git a/pl/math/sv_erff_data.c b/pl/math/sv_erff_data.c
deleted file mode 100644
index 154d3c1888748d..00000000000000
--- a/pl/math/sv_erff_data.c
+++ /dev/null
@@ -1,1046 +0,0 @@
-/*
- * Data for approximation of vector erff.
- *
- * Copyright (c) 2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "math_config.h"
-
-/* Lookup table used in SVE erff.
-   For each possible rounded input r (multiples of 1/128), between
-   r = 0.0 and r = 4.0 (513 values):
-   - __erff_data.erf contains the values of erf(r),
-   - __erff_data.scale contains the values of 2/sqrt(pi)*exp(-r^2).
-   Note that indices 0 and 1 are never hit by the algorithm, since lookup is
-   performed only for x >= 1/64-1/512.  */
-const struct sv_erff_data __sv_erff_data = {
-  .erf = { 0x0.000000p+0,
-	   0x1.20dbf4p-7,
-	   0x1.20d770p-6,
-	   0x1.b137e0p-6,
-	   0x1.20c564p-5,
-	   0x1.68e5d4p-5,
-	   0x1.b0fafep-5,
-	   0x1.f902a8p-5,
-	   0x1.207d48p-4,
-	   0x1.44703ep-4,
-	   0x1.68591ap-4,
-	   0x1.8c36bep-4,
-	   0x1.b00812p-4,
-	   0x1.d3cbf8p-4,
-	   0x1.f7815ap-4,
-	   0x1.0d9390p-3,
-	   0x1.1f5e1ap-3,
-	   0x1.311fc2p-3,
-	   0x1.42d7fcp-3,
-	   0x1.548642p-3,
-	   0x1.662a0cp-3,
-	   0x1.77c2d2p-3,
-	   0x1.895010p-3,
-	   0x1.9ad142p-3,
-	   0x1.ac45e4p-3,
-	   0x1.bdad72p-3,
-	   0x1.cf076ep-3,
-	   0x1.e05354p-3,
-	   0x1.f190aap-3,
-	   0x1.015f78p-2,
-	   0x1.09eed6p-2,
-	   0x1.127632p-2,
-	   0x1.1af54ep-2,
-	   0x1.236bf0p-2,
-	   0x1.2bd9dcp-2,
-	   0x1.343ed6p-2,
-	   0x1.3c9aa8p-2,
-	   0x1.44ed18p-2,
-	   0x1.4d35f0p-2,
-	   0x1.5574f4p-2,
-	   0x1.5da9f4p-2,
-	   0x1.65d4b8p-2,
-	   0x1.6df50ap-2,
-	   0x1.760abap-2,
-	   0x1.7e1594p-2,
-	   0x1.861566p-2,
-	   0x1.8e0a02p-2,
-	   0x1.95f336p-2,
-	   0x1.9dd0d2p-2,
-	   0x1.a5a2acp-2,
-	   0x1.ad6896p-2,
-	   0x1.b52264p-2,
-	   0x1.bccfecp-2,
-	   0x1.c47104p-2,
-	   0x1.cc0584p-2,
-	   0x1.d38d44p-2,
-	   0x1.db081cp-2,
-	   0x1.e275eap-2,
-	   0x1.e9d68ap-2,
-	   0x1.f129d4p-2,
-	   0x1.f86faap-2,
-	   0x1.ffa7eap-2,
-	   0x1.03693ap-1,
-	   0x1.06f794p-1,
-	   0x1.0a7ef6p-1,
-	   0x1.0dff50p-1,
-	   0x1.117894p-1,
-	   0x1.14eab4p-1,
-	   0x1.1855a6p-1,
-	   0x1.1bb95cp-1,
-	   0x1.1f15ccp-1,
-	   0x1.226ae8p-1,
-	   0x1.25b8a8p-1,
-	   0x1.28ff02p-1,
-	   0x1.2c3decp-1,
-	   0x1.2f755cp-1,
-	   0x1.32a54cp-1,
-	   0x1.35cdb4p-1,
-	   0x1.38ee8ap-1,
-	   0x1.3c07cap-1,
-	   0x1.3f196ep-1,
-	   0x1.42236ep-1,
-	   0x1.4525c8p-1,
-	   0x1.482074p-1,
-	   0x1.4b1372p-1,
-	   0x1.4dfebap-1,
-	   0x1.50e24cp-1,
-	   0x1.53be26p-1,
-	   0x1.569244p-1,
-	   0x1.595ea6p-1,
-	   0x1.5c2348p-1,
-	   0x1.5ee02ep-1,
-	   0x1.619556p-1,
-	   0x1.6442c0p-1,
-	   0x1.66e86ep-1,
-	   0x1.69865ep-1,
-	   0x1.6c1c98p-1,
-	   0x1.6eab18p-1,
-	   0x1.7131e6p-1,
-	   0x1.73b102p-1,
-	   0x1.762870p-1,
-	   0x1.789836p-1,
-	   0x1.7b0058p-1,
-	   0x1.7d60d8p-1,
-	   0x1.7fb9c0p-1,
-	   0x1.820b12p-1,
-	   0x1.8454d6p-1,
-	   0x1.869712p-1,
-	   0x1.88d1cep-1,
-	   0x1.8b050ep-1,
-	   0x1.8d30dep-1,
-	   0x1.8f5544p-1,
-	   0x1.91724ap-1,
-	   0x1.9387f6p-1,
-	   0x1.959652p-1,
-	   0x1.979d68p-1,
-	   0x1.999d42p-1,
-	   0x1.9b95e8p-1,
-	   0x1.9d8768p-1,
-	   0x1.9f71cap-1,
-	   0x1.a1551ap-1,
-	   0x1.a33162p-1,
-	   0x1.a506b0p-1,
-	   0x1.a6d50cp-1,
-	   0x1.a89c86p-1,
-	   0x1.aa5d26p-1,
-	   0x1.ac16fcp-1,
-	   0x1.adca14p-1,
-	   0x1.af767ap-1,
-	   0x1.b11c3cp-1,
-	   0x1.b2bb68p-1,
-	   0x1.b4540ap-1,
-	   0x1.b5e630p-1,
-	   0x1.b771e8p-1,
-	   0x1.b8f742p-1,
-	   0x1.ba764ap-1,
-	   0x1.bbef10p-1,
-	   0x1.bd61a2p-1,
-	   0x1.bece0ep-1,
-	   0x1.c03464p-1,
-	   0x1.c194b2p-1,
-	   0x1.c2ef08p-1,
-	   0x1.c44376p-1,
-	   0x1.c5920ap-1,
-	   0x1.c6dad2p-1,
-	   0x1.c81de2p-1,
-	   0x1.c95b46p-1,
-	   0x1.ca930ep-1,
-	   0x1.cbc54cp-1,
-	   0x1.ccf20cp-1,
-	   0x1.ce1962p-1,
-	   0x1.cf3b5cp-1,
-	   0x1.d0580cp-1,
-	   0x1.d16f7ep-1,
-	   0x1.d281c4p-1,
-	   0x1.d38ef0p-1,
-	   0x1.d49710p-1,
-	   0x1.d59a34p-1,
-	   0x1.d6986cp-1,
-	   0x1.d791cap-1,
-	   0x1.d8865ep-1,
-	   0x1.d97636p-1,
-	   0x1.da6162p-1,
-	   0x1.db47f4p-1,
-	   0x1.dc29fcp-1,
-	   0x1.dd0788p-1,
-	   0x1.dde0aap-1,
-	   0x1.deb570p-1,
-	   0x1.df85eap-1,
-	   0x1.e0522ap-1,
-	   0x1.e11a3ep-1,
-	   0x1.e1de36p-1,
-	   0x1.e29e22p-1,
-	   0x1.e35a12p-1,
-	   0x1.e41214p-1,
-	   0x1.e4c638p-1,
-	   0x1.e5768cp-1,
-	   0x1.e62322p-1,
-	   0x1.e6cc08p-1,
-	   0x1.e7714ap-1,
-	   0x1.e812fcp-1,
-	   0x1.e8b12ap-1,
-	   0x1.e94be4p-1,
-	   0x1.e9e336p-1,
-	   0x1.ea7730p-1,
-	   0x1.eb07e2p-1,
-	   0x1.eb9558p-1,
-	   0x1.ec1fa2p-1,
-	   0x1.eca6ccp-1,
-	   0x1.ed2ae6p-1,
-	   0x1.edabfcp-1,
-	   0x1.ee2a1ep-1,
-	   0x1.eea556p-1,
-	   0x1.ef1db4p-1,
-	   0x1.ef9344p-1,
-	   0x1.f00614p-1,
-	   0x1.f07630p-1,
-	   0x1.f0e3a6p-1,
-	   0x1.f14e82p-1,
-	   0x1.f1b6d0p-1,
-	   0x1.f21ca0p-1,
-	   0x1.f27ff8p-1,
-	   0x1.f2e0eap-1,
-	   0x1.f33f7ep-1,
-	   0x1.f39bc2p-1,
-	   0x1.f3f5c2p-1,
-	   0x1.f44d88p-1,
-	   0x1.f4a31ep-1,
-	   0x1.f4f694p-1,
-	   0x1.f547f2p-1,
-	   0x1.f59742p-1,
-	   0x1.f5e490p-1,
-	   0x1.f62fe8p-1,
-	   0x1.f67952p-1,
-	   0x1.f6c0dcp-1,
-	   0x1.f7068cp-1,
-	   0x1.f74a6ep-1,
-	   0x1.f78c8cp-1,
-	   0x1.f7cceep-1,
-	   0x1.f80ba2p-1,
-	   0x1.f848acp-1,
-	   0x1.f8841ap-1,
-	   0x1.f8bdf2p-1,
-	   0x1.f8f63ep-1,
-	   0x1.f92d08p-1,
-	   0x1.f96256p-1,
-	   0x1.f99634p-1,
-	   0x1.f9c8a8p-1,
-	   0x1.f9f9bap-1,
-	   0x1.fa2974p-1,
-	   0x1.fa57dep-1,
-	   0x1.fa84fep-1,
-	   0x1.fab0dep-1,
-	   0x1.fadb84p-1,
-	   0x1.fb04f6p-1,
-	   0x1.fb2d40p-1,
-	   0x1.fb5464p-1,
-	   0x1.fb7a6cp-1,
-	   0x1.fb9f60p-1,
-	   0x1.fbc344p-1,
-	   0x1.fbe61ep-1,
-	   0x1.fc07fap-1,
-	   0x1.fc28d8p-1,
-	   0x1.fc48c2p-1,
-	   0x1.fc67bcp-1,
-	   0x1.fc85d0p-1,
-	   0x1.fca2fep-1,
-	   0x1.fcbf52p-1,
-	   0x1.fcdaccp-1,
-	   0x1.fcf576p-1,
-	   0x1.fd0f54p-1,
-	   0x1.fd286ap-1,
-	   0x1.fd40bep-1,
-	   0x1.fd5856p-1,
-	   0x1.fd6f34p-1,
-	   0x1.fd8562p-1,
-	   0x1.fd9ae2p-1,
-	   0x1.fdafb8p-1,
-	   0x1.fdc3e8p-1,
-	   0x1.fdd77ap-1,
-	   0x1.fdea6ep-1,
-	   0x1.fdfcccp-1,
-	   0x1.fe0e96p-1,
-	   0x1.fe1fd0p-1,
-	   0x1.fe3080p-1,
-	   0x1.fe40a6p-1,
-	   0x1.fe504cp-1,
-	   0x1.fe5f70p-1,
-	   0x1.fe6e18p-1,
-	   0x1.fe7c46p-1,
-	   0x1.fe8a00p-1,
-	   0x1.fe9748p-1,
-	   0x1.fea422p-1,
-	   0x1.feb090p-1,
-	   0x1.febc96p-1,
-	   0x1.fec836p-1,
-	   0x1.fed374p-1,
-	   0x1.fede52p-1,
-	   0x1.fee8d4p-1,
-	   0x1.fef2fep-1,
-	   0x1.fefccep-1,
-	   0x1.ff064cp-1,
-	   0x1.ff0f76p-1,
-	   0x1.ff1852p-1,
-	   0x1.ff20e0p-1,
-	   0x1.ff2924p-1,
-	   0x1.ff3120p-1,
-	   0x1.ff38d6p-1,
-	   0x1.ff4048p-1,
-	   0x1.ff4778p-1,
-	   0x1.ff4e68p-1,
-	   0x1.ff551ap-1,
-	   0x1.ff5b90p-1,
-	   0x1.ff61ccp-1,
-	   0x1.ff67d0p-1,
-	   0x1.ff6d9ep-1,
-	   0x1.ff7338p-1,
-	   0x1.ff789ep-1,
-	   0x1.ff7dd4p-1,
-	   0x1.ff82dap-1,
-	   0x1.ff87b2p-1,
-	   0x1.ff8c5cp-1,
-	   0x1.ff90dcp-1,
-	   0x1.ff9532p-1,
-	   0x1.ff9960p-1,
-	   0x1.ff9d68p-1,
-	   0x1.ffa14ap-1,
-	   0x1.ffa506p-1,
-	   0x1.ffa8a0p-1,
-	   0x1.ffac18p-1,
-	   0x1.ffaf6ep-1,
-	   0x1.ffb2a6p-1,
-	   0x1.ffb5bep-1,
-	   0x1.ffb8b8p-1,
-	   0x1.ffbb98p-1,
-	   0x1.ffbe5ap-1,
-	   0x1.ffc102p-1,
-	   0x1.ffc390p-1,
-	   0x1.ffc606p-1,
-	   0x1.ffc862p-1,
-	   0x1.ffcaa8p-1,
-	   0x1.ffccd8p-1,
-	   0x1.ffcef4p-1,
-	   0x1.ffd0fap-1,
-	   0x1.ffd2eap-1,
-	   0x1.ffd4cap-1,
-	   0x1.ffd696p-1,
-	   0x1.ffd84ep-1,
-	   0x1.ffd9f8p-1,
-	   0x1.ffdb90p-1,
-	   0x1.ffdd18p-1,
-	   0x1.ffde90p-1,
-	   0x1.ffdffap-1,
-	   0x1.ffe154p-1,
-	   0x1.ffe2a2p-1,
-	   0x1.ffe3e2p-1,
-	   0x1.ffe514p-1,
-	   0x1.ffe63cp-1,
-	   0x1.ffe756p-1,
-	   0x1.ffe866p-1,
-	   0x1.ffe96ap-1,
-	   0x1.ffea64p-1,
-	   0x1.ffeb54p-1,
-	   0x1.ffec3ap-1,
-	   0x1.ffed16p-1,
-	   0x1.ffedeap-1,
-	   0x1.ffeeb4p-1,
-	   0x1.ffef76p-1,
-	   0x1.fff032p-1,
-	   0x1.fff0e4p-1,
-	   0x1.fff18ep-1,
-	   0x1.fff232p-1,
-	   0x1.fff2d0p-1,
-	   0x1.fff366p-1,
-	   0x1.fff3f6p-1,
-	   0x1.fff480p-1,
-	   0x1.fff504p-1,
-	   0x1.fff582p-1,
-	   0x1.fff5fcp-1,
-	   0x1.fff670p-1,
-	   0x1.fff6dep-1,
-	   0x1.fff74ap-1,
-	   0x1.fff7aep-1,
-	   0x1.fff810p-1,
-	   0x1.fff86cp-1,
-	   0x1.fff8c6p-1,
-	   0x1.fff91cp-1,
-	   0x1.fff96cp-1,
-	   0x1.fff9bap-1,
-	   0x1.fffa04p-1,
-	   0x1.fffa4cp-1,
-	   0x1.fffa90p-1,
-	   0x1.fffad0p-1,
-	   0x1.fffb0ep-1,
-	   0x1.fffb4ap-1,
-	   0x1.fffb82p-1,
-	   0x1.fffbb8p-1,
-	   0x1.fffbecp-1,
-	   0x1.fffc1ep-1,
-	   0x1.fffc4ep-1,
-	   0x1.fffc7ap-1,
-	   0x1.fffca6p-1,
-	   0x1.fffccep-1,
-	   0x1.fffcf6p-1,
-	   0x1.fffd1ap-1,
-	   0x1.fffd3ep-1,
-	   0x1.fffd60p-1,
-	   0x1.fffd80p-1,
-	   0x1.fffda0p-1,
-	   0x1.fffdbep-1,
-	   0x1.fffddap-1,
-	   0x1.fffdf4p-1,
-	   0x1.fffe0ep-1,
-	   0x1.fffe26p-1,
-	   0x1.fffe3ep-1,
-	   0x1.fffe54p-1,
-	   0x1.fffe68p-1,
-	   0x1.fffe7ep-1,
-	   0x1.fffe90p-1,
-	   0x1.fffea2p-1,
-	   0x1.fffeb4p-1,
-	   0x1.fffec4p-1,
-	   0x1.fffed4p-1,
-	   0x1.fffee4p-1,
-	   0x1.fffef2p-1,
-	   0x1.ffff00p-1,
-	   0x1.ffff0cp-1,
-	   0x1.ffff18p-1,
-	   0x1.ffff24p-1,
-	   0x1.ffff30p-1,
-	   0x1.ffff3ap-1,
-	   0x1.ffff44p-1,
-	   0x1.ffff4ep-1,
-	   0x1.ffff56p-1,
-	   0x1.ffff60p-1,
-	   0x1.ffff68p-1,
-	   0x1.ffff70p-1,
-	   0x1.ffff78p-1,
-	   0x1.ffff7ep-1,
-	   0x1.ffff84p-1,
-	   0x1.ffff8cp-1,
-	   0x1.ffff92p-1,
-	   0x1.ffff98p-1,
-	   0x1.ffff9cp-1,
-	   0x1.ffffa2p-1,
-	   0x1.ffffa6p-1,
-	   0x1.ffffacp-1,
-	   0x1.ffffb0p-1,
-	   0x1.ffffb4p-1,
-	   0x1.ffffb8p-1,
-	   0x1.ffffbcp-1,
-	   0x1.ffffc0p-1,
-	   0x1.ffffc4p-1,
-	   0x1.ffffc6p-1,
-	   0x1.ffffcap-1,
-	   0x1.ffffccp-1,
-	   0x1.ffffd0p-1,
-	   0x1.ffffd2p-1,
-	   0x1.ffffd4p-1,
-	   0x1.ffffd6p-1,
-	   0x1.ffffd8p-1,
-	   0x1.ffffdcp-1,
-	   0x1.ffffdep-1,
-	   0x1.ffffdep-1,
-	   0x1.ffffe0p-1,
-	   0x1.ffffe2p-1,
-	   0x1.ffffe4p-1,
-	   0x1.ffffe6p-1,
-	   0x1.ffffe8p-1,
-	   0x1.ffffe8p-1,
-	   0x1.ffffeap-1,
-	   0x1.ffffeap-1,
-	   0x1.ffffecp-1,
-	   0x1.ffffeep-1,
-	   0x1.ffffeep-1,
-	   0x1.fffff0p-1,
-	   0x1.fffff0p-1,
-	   0x1.fffff2p-1,
-	   0x1.fffff2p-1,
-	   0x1.fffff2p-1,
-	   0x1.fffff4p-1,
-	   0x1.fffff4p-1,
-	   0x1.fffff4p-1,
-	   0x1.fffff6p-1,
-	   0x1.fffff6p-1,
-	   0x1.fffff6p-1,
-	   0x1.fffff8p-1,
-	   0x1.fffff8p-1,
-	   0x1.fffff8p-1,
-	   0x1.fffff8p-1,
-	   0x1.fffffap-1,
-	   0x1.fffffap-1,
-	   0x1.fffffap-1,
-	   0x1.fffffap-1,
-	   0x1.fffffap-1,
-	   0x1.fffffap-1,
-	   0x1.fffffcp-1,
-	   0x1.fffffcp-1,
-	   0x1.fffffcp-1,
-	   0x1.fffffcp-1,
-	   0x1.fffffcp-1,
-	   0x1.fffffcp-1,
-	   0x1.fffffcp-1,
-	   0x1.fffffcp-1,
-	   0x1.fffffep-1,
-	   0x1.fffffep-1,
-	   0x1.fffffep-1,
-	   0x1.fffffep-1,
-	   0x1.fffffep-1,
-	   0x1.fffffep-1,
-	   0x1.fffffep-1,
-	   0x1.fffffep-1,
-	   0x1.fffffep-1,
-	   0x1.fffffep-1,
-	   0x1.fffffep-1,
-	   0x1.fffffep-1,
-	   0x1.fffffep-1,
-	   0x1.fffffep-1,
-	   0x1.fffffep-1,
-	   0x1.fffffep-1,
-	   0x1.fffffep-1,
-	   0x1.fffffep-1,
-	   0x1.000000p+0,
-	   0x1.000000p+0,
-	   0x1.000000p+0,
-	   0x1.000000p+0,
-	   0x1.000000p+0,
-	   0x1.000000p+0,
-	   0x1.000000p+0,
-	   0x1.000000p+0,
-	   0x1.000000p+0,
-	   0x1.000000p+0,
-	   0x1.000000p+0,
-  },
-  .scale = { 0x1.20dd76p+0,
-	     0x1.20d8f2p+0,
-	     0x1.20cb68p+0,
-	     0x1.20b4d8p+0,
-	     0x1.209546p+0,
-	     0x1.206cb4p+0,
-	     0x1.203b26p+0,
-	     0x1.2000a0p+0,
-	     0x1.1fbd28p+0,
-	     0x1.1f70c4p+0,
-	     0x1.1f1b7ap+0,
-	     0x1.1ebd56p+0,
-	     0x1.1e565cp+0,
-	     0x1.1de698p+0,
-	     0x1.1d6e14p+0,
-	     0x1.1cecdcp+0,
-	     0x1.1c62fap+0,
-	     0x1.1bd07cp+0,
-	     0x1.1b3572p+0,
-	     0x1.1a91e6p+0,
-	     0x1.19e5eap+0,
-	     0x1.19318cp+0,
-	     0x1.1874dep+0,
-	     0x1.17aff0p+0,
-	     0x1.16e2d8p+0,
-	     0x1.160da4p+0,
-	     0x1.153068p+0,
-	     0x1.144b3cp+0,
-	     0x1.135e30p+0,
-	     0x1.12695ep+0,
-	     0x1.116cd8p+0,
-	     0x1.1068bap+0,
-	     0x1.0f5d16p+0,
-	     0x1.0e4a08p+0,
-	     0x1.0d2fa6p+0,
-	     0x1.0c0e0ap+0,
-	     0x1.0ae550p+0,
-	     0x1.09b590p+0,
-	     0x1.087ee4p+0,
-	     0x1.07416cp+0,
-	     0x1.05fd3ep+0,
-	     0x1.04b27cp+0,
-	     0x1.036140p+0,
-	     0x1.0209a6p+0,
-	     0x1.00abd0p+0,
-	     0x1.fe8fb0p-1,
-	     0x1.fbbbbep-1,
-	     0x1.f8dc0ap-1,
-	     0x1.f5f0cep-1,
-	     0x1.f2fa4cp-1,
-	     0x1.eff8c4p-1,
-	     0x1.ecec78p-1,
-	     0x1.e9d5a8p-1,
-	     0x1.e6b498p-1,
-	     0x1.e38988p-1,
-	     0x1.e054bep-1,
-	     0x1.dd167cp-1,
-	     0x1.d9cf06p-1,
-	     0x1.d67ea2p-1,
-	     0x1.d32592p-1,
-	     0x1.cfc41ep-1,
-	     0x1.cc5a8ap-1,
-	     0x1.c8e91cp-1,
-	     0x1.c5701ap-1,
-	     0x1.c1efcap-1,
-	     0x1.be6872p-1,
-	     0x1.bada5ap-1,
-	     0x1.b745c6p-1,
-	     0x1.b3aafcp-1,
-	     0x1.b00a46p-1,
-	     0x1.ac63e8p-1,
-	     0x1.a8b828p-1,
-	     0x1.a5074ep-1,
-	     0x1.a1519ep-1,
-	     0x1.9d9762p-1,
-	     0x1.99d8dap-1,
-	     0x1.961650p-1,
-	     0x1.925008p-1,
-	     0x1.8e8646p-1,
-	     0x1.8ab950p-1,
-	     0x1.86e96ap-1,
-	     0x1.8316d6p-1,
-	     0x1.7f41dcp-1,
-	     0x1.7b6abcp-1,
-	     0x1.7791b8p-1,
-	     0x1.73b714p-1,
-	     0x1.6fdb12p-1,
-	     0x1.6bfdf0p-1,
-	     0x1.681ff2p-1,
-	     0x1.644156p-1,
-	     0x1.60625cp-1,
-	     0x1.5c8342p-1,
-	     0x1.58a446p-1,
-	     0x1.54c5a6p-1,
-	     0x1.50e79ep-1,
-	     0x1.4d0a68p-1,
-	     0x1.492e42p-1,
-	     0x1.455366p-1,
-	     0x1.417a0cp-1,
-	     0x1.3da26ep-1,
-	     0x1.39ccc2p-1,
-	     0x1.35f940p-1,
-	     0x1.32281ep-1,
-	     0x1.2e5992p-1,
-	     0x1.2a8dcep-1,
-	     0x1.26c508p-1,
-	     0x1.22ff72p-1,
-	     0x1.1f3d3cp-1,
-	     0x1.1b7e98p-1,
-	     0x1.17c3b6p-1,
-	     0x1.140cc4p-1,
-	     0x1.1059eep-1,
-	     0x1.0cab62p-1,
-	     0x1.09014cp-1,
-	     0x1.055bd6p-1,
-	     0x1.01bb2cp-1,
-	     0x1.fc3ee6p-2,
-	     0x1.f511aap-2,
-	     0x1.edeeeep-2,
-	     0x1.e6d700p-2,
-	     0x1.dfca26p-2,
-	     0x1.d8c8aap-2,
-	     0x1.d1d2d0p-2,
-	     0x1.cae8dap-2,
-	     0x1.c40b08p-2,
-	     0x1.bd3998p-2,
-	     0x1.b674c8p-2,
-	     0x1.afbcd4p-2,
-	     0x1.a911f0p-2,
-	     0x1.a27456p-2,
-	     0x1.9be438p-2,
-	     0x1.9561c8p-2,
-	     0x1.8eed36p-2,
-	     0x1.8886b2p-2,
-	     0x1.822e66p-2,
-	     0x1.7be47ap-2,
-	     0x1.75a91ap-2,
-	     0x1.6f7c6ap-2,
-	     0x1.695e8cp-2,
-	     0x1.634fa6p-2,
-	     0x1.5d4fd4p-2,
-	     0x1.575f34p-2,
-	     0x1.517de6p-2,
-	     0x1.4bac00p-2,
-	     0x1.45e99cp-2,
-	     0x1.4036d0p-2,
-	     0x1.3a93b2p-2,
-	     0x1.350052p-2,
-	     0x1.2f7cc4p-2,
-	     0x1.2a0916p-2,
-	     0x1.24a554p-2,
-	     0x1.1f518ap-2,
-	     0x1.1a0dc6p-2,
-	     0x1.14da0ap-2,
-	     0x1.0fb662p-2,
-	     0x1.0aa2d0p-2,
-	     0x1.059f5ap-2,
-	     0x1.00ac00p-2,
-	     0x1.f79184p-3,
-	     0x1.edeb40p-3,
-	     0x1.e46530p-3,
-	     0x1.daff4ap-3,
-	     0x1.d1b982p-3,
-	     0x1.c893cep-3,
-	     0x1.bf8e1cp-3,
-	     0x1.b6a856p-3,
-	     0x1.ade26cp-3,
-	     0x1.a53c42p-3,
-	     0x1.9cb5bep-3,
-	     0x1.944ec2p-3,
-	     0x1.8c0732p-3,
-	     0x1.83deeap-3,
-	     0x1.7bd5c8p-3,
-	     0x1.73eba4p-3,
-	     0x1.6c2056p-3,
-	     0x1.6473b6p-3,
-	     0x1.5ce596p-3,
-	     0x1.5575c8p-3,
-	     0x1.4e241ep-3,
-	     0x1.46f066p-3,
-	     0x1.3fda6cp-3,
-	     0x1.38e1fap-3,
-	     0x1.3206dcp-3,
-	     0x1.2b48dap-3,
-	     0x1.24a7b8p-3,
-	     0x1.1e233ep-3,
-	     0x1.17bb2cp-3,
-	     0x1.116f48p-3,
-	     0x1.0b3f52p-3,
-	     0x1.052b0cp-3,
-	     0x1.fe6460p-4,
-	     0x1.f2a902p-4,
-	     0x1.e72372p-4,
-	     0x1.dbd32ap-4,
-	     0x1.d0b7a0p-4,
-	     0x1.c5d04ap-4,
-	     0x1.bb1c98p-4,
-	     0x1.b09bfcp-4,
-	     0x1.a64de6p-4,
-	     0x1.9c31c6p-4,
-	     0x1.92470ap-4,
-	     0x1.888d1ep-4,
-	     0x1.7f036cp-4,
-	     0x1.75a960p-4,
-	     0x1.6c7e64p-4,
-	     0x1.6381e2p-4,
-	     0x1.5ab342p-4,
-	     0x1.5211ecp-4,
-	     0x1.499d48p-4,
-	     0x1.4154bcp-4,
-	     0x1.3937b2p-4,
-	     0x1.31458ep-4,
-	     0x1.297dbap-4,
-	     0x1.21df9ap-4,
-	     0x1.1a6a96p-4,
-	     0x1.131e14p-4,
-	     0x1.0bf97ep-4,
-	     0x1.04fc3ap-4,
-	     0x1.fc4b5ep-5,
-	     0x1.eeea8cp-5,
-	     0x1.e1d4d0p-5,
-	     0x1.d508fap-5,
-	     0x1.c885e0p-5,
-	     0x1.bc4a54p-5,
-	     0x1.b05530p-5,
-	     0x1.a4a54ap-5,
-	     0x1.99397ap-5,
-	     0x1.8e109cp-5,
-	     0x1.83298ep-5,
-	     0x1.78832cp-5,
-	     0x1.6e1c58p-5,
-	     0x1.63f3f6p-5,
-	     0x1.5a08e8p-5,
-	     0x1.505a18p-5,
-	     0x1.46e66cp-5,
-	     0x1.3dacd2p-5,
-	     0x1.34ac36p-5,
-	     0x1.2be38cp-5,
-	     0x1.2351c2p-5,
-	     0x1.1af5d2p-5,
-	     0x1.12ceb4p-5,
-	     0x1.0adb60p-5,
-	     0x1.031ad6p-5,
-	     0x1.f7182ap-6,
-	     0x1.e85c44p-6,
-	     0x1.da0006p-6,
-	     0x1.cc0180p-6,
-	     0x1.be5ecep-6,
-	     0x1.b1160ap-6,
-	     0x1.a4255ap-6,
-	     0x1.978ae8p-6,
-	     0x1.8b44e6p-6,
-	     0x1.7f5188p-6,
-	     0x1.73af0cp-6,
-	     0x1.685bb6p-6,
-	     0x1.5d55ccp-6,
-	     0x1.529b9ep-6,
-	     0x1.482b84p-6,
-	     0x1.3e03d8p-6,
-	     0x1.3422fep-6,
-	     0x1.2a875cp-6,
-	     0x1.212f62p-6,
-	     0x1.181984p-6,
-	     0x1.0f443ep-6,
-	     0x1.06ae14p-6,
-	     0x1.fcab14p-7,
-	     0x1.ec7262p-7,
-	     0x1.dcaf36p-7,
-	     0x1.cd5ecap-7,
-	     0x1.be7e5ap-7,
-	     0x1.b00b38p-7,
-	     0x1.a202bep-7,
-	     0x1.94624ep-7,
-	     0x1.87275ep-7,
-	     0x1.7a4f6ap-7,
-	     0x1.6dd7fep-7,
-	     0x1.61beaep-7,
-	     0x1.56011cp-7,
-	     0x1.4a9cf6p-7,
-	     0x1.3f8ff6p-7,
-	     0x1.34d7dcp-7,
-	     0x1.2a727ap-7,
-	     0x1.205dacp-7,
-	     0x1.169756p-7,
-	     0x1.0d1d6ap-7,
-	     0x1.03ede2p-7,
-	     0x1.f60d8ap-8,
-	     0x1.e4cc4ap-8,
-	     0x1.d4143ap-8,
-	     0x1.c3e1a6p-8,
-	     0x1.b430ecp-8,
-	     0x1.a4fe84p-8,
-	     0x1.9646f4p-8,
-	     0x1.8806d8p-8,
-	     0x1.7a3adep-8,
-	     0x1.6cdfccp-8,
-	     0x1.5ff276p-8,
-	     0x1.536fc2p-8,
-	     0x1.4754acp-8,
-	     0x1.3b9e40p-8,
-	     0x1.30499cp-8,
-	     0x1.2553eep-8,
-	     0x1.1aba78p-8,
-	     0x1.107a8cp-8,
-	     0x1.06918cp-8,
-	     0x1.f9f9d0p-9,
-	     0x1.e77448p-9,
-	     0x1.d58da6p-9,
-	     0x1.c4412cp-9,
-	     0x1.b38a3ap-9,
-	     0x1.a36454p-9,
-	     0x1.93cb12p-9,
-	     0x1.84ba30p-9,
-	     0x1.762d84p-9,
-	     0x1.682100p-9,
-	     0x1.5a90b0p-9,
-	     0x1.4d78bcp-9,
-	     0x1.40d564p-9,
-	     0x1.34a306p-9,
-	     0x1.28de12p-9,
-	     0x1.1d8318p-9,
-	     0x1.128ebap-9,
-	     0x1.07fdb4p-9,
-	     0x1.fb99b8p-10,
-	     0x1.e7f232p-10,
-	     0x1.d4fed8p-10,
-	     0x1.c2b9d0p-10,
-	     0x1.b11d70p-10,
-	     0x1.a02436p-10,
-	     0x1.8fc8c8p-10,
-	     0x1.8005f0p-10,
-	     0x1.70d6a4p-10,
-	     0x1.6235fcp-10,
-	     0x1.541f34p-10,
-	     0x1.468daep-10,
-	     0x1.397ceep-10,
-	     0x1.2ce898p-10,
-	     0x1.20cc76p-10,
-	     0x1.15246ep-10,
-	     0x1.09ec86p-10,
-	     0x1.fe41cep-11,
-	     0x1.e97ba4p-11,
-	     0x1.d57f52p-11,
-	     0x1.c245d4p-11,
-	     0x1.afc85ep-11,
-	     0x1.9e0058p-11,
-	     0x1.8ce75ep-11,
-	     0x1.7c7744p-11,
-	     0x1.6caa0ep-11,
-	     0x1.5d79ecp-11,
-	     0x1.4ee142p-11,
-	     0x1.40daa4p-11,
-	     0x1.3360ccp-11,
-	     0x1.266ea8p-11,
-	     0x1.19ff46p-11,
-	     0x1.0e0de8p-11,
-	     0x1.0295f0p-11,
-	     0x1.ef25d4p-12,
-	     0x1.da0110p-12,
-	     0x1.c5b542p-12,
-	     0x1.b23a5ap-12,
-	     0x1.9f8894p-12,
-	     0x1.8d986ap-12,
-	     0x1.7c629ap-12,
-	     0x1.6be022p-12,
-	     0x1.5c0a38p-12,
-	     0x1.4cda54p-12,
-	     0x1.3e4a24p-12,
-	     0x1.305390p-12,
-	     0x1.22f0b4p-12,
-	     0x1.161be4p-12,
-	     0x1.09cfa4p-12,
-	     0x1.fc0d56p-13,
-	     0x1.e577bcp-13,
-	     0x1.cfd4a6p-13,
-	     0x1.bb1a96p-13,
-	     0x1.a74068p-13,
-	     0x1.943d4ap-13,
-	     0x1.8208bcp-13,
-	     0x1.709a8ep-13,
-	     0x1.5feadap-13,
-	     0x1.4ff208p-13,
-	     0x1.40a8c2p-13,
-	     0x1.3207fcp-13,
-	     0x1.2408eap-13,
-	     0x1.16a502p-13,
-	     0x1.09d5f8p-13,
-	     0x1.fb2b7ap-14,
-	     0x1.e3bcf4p-14,
-	     0x1.cd5528p-14,
-	     0x1.b7e946p-14,
-	     0x1.a36eecp-14,
-	     0x1.8fdc1cp-14,
-	     0x1.7d2738p-14,
-	     0x1.6b4702p-14,
-	     0x1.5a329cp-14,
-	     0x1.49e178p-14,
-	     0x1.3a4b60p-14,
-	     0x1.2b6876p-14,
-	     0x1.1d3120p-14,
-	     0x1.0f9e1cp-14,
-	     0x1.02a868p-14,
-	     0x1.ec929ap-15,
-	     0x1.d4f4b4p-15,
-	     0x1.be6abcp-15,
-	     0x1.a8e8ccp-15,
-	     0x1.94637ep-15,
-	     0x1.80cfdcp-15,
-	     0x1.6e2368p-15,
-	     0x1.5c540cp-15,
-	     0x1.4b581cp-15,
-	     0x1.3b2652p-15,
-	     0x1.2bb5ccp-15,
-	     0x1.1cfe02p-15,
-	     0x1.0ef6c4p-15,
-	     0x1.019842p-15,
-	     0x1.e9b5e8p-16,
-	     0x1.d16f58p-16,
-	     0x1.ba4f04p-16,
-	     0x1.a447b8p-16,
-	     0x1.8f4cccp-16,
-	     0x1.7b5224p-16,
-	     0x1.684c22p-16,
-	     0x1.562facp-16,
-	     0x1.44f21ep-16,
-	     0x1.34894ap-16,
-	     0x1.24eb72p-16,
-	     0x1.160f44p-16,
-	     0x1.07ebd2p-16,
-	     0x1.f4f12ep-17,
-	     0x1.db5ad0p-17,
-	     0x1.c304f0p-17,
-	     0x1.abe09ep-17,
-	     0x1.95df98p-17,
-	     0x1.80f43ap-17,
-	     0x1.6d1178p-17,
-	     0x1.5a2ae0p-17,
-	     0x1.483488p-17,
-	     0x1.372310p-17,
-	     0x1.26eb9ep-17,
-	     0x1.1783cep-17,
-	     0x1.08e1bap-17,
-	     0x1.f5f7d8p-18,
-	     0x1.db92b6p-18,
-	     0x1.c282cep-18,
-	     0x1.aab7acp-18,
-	     0x1.94219cp-18,
-	     0x1.7eb1a2p-18,
-	     0x1.6a5972p-18,
-	     0x1.570b6ap-18,
-	     0x1.44ba86p-18,
-	     0x1.335a62p-18,
-	     0x1.22df2ap-18,
-	     0x1.133d96p-18,
-	     0x1.046aeap-18,
-	     0x1.ecb9d0p-19,
-	     0x1.d21398p-19,
-	     0x1.b8d094p-19,
-	     0x1.a0df10p-19,
-	     0x1.8a2e26p-19,
-	     0x1.74adc8p-19,
-	     0x1.604ea8p-19,
-	     0x1.4d0232p-19,
-	     0x1.3aba86p-19,
-	     0x1.296a70p-19,
-	     0x1.190562p-19,
-	     0x1.097f62p-19,
-	     0x1.f59a20p-20,
-	     0x1.d9c736p-20,
-	     0x1.bf716cp-20,
-	     0x1.a6852cp-20,
-	     0x1.8eefd8p-20,
-	     0x1.789fb8p-20,
-	     0x1.6383f8p-20,
-	     0x1.4f8c96p-20,
-	     0x1.3caa62p-20,
-	     0x1.2acee2p-20,
-	     0x1.19ec60p-20,
-	     0x1.09f5d0p-20,
-	     0x1.f5bd96p-21,
-	     0x1.d9371ep-21,
-	     0x1.be41dep-21,
-	     0x1.a4c89ep-21,
-	     0x1.8cb738p-21,
-	     0x1.75fa8ep-21,
-	     0x1.608078p-21,
-	     0x1.4c37c0p-21,
-	     0x1.39100ep-21,
-	     0x1.26f9e0p-21,
-	     0x1.15e682p-21,
-	     0x1.05c804p-21,
-	     0x1.ed2254p-22,
-	     0x1.d06ad6p-22,
-	     0x1.b551c8p-22,
-	     0x1.9bc0a0p-22,
-	     0x1.83a200p-22,
-	     0x1.6ce1aap-22,
-	     0x1.576c72p-22,
-	     0x1.43302cp-22,
-	     0x1.301ba2p-22,
-	     0x1.1e1e86p-22,
-	     0x1.0d2966p-22,
-	     0x1.fa5b50p-23,
-	     0x1.dc3ae4p-23,
-	     0x1.bfd756p-23,
-	     0x1.a517dap-23,
-	     0x1.8be4f8p-23,
-	     0x1.74287ep-23,
-	     0x1.5dcd66p-23,
-	     0x1.48bfd4p-23,
-	     0x1.34ecf8p-23,
-	     0x1.224310p-23,
-	     0x1.10b148p-23,
-  },
-};
diff --git a/pl/math/sv_exp10f_1u5.c b/pl/math/sv_exp10f_1u5.c
deleted file mode 100644
index 9ecde8f1aa528b..00000000000000
--- a/pl/math/sv_exp10f_1u5.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Single-precision SVE 2^x function.
- *
- * Copyright (c) 2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "sv_math.h"
-#include "include/mathlib.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_sve_f32.h"
-
-/* For x < -SpecialBound, the result is subnormal and not handled correctly by
-   FEXPA.  */
-#define SpecialBound 37.9
-
-static const struct data
-{
-  float poly[5];
-  float shift, log10_2, log2_10_hi, log2_10_lo, special_bound;
-} data = {
-  /* Coefficients generated using Remez algorithm with minimisation of relative
-     error.
-     rel error: 0x1.89dafa3p-24
-     abs error: 0x1.167d55p-23 in [-log10(2)/2, log10(2)/2]
-     maxerr: 0.52 +0.5 ulp.  */
-  .poly = { 0x1.26bb16p+1f, 0x1.5350d2p+1f, 0x1.04744ap+1f, 0x1.2d8176p+0f,
-	    0x1.12b41ap-1f },
-  /* 1.5*2^17 + 127, a shift value suitable for FEXPA.  */
-  .shift = 0x1.903f8p17f,
-  .log10_2 = 0x1.a934fp+1,
-  .log2_10_hi = 0x1.344136p-2,
-  .log2_10_lo = -0x1.ec10cp-27,
-  .special_bound = SpecialBound,
-};
-
-static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
-{
-  return sv_call_f32 (exp10f, x, y, special);
-}
-
-/* Single-precision SVE exp10f routine. Implements the same algorithm
-   as AdvSIMD exp10f.
-   Worst case error is 1.02 ULPs.
-   _ZGVsMxv_exp10f(-0x1.040488p-4) got 0x1.ba5f9ep-1
-				  want 0x1.ba5f9cp-1.  */
-svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg)
-{
-  const struct data *d = ptr_barrier (&data);
-  /* exp10(x) = 2^(n/N) * 10^r = 2^n * (1 + poly (r)),
-     with poly(r) in [1/sqrt(2), sqrt(2)] and
-     x = r + n * log10(2) / N, with r in [-log10(2)/2N, log10(2)/2N].  */
-
-  /* Load some constants in quad-word chunks to minimise memory access (last
-     lane is wasted).  */
-  svfloat32_t log10_2_and_inv = svld1rq (svptrue_b32 (), &d->log10_2);
-
-  /* n = round(x/(log10(2)/N)).  */
-  svfloat32_t shift = sv_f32 (d->shift);
-  svfloat32_t z = svmla_lane (shift, x, log10_2_and_inv, 0);
-  svfloat32_t n = svsub_x (pg, z, shift);
-
-  /* r = x - n*log10(2)/N.  */
-  svfloat32_t r = svmls_lane (x, n, log10_2_and_inv, 1);
-  r = svmls_lane (r, n, log10_2_and_inv, 2);
-
-  svbool_t special = svacgt (pg, x, d->special_bound);
-  svfloat32_t scale = svexpa (svreinterpret_u32 (z));
-
-  /* Polynomial evaluation: poly(r) ~ exp10(r)-1.  */
-  svfloat32_t r2 = svmul_x (pg, r, r);
-  svfloat32_t poly
-      = svmla_x (pg, svmul_x (pg, r, d->poly[0]),
-		 sv_pairwise_poly_3_f32_x (pg, r, r2, d->poly + 1), r2);
-
-  if (unlikely (svptest_any (pg, special)))
-    return special_case (x, svmla_x (pg, scale, scale, poly), special);
-
-  return svmla_x (pg, scale, scale, poly);
-}
-
-PL_SIG (SV, F, 1, exp10, -9.9, 9.9)
-PL_TEST_ULP (SV_NAME_F1 (exp10), 0.52)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (exp10), 0, SpecialBound, 50000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (exp10), SpecialBound, inf, 50000)
diff --git a/pl/math/sv_exp2f_1u6.c b/pl/math/sv_exp2f_1u6.c
deleted file mode 100644
index 9698ff6f068294..00000000000000
--- a/pl/math/sv_exp2f_1u6.c
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Single-precision SVE 2^x function.
- *
- * Copyright (c) 2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "sv_math.h"
-#include "poly_sve_f32.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
-static const struct data
-{
-  float poly[5];
-  float shift, thres;
-} data = {
-  /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for
-     compatibility with polynomial helpers.  */
-  .poly = { 0x1.62e422p-1f, 0x1.ebf9bcp-3f, 0x1.c6bd32p-5f, 0x1.3ce9e4p-7f,
-	    0x1.59977ap-10f },
-  /* 1.5*2^17 + 127.  */
-  .shift = 0x1.903f8p17f,
-  /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled
-     correctly by FEXPA.  */
-  .thres = 0x1.5d5e2ap+6f,
-};
-
-static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
-{
-  return sv_call_f32 (exp2f, x, y, special);
-}
-
-/* Single-precision SVE exp2f routine. Implements the same algorithm
-   as AdvSIMD exp2f.
-   Worst case error is 1.04 ULPs.
-   SV_NAME_F1 (exp2)(0x1.943b9p-1) got 0x1.ba7eb2p+0
-				  want 0x1.ba7ebp+0.  */
-svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg)
-{
-  const struct data *d = ptr_barrier (&data);
-  /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
-    x = n + r, with r in [-1/2, 1/2].  */
-  svfloat32_t shift = sv_f32 (d->shift);
-  svfloat32_t z = svadd_x (pg, x, shift);
-  svfloat32_t n = svsub_x (pg, z, shift);
-  svfloat32_t r = svsub_x (pg, x, n);
-
-  svbool_t special = svacgt (pg, x, d->thres);
-  svfloat32_t scale = svexpa (svreinterpret_u32 (z));
-
-  /* Polynomial evaluation: poly(r) ~ exp2(r)-1.
-     Evaluate polynomial use hybrid scheme - offset ESTRIN by 1 for
-     coefficients 1 to 4, and apply most significant coefficient directly.  */
-  svfloat32_t r2 = svmul_x (pg, r, r);
-  svfloat32_t p14 = sv_pairwise_poly_3_f32_x (pg, r, r2, d->poly + 1);
-  svfloat32_t p0 = svmul_x (pg, r, d->poly[0]);
-  svfloat32_t poly = svmla_x (pg, p0, r2, p14);
-
-  if (unlikely (svptest_any (pg, special)))
-    return special_case (x, svmla_x (pg, scale, scale, poly), special);
-
-  return svmla_x (pg, scale, scale, poly);
-}
-
-PL_SIG (SV, F, 1, exp2, -9.9, 9.9)
-PL_TEST_ULP (SV_NAME_F1 (exp2), 0.55)
-PL_TEST_INTERVAL (SV_NAME_F1 (exp2), 0, Thres, 40000)
-PL_TEST_INTERVAL (SV_NAME_F1 (exp2), Thres, 1, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (exp2), 1, Thres, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (exp2), Thres, inf, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0, -0x1p-23, 40000)
-PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0x1p-23, -1, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -1, -0x1p23, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0x1p23, -inf, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0, ScaleThres, 40000)
-PL_TEST_INTERVAL (SV_NAME_F1 (exp2), ScaleThres, -1, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -1, ScaleThres, 50000)
-PL_TEST_INTERVAL (SV_NAME_F1 (exp2), ScaleThres, -inf, 50000)
diff --git a/pl/math/sv_expf_2u.c b/pl/math/sv_expf_2u.c
deleted file mode 100644
index 93d705ce420a0b..00000000000000
--- a/pl/math/sv_expf_2u.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Single-precision vector e^x function.
- *
- * Copyright (c) 2019-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
-static const struct data
-{
-  float poly[5];
-  float inv_ln2, ln2_hi, ln2_lo, shift, thres;
-} data = {
-  /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for
-     compatibility with polynomial helpers.  */
-  .poly = { 0x1.ffffecp-1f, 0x1.fffdb6p-2f, 0x1.555e66p-3f, 0x1.573e2ep-5f,
-	    0x1.0e4020p-7f },
-  .inv_ln2 = 0x1.715476p+0f,
-  .ln2_hi = 0x1.62e4p-1f,
-  .ln2_lo = 0x1.7f7d1cp-20f,
-  /* 1.5*2^17 + 127.  */
-  .shift = 0x1.903f8p17f,
-  /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled
-     correctly by FEXPA.  */
-  .thres = 0x1.5d5e2ap+6f,
-};
-
-#define C(i) sv_f32 (d->poly[i])
-#define ExponentBias 0x3f800000
-
-static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
-{
-  return sv_call_f32 (expf, x, y, special);
-}
-
-/* Optimised single-precision SVE exp function.
-   Worst-case error is 1.04 ulp:
-   SV_NAME_F1 (exp)(0x1.a8eda4p+1) got 0x1.ba74bcp+4
-				  want 0x1.ba74bap+4.  */
-svfloat32_t SV_NAME_F1 (exp) (svfloat32_t x, const svbool_t pg)
-{
-  const struct data *d = ptr_barrier (&data);
-
-  /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
-     x = ln2*n + r, with r in [-ln2/2, ln2/2].  */
-
-  /* Load some constants in quad-word chunks to minimise memory access (last
-     lane is wasted).  */
-  svfloat32_t invln2_and_ln2 = svld1rq (svptrue_b32 (), &d->inv_ln2);
-
-  /* n = round(x/(ln2/N)).  */
-  svfloat32_t z = svmla_lane (sv_f32 (d->shift), x, invln2_and_ln2, 0);
-  svfloat32_t n = svsub_x (pg, z, d->shift);
-
-  /* r = x - n*ln2/N.  */
-  svfloat32_t r = svmls_lane (x, n, invln2_and_ln2, 1);
-  r = svmls_lane (r, n, invln2_and_ln2, 2);
-
-  /* scale = 2^(n/N).  */
-  svbool_t is_special_case = svacgt (pg, x, d->thres);
-  svfloat32_t scale = svexpa (svreinterpret_u32 (z));
-
-  /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6.  */
-  svfloat32_t p12 = svmla_x (pg, C (1), C (2), r);
-  svfloat32_t p34 = svmla_x (pg, C (3), C (4), r);
-  svfloat32_t r2 = svmul_x (pg, r, r);
-  svfloat32_t p14 = svmla_x (pg, p12, p34, r2);
-  svfloat32_t p0 = svmul_x (pg, r, C (0));
-  svfloat32_t poly = svmla_x (pg, p0, r2, p14);
-
-  if (unlikely (svptest_any (pg, is_special_case)))
-    return special_case (x, svmla_x (pg, scale, scale, poly), is_special_case);
-
-  return svmla_x (pg, scale, scale, poly);
-}
-
-PL_SIG (SV, F, 1, exp, -9.9, 9.9)
-PL_TEST_ULP (SV_NAME_F1 (exp), 0.55)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (exp), 0, 0x1p-23, 40000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (exp), 0x1p-23, 1, 50000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (exp), 1, 0x1p23, 50000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (exp), 0x1p23, inf, 50000)
diff --git a/pl/math/sv_expf_inline.h b/pl/math/sv_expf_inline.h
deleted file mode 100644
index 0ef4e0fda946e6..00000000000000
--- a/pl/math/sv_expf_inline.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * SVE helper for single-precision routines which calculate exp(x) and do
- * not need special-case handling
- *
- * Copyright (c) 2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#ifndef PL_MATH_SV_EXPF_INLINE_H
-#define PL_MATH_SV_EXPF_INLINE_H
-
-#include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
-struct sv_expf_data
-{
-  float poly[5];
-  float inv_ln2, ln2_hi, ln2_lo, shift;
-};
-
-/* Coefficients copied from the polynomial in AdvSIMD variant, reversed for
-   compatibility with polynomial helpers. Shift is 1.5*2^17 + 127.  */
-#define SV_EXPF_DATA                                                          \
-  {                                                                           \
-    .poly = { 0x1.ffffecp-1f, 0x1.fffdb6p-2f, 0x1.555e66p-3f, 0x1.573e2ep-5f, \
-	      0x1.0e4020p-7f },                                               \
-                                                                              \
-    .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f,                        \
-    .ln2_lo = 0x1.7f7d1cp-20f, .shift = 0x1.803f8p17f,                        \
-  }
-
-#define C(i) sv_f32 (d->poly[i])
-
-static inline svfloat32_t
-expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d)
-{
-  /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
-     x = ln2*n + r, with r in [-ln2/2, ln2/2].  */
-
-  /* Load some constants in quad-word chunks to minimise memory access.  */
-  svfloat32_t c4_invln2_and_ln2 = svld1rq (svptrue_b32 (), &d->poly[4]);
-
-  /* n = round(x/(ln2/N)).  */
-  svfloat32_t z = svmla_lane (sv_f32 (d->shift), x, c4_invln2_and_ln2, 1);
-  svfloat32_t n = svsub_x (pg, z, d->shift);
-
-  /* r = x - n*ln2/N.  */
-  svfloat32_t r = svmls_lane (x, n, c4_invln2_and_ln2, 2);
-  r = svmls_lane (r, n, c4_invln2_and_ln2, 3);
-
-  /* scale = 2^(n/N).  */
-  svfloat32_t scale = svexpa (svreinterpret_u32_f32 (z));
-
-  /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6.  */
-  svfloat32_t p12 = svmla_x (pg, C (1), C (2), r);
-  svfloat32_t p34 = svmla_lane (C (3), r, c4_invln2_and_ln2, 0);
-  svfloat32_t r2 = svmul_f32_x (pg, r, r);
-  svfloat32_t p14 = svmla_x (pg, p12, p34, r2);
-  svfloat32_t p0 = svmul_f32_x (pg, r, C (0));
-  svfloat32_t poly = svmla_x (pg, p0, r2, p14);
-
-  return svmla_x (pg, scale, scale, poly);
-}
-
-#endif // PL_MATH_SV_EXPF_INLINE_H
\ No newline at end of file
diff --git a/pl/math/sv_log10_2u5.c b/pl/math/sv_log10_2u5.c
deleted file mode 100644
index f55e068fd442ca..00000000000000
--- a/pl/math/sv_log10_2u5.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Double-precision SVE log10(x) function.
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_sve_f64.h"
-
-#define Min 0x0010000000000000
-#define Max 0x7ff0000000000000
-#define Thres 0x7fe0000000000000 /* Max - Min.  */
-#define Off 0x3fe6900900000000
-#define N (1 << V_LOG10_TABLE_BITS)
-
-static svfloat64_t NOINLINE
-special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
-{
-  return sv_call_f64 (log10, x, y, special);
-}
-
-/* SVE log10 algorithm.
-   Maximum measured error is 2.46 ulps.
-   SV_NAME_D1 (log10)(0x1.131956cd4b627p+0) got 0x1.fffbdf6eaa669p-6
-					   want 0x1.fffbdf6eaa667p-6.  */
-svfloat64_t SV_NAME_D1 (log10) (svfloat64_t x, const svbool_t pg)
-{
-  svuint64_t ix = svreinterpret_u64 (x);
-  svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thres);
-
-  /* x = 2^k z; where z is in range [Off,2*Off) and exact.
-     The range is split into N subintervals.
-     The ith subinterval contains z and c is near its center.  */
-  svuint64_t tmp = svsub_x (pg, ix, Off);
-  svuint64_t i = svlsr_x (pg, tmp, 51 - V_LOG10_TABLE_BITS);
-  i = svand_x (pg, i, (N - 1) << 1);
-  svfloat64_t k = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (tmp), 52));
-  svfloat64_t z = svreinterpret_f64 (
-      svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52)));
-
-  /* log(x) = k*log(2) + log(c) + log(z/c).  */
-  svfloat64_t invc = svld1_gather_index (pg, &__v_log10_data.table[0].invc, i);
-  svfloat64_t logc
-      = svld1_gather_index (pg, &__v_log10_data.table[0].log10c, i);
-
-  /* We approximate log(z/c) with a polynomial P(x) ~= log(x + 1):
-     r = z/c - 1 (we look up precomputed 1/c)
-     log(z/c) ~= P(r).  */
-  svfloat64_t r = svmad_x (pg, invc, z, -1.0);
-
-  /* hi = log(c) + k*log(2).  */
-  svfloat64_t w = svmla_x (pg, logc, r, __v_log10_data.invln10);
-  svfloat64_t hi = svmla_x (pg, w, k, __v_log10_data.log10_2);
-
-  /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi.  */
-  svfloat64_t r2 = svmul_x (pg, r, r);
-  svfloat64_t y = sv_pw_horner_4_f64_x (pg, r, r2, __v_log10_data.poly);
-
-  if (unlikely (svptest_any (pg, special)))
-    return special_case (x, svmla_x (svnot_z (pg, special), hi, r2, y),
-			 special);
-  return svmla_x (pg, hi, r2, y);
-}
-
-PL_SIG (SV, D, 1, log10, 0.01, 11.1)
-PL_TEST_ULP (SV_NAME_D1 (log10), 1.97)
-PL_TEST_INTERVAL (SV_NAME_D1 (log10), -0.0, -0x1p126, 100)
-PL_TEST_INTERVAL (SV_NAME_D1 (log10), 0x1p-149, 0x1p-126, 4000)
-PL_TEST_INTERVAL (SV_NAME_D1 (log10), 0x1p-126, 0x1p-23, 50000)
-PL_TEST_INTERVAL (SV_NAME_D1 (log10), 0x1p-23, 1.0, 50000)
-PL_TEST_INTERVAL (SV_NAME_D1 (log10), 1.0, 100, 50000)
-PL_TEST_INTERVAL (SV_NAME_D1 (log10), 100, inf, 50000)
diff --git a/pl/math/sv_log1pf_1u3.c b/pl/math/sv_log1pf_1u3.c
deleted file mode 100644
index ea1a3dbf723a8e..00000000000000
--- a/pl/math/sv_log1pf_1u3.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Single-precision vector log(x + 1) function.
- *
- * Copyright (c) 2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_sve_f32.h"
-
-static const struct data
-{
-  float poly[8];
-  float ln2, exp_bias;
-  uint32_t four, three_quarters;
-} data = {.poly = {/* Do not store first term of polynomial, which is -0.5, as
-                      this can be fmov-ed directly instead of including it in
-                      the main load-and-mla polynomial schedule.  */
-		   0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f,
-		   -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f,
-		   0x1.abcb6p-4f, -0x1.6f0d5ep-5f},
-	  .ln2 = 0x1.62e43p-1f,
-	  .exp_bias = 0x1p-23f,
-	  .four = 0x40800000,
-	  .three_quarters = 0x3f400000};
-
-#define SignExponentMask 0xff800000
-
-static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
-{
-  return sv_call_f32 (log1pf, x, y, special);
-}
-
-/* Vector log1pf approximation using polynomial on reduced interval. Worst-case
-   error is 1.27 ULP very close to 0.5.
-   _ZGVsMxv_log1pf(0x1.fffffep-2) got 0x1.9f324p-2
-				 want 0x1.9f323ep-2.  */
-svfloat32_t SV_NAME_F1 (log1p) (svfloat32_t x, svbool_t pg)
-{
-  const struct data *d = ptr_barrier (&data);
-  /* x < -1, Inf/Nan.  */
-  svbool_t special = svcmpeq (pg, svreinterpret_u32 (x), 0x7f800000);
-  special = svorn_z (pg, special, svcmpge (pg, x, -1));
-
-  /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
-			   is in [-0.25, 0.5]):
-     log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
-
-     We approximate log1p(m) with a polynomial, then scale by
-     k*log(2). Instead of doing this directly, we use an intermediate
-     scale factor s = 4*k*log(2) to ensure the scale is representable
-     as a normalised fp32 number.  */
-  svfloat32_t m = svadd_x (pg, x, 1);
-
-  /* Choose k to scale x to the range [-1/4, 1/2].  */
-  svint32_t k
-      = svand_x (pg, svsub_x (pg, svreinterpret_s32 (m), d->three_quarters),
-		 sv_s32 (SignExponentMask));
-
-  /* Scale x by exponent manipulation.  */
-  svfloat32_t m_scale = svreinterpret_f32 (
-      svsub_x (pg, svreinterpret_u32 (x), svreinterpret_u32 (k)));
-
-  /* Scale up to ensure that the scale factor is representable as normalised
-     fp32 number, and scale m down accordingly.  */
-  svfloat32_t s = svreinterpret_f32 (svsubr_x (pg, k, d->four));
-  m_scale = svadd_x (pg, m_scale, svmla_x (pg, sv_f32 (-1), s, 0.25));
-
-  /* Evaluate polynomial on reduced interval.  */
-  svfloat32_t ms2 = svmul_x (pg, m_scale, m_scale),
-	      ms4 = svmul_x (pg, ms2, ms2);
-  svfloat32_t p = sv_estrin_7_f32_x (pg, m_scale, ms2, ms4, d->poly);
-  p = svmad_x (pg, m_scale, p, -0.5);
-  p = svmla_x (pg, m_scale, m_scale, svmul_x (pg, m_scale, p));
-
-  /* The scale factor to be applied back at the end - by multiplying float(k)
-     by 2^-23 we get the unbiased exponent of k.  */
-  svfloat32_t scale_back = svmul_x (pg, svcvt_f32_x (pg, k), d->exp_bias);
-
-  /* Apply the scaling back.  */
-  svfloat32_t y = svmla_x (pg, p, scale_back, d->ln2);
-
-  if (unlikely (svptest_any (pg, special)))
-    return special_case (x, y, special);
-
-  return y;
-}
-
-PL_SIG (SV, F, 1, log1p, -0.9, 10.0)
-PL_TEST_ULP (SV_NAME_F1 (log1p), 0.77)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (log1p), 0, 0x1p-23, 5000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (log1p), 0x1p-23, 1, 5000)
-PL_TEST_INTERVAL (SV_NAME_F1 (log1p), 1, inf, 10000)
-PL_TEST_INTERVAL (SV_NAME_F1 (log1p), -1, -inf, 10)
diff --git a/pl/math/sv_log1pf_inline.h b/pl/math/sv_log1pf_inline.h
deleted file mode 100644
index d13b094f6b5d28..00000000000000
--- a/pl/math/sv_log1pf_inline.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Helper for SVE routines which calculate log(1 + x) and do not
- * need special-case handling
- *
- * Copyright (c) 2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#ifndef PL_MATH_SV_LOG1PF_INLINE_H
-#define PL_MATH_SV_LOG1PF_INLINE_H
-
-#include "v_math.h"
-#include "math_config.h"
-#include "poly_sve_f32.h"
-
-static const struct sv_log1pf_data
-{
-  float32_t poly[9];
-  float32_t ln2;
-  float32_t scale_back;
-} sv_log1pf_data = {
-  /* Polynomial generated using FPMinimax in [-0.25, 0.5].  */
-  .poly = { -0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f,
-	    -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, 0x1.abcb6p-4f,
-	    -0x1.6f0d5ep-5f },
-  .scale_back = 0x1.0p-23f,
-  .ln2 = 0x1.62e43p-1f,
-};
-
-static inline svfloat32_t
-eval_poly (svfloat32_t m, const float32_t *c, svbool_t pg)
-{
-  svfloat32_t p_12 = svmla_x (pg, sv_f32 (c[0]), m, sv_f32 (c[1]));
-  svfloat32_t m2 = svmul_x (pg, m, m);
-  svfloat32_t q = svmla_x (pg, m, m2, p_12);
-  svfloat32_t p = sv_pw_horner_6_f32_x (pg, m, m2, c + 2);
-  p = svmul_x (pg, m2, p);
-
-  return svmla_x (pg, q, m2, p);
-}
-
-static inline svfloat32_t
-sv_log1pf_inline (svfloat32_t x, svbool_t pg)
-{
-  const struct sv_log1pf_data *d = ptr_barrier (&sv_log1pf_data);
-
-  svfloat32_t m = svadd_x (pg, x, 1.0f);
-
-  svint32_t ks = svsub_x (pg, svreinterpret_s32 (m),
-			  svreinterpret_s32 (svdup_f32 (0.75f)));
-  ks = svand_x (pg, ks, 0xff800000);
-  svuint32_t k = svreinterpret_u32 (ks);
-  svfloat32_t s = svreinterpret_f32 (
-      svsub_x (pg, svreinterpret_u32 (svdup_f32 (4.0f)), k));
-
-  svfloat32_t m_scale
-      = svreinterpret_f32 (svsub_x (pg, svreinterpret_u32 (x), k));
-  m_scale
-      = svadd_x (pg, m_scale, svmla_x (pg, sv_f32 (-1.0f), sv_f32 (0.25f), s));
-  svfloat32_t p = eval_poly (m_scale, d->poly, pg);
-  svfloat32_t scale_back = svmul_x (pg, svcvt_f32_x (pg, k), d->scale_back);
-  return svmla_x (pg, p, scale_back, d->ln2);
-}
-
-#endif //  PL_MATH_SV_LOG1PF_INLINE_H
\ No newline at end of file
diff --git a/pl/math/sv_log2_3u.c b/pl/math/sv_log2_3u.c
deleted file mode 100644
index 0775a39cc85d60..00000000000000
--- a/pl/math/sv_log2_3u.c
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Double-precision SVE log2 function.
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_sve_f64.h"
-
-#define N (1 << V_LOG2_TABLE_BITS)
-#define Off 0x3fe6900900000000
-#define Max (0x7ff0000000000000)
-#define Min (0x0010000000000000)
-#define Thresh (0x7fe0000000000000) /* Max - Min.  */
-
-static svfloat64_t NOINLINE
-special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp)
-{
-  return sv_call_f64 (log2, x, y, cmp);
-}
-
-/* Double-precision SVE log2 routine.
-   Implements the same algorithm as AdvSIMD log10, with coefficients and table
-   entries scaled in extended precision.
-   The maximum observed error is 2.58 ULP:
-   SV_NAME_D1 (log2)(0x1.0b556b093869bp+0) got 0x1.fffb34198d9dap-5
-					  want 0x1.fffb34198d9ddp-5.  */
-svfloat64_t SV_NAME_D1 (log2) (svfloat64_t x, const svbool_t pg)
-{
-  svuint64_t ix = svreinterpret_u64 (x);
-  svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thresh);
-
-  /* x = 2^k z; where z is in range [Off,2*Off) and exact.
-     The range is split into N subintervals.
-     The ith subinterval contains z and c is near its center.  */
-  svuint64_t tmp = svsub_x (pg, ix, Off);
-  svuint64_t i = svlsr_x (pg, tmp, 51 - V_LOG2_TABLE_BITS);
-  i = svand_x (pg, i, (N - 1) << 1);
-  svfloat64_t k = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (tmp), 52));
-  svfloat64_t z = svreinterpret_f64 (
-      svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52)));
-
-  svfloat64_t invc = svld1_gather_index (pg, &__v_log2_data.table[0].invc, i);
-  svfloat64_t log2c
-      = svld1_gather_index (pg, &__v_log2_data.table[0].log2c, i);
-
-  /* log2(x) = log1p(z/c-1)/log(2) + log2(c) + k.  */
-
-  svfloat64_t r = svmad_x (pg, invc, z, -1.0);
-  svfloat64_t w = svmla_x (pg, log2c, r, __v_log2_data.invln2);
-
-  svfloat64_t r2 = svmul_x (pg, r, r);
-  svfloat64_t y = sv_pw_horner_4_f64_x (pg, r, r2, __v_log2_data.poly);
-  w = svadd_x (pg, k, w);
-
-  if (unlikely (svptest_any (pg, special)))
-    return special_case (x, svmla_x (svnot_z (pg, special), w, r2, y),
-			 special);
-  return svmla_x (pg, w, r2, y);
-}
-
-PL_SIG (SV, D, 1, log2, 0.01, 11.1)
-PL_TEST_ULP (SV_NAME_D1 (log2), 2.09)
-PL_TEST_EXPECT_FENV_ALWAYS (SV_NAME_D1 (log2))
-PL_TEST_INTERVAL (SV_NAME_D1 (log2), -0.0, -0x1p126, 1000)
-PL_TEST_INTERVAL (SV_NAME_D1 (log2), 0.0, 0x1p-126, 4000)
-PL_TEST_INTERVAL (SV_NAME_D1 (log2), 0x1p-126, 0x1p-23, 50000)
-PL_TEST_INTERVAL (SV_NAME_D1 (log2), 0x1p-23, 1.0, 50000)
-PL_TEST_INTERVAL (SV_NAME_D1 (log2), 1.0, 100, 50000)
-PL_TEST_INTERVAL (SV_NAME_D1 (log2), 100, inf, 50000)
diff --git a/pl/math/sv_log_2u5.c b/pl/math/sv_log_2u5.c
deleted file mode 100644
index 2530c9e3f62cef..00000000000000
--- a/pl/math/sv_log_2u5.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Double-precision SVE log(x) function.
- *
- * Copyright (c) 2020-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
-#define P(i) sv_f64 (__v_log_data.poly[i])
-#define N (1 << V_LOG_TABLE_BITS)
-#define Off (0x3fe6900900000000)
-#define MaxTop (0x7ff)
-#define MinTop (0x001)
-#define ThreshTop (0x7fe) /* MaxTop - MinTop.  */
-
-static svfloat64_t NOINLINE
-special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp)
-{
-  return sv_call_f64 (log, x, y, cmp);
-}
-
-/* SVE port of AdvSIMD log algorithm.
-   Maximum measured error is 2.17 ulp:
-   SV_NAME_D1 (log)(0x1.a6129884398a3p+0) got 0x1.ffffff1cca043p-2
-					 want 0x1.ffffff1cca045p-2.  */
-svfloat64_t SV_NAME_D1 (log) (svfloat64_t x, const svbool_t pg)
-{
-  svuint64_t ix = svreinterpret_u64 (x);
-  svuint64_t top = svlsr_x (pg, ix, 52);
-  svbool_t cmp = svcmpge (pg, svsub_x (pg, top, MinTop), sv_u64 (ThreshTop));
-
-  /* x = 2^k z; where z is in range [Off,2*Off) and exact.
-     The range is split into N subintervals.
-     The ith subinterval contains z and c is near its center.  */
-  svuint64_t tmp = svsub_x (pg, ix, Off);
-  /* Calculate table index = (tmp >> (52 - V_LOG_TABLE_BITS)) % N.
-     The actual value of i is double this due to table layout.  */
-  svuint64_t i
-      = svand_x (pg, svlsr_x (pg, tmp, (51 - V_LOG_TABLE_BITS)), (N - 1) << 1);
-  svint64_t k
-      = svasr_x (pg, svreinterpret_s64 (tmp), 52); /* Arithmetic shift.  */
-  svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52));
-  svfloat64_t z = svreinterpret_f64 (iz);
-  /* Lookup in 2 global lists (length N).  */
-  svfloat64_t invc = svld1_gather_index (pg, &__v_log_data.table[0].invc, i);
-  svfloat64_t logc = svld1_gather_index (pg, &__v_log_data.table[0].logc, i);
-
-  /* log(x) = log1p(z/c-1) + log(c) + k*Ln2.  */
-  svfloat64_t r = svmad_x (pg, invc, z, -1);
-  svfloat64_t kd = svcvt_f64_x (pg, k);
-  /* hi = r + log(c) + k*Ln2.  */
-  svfloat64_t hi = svmla_x (pg, svadd_x (pg, logc, r), kd, __v_log_data.ln2);
-  /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi.  */
-  svfloat64_t r2 = svmul_x (pg, r, r);
-  svfloat64_t y = svmla_x (pg, P (2), r, P (3));
-  svfloat64_t p = svmla_x (pg, P (0), r, P (1));
-  y = svmla_x (pg, y, r2, P (4));
-  y = svmla_x (pg, p, r2, y);
-
-  if (unlikely (svptest_any (pg, cmp)))
-    return special_case (x, svmla_x (svnot_z (pg, cmp), hi, r2, y), cmp);
-  return svmla_x (pg, hi, r2, y);
-}
-
-PL_SIG (SV, D, 1, log, 0.01, 11.1)
-PL_TEST_ULP (SV_NAME_D1 (log), 1.68)
-PL_TEST_INTERVAL (SV_NAME_D1 (log), -0.0, -inf, 1000)
-PL_TEST_INTERVAL (SV_NAME_D1 (log), 0, 0x1p-149, 1000)
-PL_TEST_INTERVAL (SV_NAME_D1 (log), 0x1p-149, 0x1p-126, 4000)
-PL_TEST_INTERVAL (SV_NAME_D1 (log), 0x1p-126, 0x1p-23, 50000)
-PL_TEST_INTERVAL (SV_NAME_D1 (log), 0x1p-23, 1.0, 50000)
-PL_TEST_INTERVAL (SV_NAME_D1 (log), 1.0, 100, 50000)
-PL_TEST_INTERVAL (SV_NAME_D1 (log), 100, inf, 50000)
diff --git a/pl/math/sv_tan_3u5.c b/pl/math/sv_tan_3u5.c
deleted file mode 100644
index 746396e98a1024..00000000000000
--- a/pl/math/sv_tan_3u5.c
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Double-precision SVE tan(x) function.
- *
- * Copyright (c) 2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "sv_math.h"
-#include "poly_sve_f64.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
-static const struct data
-{
-  double poly[9];
-  double half_pi_hi, half_pi_lo, inv_half_pi, range_val, shift;
-} data = {
-  /* Polynomial generated with FPMinimax.  */
-  .poly = { 0x1.5555555555556p-2, 0x1.1111111110a63p-3, 0x1.ba1ba1bb46414p-5,
-	    0x1.664f47e5b5445p-6, 0x1.226e5e5ecdfa3p-7, 0x1.d6c7ddbf87047p-9,
-	    0x1.7ea75d05b583ep-10, 0x1.289f22964a03cp-11,
-	    0x1.4e4fd14147622p-12, },
-  .half_pi_hi = 0x1.921fb54442d18p0,
-  .half_pi_lo = 0x1.1a62633145c07p-54,
-  .inv_half_pi = 0x1.45f306dc9c883p-1,
-  .range_val = 0x1p23,
-  .shift = 0x1.8p52,
-};
-
-static svfloat64_t NOINLINE
-special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
-{
-  return sv_call_f64 (tan, x, y, special);
-}
-
-/* Vector approximation for double-precision tan.
-   Maximum measured error is 3.48 ULP:
-   _ZGVsMxv_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37
-				      want -0x1.f6ccd8ecf7deap+37.  */
-svfloat64_t SV_NAME_D1 (tan) (svfloat64_t x, svbool_t pg)
-{
-  const struct data *dat = ptr_barrier (&data);
-
-  /* Invert condition to catch NaNs and Infs as well as large values.  */
-  svbool_t special = svnot_z (pg, svaclt (pg, x, dat->range_val));
-
-  /* q = nearest integer to 2 * x / pi.  */
-  svfloat64_t shift = sv_f64 (dat->shift);
-  svfloat64_t q = svmla_x (pg, shift, x, dat->inv_half_pi);
-  q = svsub_x (pg, q, shift);
-  svint64_t qi = svcvt_s64_x (pg, q);
-
-  /* Use q to reduce x to r in [-pi/4, pi/4], by:
-     r = x - q * pi/2, in extended precision.  */
-  svfloat64_t r = x;
-  svfloat64_t half_pi = svld1rq (svptrue_b64 (), &dat->half_pi_hi);
-  r = svmls_lane (r, q, half_pi, 0);
-  r = svmls_lane (r, q, half_pi, 1);
-  /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle
-     formula.  */
-  r = svmul_x (pg, r, 0.5);
-
-  /* Approximate tan(r) using order 8 polynomial.
-     tan(x) is odd, so polynomial has the form:
-     tan(x) ~= x + C0 * x^3 + C1 * x^5 + C3 * x^7 + ...
-     Hence we first approximate P(r) = C1 + C2 * r^2 + C3 * r^4 + ...
-     Then compute the approximation by:
-     tan(r) ~= r + r^3 * (C0 + r^2 * P(r)).  */
-  svfloat64_t r2 = svmul_x (pg, r, r);
-  svfloat64_t r4 = svmul_x (pg, r2, r2);
-  svfloat64_t r8 = svmul_x (pg, r4, r4);
-  /* Use offset version coeff array by 1 to evaluate from C1 onwards.  */
-  svfloat64_t p = sv_estrin_7_f64_x (pg, r2, r4, r8, dat->poly + 1);
-  p = svmad_x (pg, p, r2, dat->poly[0]);
-  p = svmla_x (pg, r, r2, svmul_x (pg, p, r));
-
-  /* Recombination uses double-angle formula:
-     tan(2x) = 2 * tan(x) / (1 - (tan(x))^2)
-     and reciprocity around pi/2:
-     tan(x) = 1 / (tan(pi/2 - x))
-     to assemble result using change-of-sign and conditional selection of
-     numerator/denominator dependent on odd/even-ness of q (hence quadrant).  */
-  svbool_t use_recip
-      = svcmpeq (pg, svand_x (pg, svreinterpret_u64 (qi), 1), 0);
-
-  svfloat64_t n = svmad_x (pg, p, p, -1);
-  svfloat64_t d = svmul_x (pg, p, 2);
-  svfloat64_t swap = n;
-  n = svneg_m (n, use_recip, d);
-  d = svsel (use_recip, swap, d);
-  if (unlikely (svptest_any (pg, special)))
-    return special_case (x, svdiv_x (svnot_z (pg, special), n, d), special);
-  return svdiv_x (pg, n, d);
-}
-
-PL_SIG (SV, D, 1, tan, -3.1, 3.1)
-PL_TEST_ULP (SV_NAME_D1 (tan), 2.99)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (tan), 0, 0x1p23, 500000)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (tan), 0x1p23, inf, 5000)
diff --git a/pl/math/sv_tanhf_2u6.c b/pl/math/sv_tanhf_2u6.c
deleted file mode 100644
index 988a56de0b2e59..00000000000000
--- a/pl/math/sv_tanhf_2u6.c
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Single-precision SVE tanh(x) function.
- *
- * Copyright (c) 2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
-#include "sv_expm1f_inline.h"
-
-static const struct data
-{
-  struct sv_expm1f_data expm1f_consts;
-  uint32_t boring_bound, onef;
-} data = {
-  .expm1f_consts = SV_EXPM1F_DATA,
-  /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for negative).  */
-  .boring_bound = 0x41102cb3,
-  .onef = 0x3f800000,
-};
-
-static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
-{
-  return sv_call_f32 (tanhf, x, y, special);
-}
-
-/* Approximation for single-precision SVE tanh(x), using a simplified
-   version of expm1f. The maximum error is 2.57 ULP:
-   _ZGVsMxv_tanhf (0x1.fc1832p-5) got 0x1.fb71a4p-5
-				 want 0x1.fb71aap-5.  */
-svfloat32_t SV_NAME_F1 (tanh) (svfloat32_t x, const svbool_t pg)
-{
-  const struct data *d = ptr_barrier (&data);
-
-  svfloat32_t ax = svabs_x (pg, x);
-  svuint32_t iax = svreinterpret_u32 (ax);
-  svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), iax);
-  svbool_t is_boring = svcmpgt (pg, iax, d->boring_bound);
-  svfloat32_t boring = svreinterpret_f32 (svorr_x (pg, sign, d->onef));
-
-  svbool_t special = svcmpgt (pg, iax, 0x7f800000);
-
-  /* tanh(x) = (e^2x - 1) / (e^2x + 1).  */
-  svfloat32_t q = expm1f_inline (svmul_x (pg, x, 2.0), pg, &d->expm1f_consts);
-  svfloat32_t y = svdiv_x (pg, q, svadd_x (pg, q, 2.0));
-  if (unlikely (svptest_any (pg, special)))
-    return special_case (x, svsel_f32 (is_boring, boring, y), special);
-  return svsel_f32 (is_boring, boring, y);
-}
-
-PL_SIG (SV, F, 1, tanh, -10.0, 10.0)
-PL_TEST_ULP (SV_NAME_F1 (tanh), 2.07)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (tanh), 0, 0x1p-23, 1000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (tanh), 0x1p-23, 0x1.205966p+3, 100000)
-PL_TEST_SYM_INTERVAL (SV_NAME_F1 (tanh), 0x1.205966p+3, inf, 100)
diff --git a/pl/math/test/mathbench_funcs.h b/pl/math/test/mathbench_funcs.h
deleted file mode 100644
index f2710a979d4097..00000000000000
--- a/pl/math/test/mathbench_funcs.h
+++ /dev/null
@@ -1,87 +0,0 @@
-// clang-format off
-/*
- * Function entries for mathbench.
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#define _ZSF1(fun, a, b) F(fun##f, a, b)
-#define _ZSD1(f, a, b) D(f, a, b)
-
-#if defined(__vpcs) && __aarch64__
-
-#define _ZVF1(fun, a, b) VNF(_ZGVnN4v_##fun##f, a, b)
-#define _ZVD1(f, a, b) VND(_ZGVnN2v_##f, a, b)
-
-#else
-
-#define _ZVF1(f, a, b)
-#define _ZVD1(f, a, b)
-
-#endif
-
-#if WANT_SVE_MATH
-
-#define _ZSVF1(fun, a, b) SVF(_ZGVsMxv_##fun##f, a, b)
-#define _ZSVD1(f, a, b) SVD(_ZGVsMxv_##f, a, b)
-
-#else
-
-#define _ZSVF1(f, a, b)
-#define _ZSVD1(f, a, b)
-
-#endif
-
-/* No auto-generated wrappers for binary functions - they have be
-   manually defined in mathbench_wrappers.h. We have to define silent
-   macros for them anyway as they will be emitted by PL_SIG.  */
-#define _ZSF2(...)
-#define _ZSD2(...)
-#define _ZVF2(...)
-#define _ZVD2(...)
-#define _ZSVF2(...)
-#define _ZSVD2(...)
-
-#include "mathbench_funcs_gen.h"
-
-/* PL_SIG only emits entries for unary functions, since if a function
-   needs to be wrapped in mathbench there is no way for it to know the
-   same of the wrapper. Add entries for binary functions, or any other
-   exotic signatures that need wrapping, below.  */
-
-{"atan2f", 'f', 0, -10.0, 10.0, {.f = atan2f_wrap}},
-{"atan2",  'd', 0, -10.0, 10.0, {.d = atan2_wrap}},
-{"powi",   'd', 0,  0.01, 11.1, {.d = powi_wrap}},
-
-{"_ZGVnN4vv_atan2f", 'f', 'n', -10.0, 10.0, {.vnf = _Z_atan2f_wrap}},
-{"_ZGVnN2vv_atan2",  'd', 'n', -10.0, 10.0, {.vnd = _Z_atan2_wrap}},
-{"_ZGVnN4vv_hypotf", 'f', 'n', -10.0, 10.0, {.vnf = _Z_hypotf_wrap}},
-{"_ZGVnN2vv_hypot",  'd', 'n', -10.0, 10.0, {.vnd = _Z_hypot_wrap}},
-{"_ZGVnN2vv_pow",    'd', 'n', -10.0, 10.0, {.vnd = xy_Z_pow}},
-{"x_ZGVnN2vv_pow",   'd', 'n', -10.0, 10.0, {.vnd = x_Z_pow}},
-{"y_ZGVnN2vv_pow",   'd', 'n', -10.0, 10.0, {.vnd = y_Z_pow}},
-{"_ZGVnN4vl4l4_sincosf", 'f', 'n', -3.1, 3.1, {.vnf = _Z_sincosf_wrap}},
-{"_ZGVnN2vl8l8_sincos", 'd', 'n', -3.1, 3.1, {.vnd = _Z_sincos_wrap}},
-{"_ZGVnN4v_cexpif", 'f', 'n', -3.1, 3.1, {.vnf = _Z_cexpif_wrap}},
-{"_ZGVnN2v_cexpi", 'd', 'n', -3.1, 3.1, {.vnd = _Z_cexpi_wrap}},
-
-#if WANT_SVE_MATH
-{"_ZGVsMxvv_atan2f", 'f', 's', -10.0, 10.0, {.svf = _Z_sv_atan2f_wrap}},
-{"_ZGVsMxvv_atan2",  'd', 's', -10.0, 10.0, {.svd = _Z_sv_atan2_wrap}},
-{"_ZGVsMxvv_hypotf", 'f', 's', -10.0, 10.0, {.svf = _Z_sv_hypotf_wrap}},
-{"_ZGVsMxvv_hypot",  'd', 's', -10.0, 10.0, {.svd = _Z_sv_hypot_wrap}},
-{"_ZGVsMxvv_powi",   'f', 's', -10.0, 10.0, {.svf = _Z_sv_powi_wrap}},
-{"_ZGVsMxvv_powk",   'd', 's', -10.0, 10.0, {.svd = _Z_sv_powk_wrap}},
-{"_ZGVsMxvv_powf",   'f', 's', -10.0, 10.0, {.svf = xy_Z_sv_powf}},
-{"x_ZGVsMxvv_powf",  'f', 's', -10.0, 10.0, {.svf = x_Z_sv_powf}},
-{"y_ZGVsMxvv_powf",  'f', 's', -10.0, 10.0, {.svf = y_Z_sv_powf}},
-{"_ZGVsMxvv_pow",    'd', 's', -10.0, 10.0, {.svd = xy_Z_sv_pow}},
-{"x_ZGVsMxvv_pow",   'd', 's', -10.0, 10.0, {.svd = x_Z_sv_pow}},
-{"y_ZGVsMxvv_pow",   'd', 's', -10.0, 10.0, {.svd = y_Z_sv_pow}},
-{"_ZGVsMxvl4l4_sincosf", 'f', 's', -3.1, 3.1, {.svf = _Z_sv_sincosf_wrap}},
-{"_ZGVsMxvl8l8_sincos", 'd', 's', -3.1, 3.1, {.svd = _Z_sv_sincos_wrap}},
-{"_ZGVsMxv_cexpif", 'f', 's', -3.1, 3.1, {.svf = _Z_sv_cexpif_wrap}},
-{"_ZGVsMxv_cexpi", 'd', 's', -3.1, 3.1, {.svd = _Z_sv_cexpi_wrap}},
-#endif
-    // clang-format on
diff --git a/pl/math/test/mathbench_wrappers.h b/pl/math/test/mathbench_wrappers.h
deleted file mode 100644
index fe7f8963cdeee5..00000000000000
--- a/pl/math/test/mathbench_wrappers.h
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * Function wrappers for mathbench.
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-static double
-atan2_wrap (double x)
-{
-  return atan2 (5.0, x);
-}
-
-static float
-atan2f_wrap (float x)
-{
-  return atan2f (5.0f, x);
-}
-
-static double
-powi_wrap (double x)
-{
-  return __builtin_powi (x, (int) round (x));
-}
-
-#if __aarch64__ && defined(__vpcs)
-
-__vpcs static v_double
-_Z_atan2_wrap (v_double x)
-{
-  return _ZGVnN2vv_atan2 (v_double_dup (5.0), x);
-}
-
-__vpcs static v_float
-_Z_atan2f_wrap (v_float x)
-{
-  return _ZGVnN4vv_atan2f (v_float_dup (5.0f), x);
-}
-
-__vpcs static v_float
-_Z_hypotf_wrap (v_float x)
-{
-  return _ZGVnN4vv_hypotf (v_float_dup (5.0f), x);
-}
-
-__vpcs static v_double
-_Z_hypot_wrap (v_double x)
-{
-  return _ZGVnN2vv_hypot (v_double_dup (5.0), x);
-}
-
-__vpcs static v_double
-xy_Z_pow (v_double x)
-{
-  return _ZGVnN2vv_pow (x, x);
-}
-
-__vpcs static v_double
-x_Z_pow (v_double x)
-{
-  return _ZGVnN2vv_pow (x, v_double_dup (23.4));
-}
-
-__vpcs static v_double
-y_Z_pow (v_double x)
-{
-  return _ZGVnN2vv_pow (v_double_dup (2.34), x);
-}
-
-__vpcs static v_float
-_Z_sincosf_wrap (v_float x)
-{
-  v_float s, c;
-  _ZGVnN4vl4l4_sincosf (x, &s, &c);
-  return s + c;
-}
-
-__vpcs static v_float
-_Z_cexpif_wrap (v_float x)
-{
-  __f32x4x2_t sc = _ZGVnN4v_cexpif (x);
-  return sc.val[0] + sc.val[1];
-}
-
-__vpcs static v_double
-_Z_sincos_wrap (v_double x)
-{
-  v_double s, c;
-  _ZGVnN2vl8l8_sincos (x, &s, &c);
-  return s + c;
-}
-
-__vpcs static v_double
-_Z_cexpi_wrap (v_double x)
-{
-  __f64x2x2_t sc = _ZGVnN2v_cexpi (x);
-  return sc.val[0] + sc.val[1];
-}
-
-#endif // __arch64__ && __vpcs
-
-#if WANT_SVE_MATH
-
-static sv_float
-_Z_sv_atan2f_wrap (sv_float x, sv_bool pg)
-{
-  return _ZGVsMxvv_atan2f (x, svdup_f32 (5.0f), pg);
-}
-
-static sv_double
-_Z_sv_atan2_wrap (sv_double x, sv_bool pg)
-{
-  return _ZGVsMxvv_atan2 (x, svdup_f64 (5.0), pg);
-}
-
-static sv_float
-_Z_sv_hypotf_wrap (sv_float x, sv_bool pg)
-{
-  return _ZGVsMxvv_hypotf (x, svdup_f32 (5.0), pg);
-}
-
-static sv_double
-_Z_sv_hypot_wrap (sv_double x, sv_bool pg)
-{
-  return _ZGVsMxvv_hypot (x, svdup_f64 (5.0), pg);
-}
-
-static sv_float
-_Z_sv_powi_wrap (sv_float x, sv_bool pg)
-{
-  return _ZGVsMxvv_powi (x, svcvt_s32_f32_x (pg, x), pg);
-}
-
-static sv_double
-_Z_sv_powk_wrap (sv_double x, sv_bool pg)
-{
-  return _ZGVsMxvv_powk (x, svcvt_s64_f64_x (pg, x), pg);
-}
-
-static sv_float
-xy_Z_sv_powf (sv_float x, sv_bool pg)
-{
-  return _ZGVsMxvv_powf (x, x, pg);
-}
-
-static sv_float
-x_Z_sv_powf (sv_float x, sv_bool pg)
-{
-  return _ZGVsMxvv_powf (x, svdup_f32 (23.4f), pg);
-}
-
-static sv_float
-y_Z_sv_powf (sv_float x, sv_bool pg)
-{
-  return _ZGVsMxvv_powf (svdup_f32 (2.34f), x, pg);
-}
-
-static sv_double
-xy_Z_sv_pow (sv_double x, sv_bool pg)
-{
-  return _ZGVsMxvv_pow (x, x, pg);
-}
-
-static sv_double
-x_Z_sv_pow (sv_double x, sv_bool pg)
-{
-  return _ZGVsMxvv_pow (x, svdup_f64 (23.4), pg);
-}
-
-static sv_double
-y_Z_sv_pow (sv_double x, sv_bool pg)
-{
-  return _ZGVsMxvv_pow (svdup_f64 (2.34), x, pg);
-}
-
-static sv_float
-_Z_sv_sincosf_wrap (sv_float x, sv_bool pg)
-{
-  float s[svcntw ()], c[svcntw ()];
-  _ZGVsMxvl4l4_sincosf (x, s, c, pg);
-  return svadd_x (pg, svld1 (pg, s), svld1 (pg, s));
-}
-
-static sv_float
-_Z_sv_cexpif_wrap (sv_float x, sv_bool pg)
-{
-  svfloat32x2_t sc = _ZGVsMxv_cexpif (x, pg);
-  return svadd_x (pg, svget2 (sc, 0), svget2 (sc, 1));
-}
-
-static sv_double
-_Z_sv_sincos_wrap (sv_double x, sv_bool pg)
-{
-  double s[svcntd ()], c[svcntd ()];
-  _ZGVsMxvl8l8_sincos (x, s, c, pg);
-  return svadd_x (pg, svld1 (pg, s), svld1 (pg, s));
-}
-
-static sv_double
-_Z_sv_cexpi_wrap (sv_double x, sv_bool pg)
-{
-  svfloat64x2_t sc = _ZGVsMxv_cexpi (x, pg);
-  return svadd_x (pg, svget2 (sc, 0), svget2 (sc, 1));
-}
-
-#endif // WANT_SVE_MATH
diff --git a/pl/math/test/pl_test.h b/pl/math/test/pl_test.h
deleted file mode 100644
index e7ed4eed634e87..00000000000000
--- a/pl/math/test/pl_test.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * PL macros for emitting various details about routines for consumption by
- * runulp.sh.
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception.
- */
-
-/* Emit the max ULP threshold, l, for routine f. Piggy-back PL_TEST_EXPECT_FENV
-   on PL_TEST_ULP to add EXPECT_FENV to all scalar routines.  */
-#if WANT_VMATH || defined(IGNORE_SCALAR_FENV)
-# define PL_TEST_ULP(f, l) PL_TEST_ULP f l
-#else
-# define PL_TEST_ULP(f, l)                                                   \
-    PL_TEST_EXPECT_FENV_ALWAYS (f)                                            \
-    PL_TEST_ULP f l
-#endif
-
-/* Emit routine name if e == 1 and f is expected to correctly trigger fenv
-   exceptions. e allows declaration to be emitted conditionally upon certain
-   build flags - defer expansion by one pass to allow those flags to be expanded
-   properly.  */
-#define PL_TEST_EXPECT_FENV(f, e) PL_TEST_EXPECT_FENV_ (f, e)
-#define PL_TEST_EXPECT_FENV_(f, e) PL_TEST_EXPECT_FENV_##e (f)
-#define PL_TEST_EXPECT_FENV_1(f) PL_TEST_EXPECT_FENV_ENABLED f
-#define PL_TEST_EXPECT_FENV_ALWAYS(f) PL_TEST_EXPECT_FENV (f, 1)
-
-#define PL_TEST_INTERVAL(f, lo, hi, n) PL_TEST_INTERVAL f lo hi n
-#define PL_TEST_SYM_INTERVAL(f, lo, hi, n)                                    \
-  PL_TEST_INTERVAL (f, lo, hi, n)                                             \
-  PL_TEST_INTERVAL (f, -lo, -hi, n)
-#define PL_TEST_INTERVAL_C(f, lo, hi, n, c) PL_TEST_INTERVAL f lo hi n c
-#define PL_TEST_SYM_INTERVAL_C(f, lo, hi, n, c)                               \
-  PL_TEST_INTERVAL_C (f, lo, hi, n, c)                                        \
-  PL_TEST_INTERVAL_C (f, -lo, -hi, n, c)
-// clang-format off
-#define PL_TEST_INTERVAL2(f, xlo, xhi, ylo, yhi, n)                            \
-  PL_TEST_INTERVAL f xlo,ylo xhi,yhi n
-// clang-format on
diff --git a/pl/math/test/runulp.sh b/pl/math/test/runulp.sh
deleted file mode 100755
index 0f5a41f76b25c7..00000000000000
--- a/pl/math/test/runulp.sh
+++ /dev/null
@@ -1,78 +0,0 @@
-#!/bin/bash
-
-# ULP error check script.
-#
-# Copyright (c) 2019-2023, Arm Limited.
-# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
-
-#set -x
-set -eu
-
-# cd to bin directory.
-cd "${0%/*}"
-
-flags="${ULPFLAGS:--q}"
-emu="$@"
-
-# Enable SVE testing
-WANT_SVE_MATH=${WANT_SVE_MATH:-0}
-
-FAIL=0
-PASS=0
-
-t() {
-	routine=$1
-	L=$(cat $LIMITS | grep "^$routine " | awk '{print $2}')
-	[[ $L =~ ^[0-9]+\.[0-9]+$ ]]
-	extra_flags=
-	[[ -z "${5:-}" ]] || extra_flags="$extra_flags -c $5"
-	grep -q "^$routine$" $FENV || extra_flags="$extra_flags -f"
-	IFS=',' read -ra LO <<< "$2"
-	IFS=',' read -ra HI <<< "$3"
-	ITV="${LO[0]} ${HI[0]}"
-	for i in "${!LO[@]}"; do
-	[[ "$i" -eq "0" ]] || ITV="$ITV x ${LO[$i]} ${HI[$i]}"
-	done
-	# Add -z flag to ignore zero sign for vector routines
-	{ echo $routine | grep -q "ZGV"; } && extra_flags="$extra_flags -z"
-	$emu ./ulp -e $L $flags ${extra_flags} $routine $ITV $4 && PASS=$((PASS+1)) || FAIL=$((FAIL+1))
-}
-
-check() {
-	$emu ./ulp -f -q "$@" #>/dev/null
-}
-
-if [ "$FUNC" == "atan2" ] || [ -z "$FUNC" ]; then
-    # Regression-test for correct NaN handling in atan2
-    check atan2 0x1p-1022 0x1p-1000 x 0 0x1p-1022 40000
-    check atan2 0x1.7887a0a717aefp+1017 0x1.7887a0a717aefp+1017 x -nan -nan
-    check atan2 nan nan x -nan -nan
-fi
-
-# vector functions
-flags="${ULPFLAGS:--q}"
-runsv=
-if [ $WANT_SVE_MATH -eq 1 ]; then
-# No guarantees about powi accuracy, so regression-test for exactness
-# w.r.t. the custom reference impl in ulp_wrappers.h
-check -q -f -e 0 _ZGVsMxvv_powi  0  inf x  0  1000 100000 && runsv=1
-check -q -f -e 0 _ZGVsMxvv_powi -0 -inf x  0  1000 100000 && runsv=1
-check -q -f -e 0 _ZGVsMxvv_powi  0  inf x -0 -1000 100000 && runsv=1
-check -q -f -e 0 _ZGVsMxvv_powi -0 -inf x -0 -1000 100000 && runsv=1
-check -q -f -e 0 _ZGVsMxvv_powk  0  inf x  0  1000 100000 && runsv=1
-check -q -f -e 0 _ZGVsMxvv_powk -0 -inf x  0  1000 100000 && runsv=1
-check -q -f -e 0 _ZGVsMxvv_powk  0  inf x -0 -1000 100000 && runsv=1
-check -q -f -e 0 _ZGVsMxvv_powk -0 -inf x -0 -1000 100000 && runsv=1
-fi
-
-while read F LO HI N C
-do
-	t $F $LO $HI $N $C
-done << EOF
-$(cat $INTERVALS | grep "\b$FUNC\b")
-EOF
-
-[ 0 -eq $FAIL ] || {
-	echo "FAILED $FAIL PASSED $PASS"
-	exit 1
-}
diff --git a/pl/math/test/testcases/directed/erff.tst b/pl/math/test/testcases/directed/erff.tst
deleted file mode 100644
index 9b1d3d5114ae31..00000000000000
--- a/pl/math/test/testcases/directed/erff.tst
+++ /dev/null
@@ -1,17 +0,0 @@
-; erff.tst
-;
-; Copyright (c) 2007-2023, Arm Limited.
-; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
-
-func=erff op1=7fc00001 result=7fc00001 errno=0
-func=erff op1=ffc00001 result=7fc00001 errno=0
-func=erff op1=7f800001 result=7fc00001 errno=0 status=i
-func=erff op1=ff800001 result=7fc00001 errno=0 status=i
-func=erff op1=7f800000 result=3f800000 errno=0
-func=erff op1=ff800000 result=bf800000 errno=0
-func=erff op1=00000000 result=00000000 errno=ERANGE
-func=erff op1=80000000 result=80000000 errno=ERANGE
-func=erff op1=00000001 result=00000001 errno=0 status=ux
-func=erff op1=80000001 result=80000001 errno=0 status=ux
-func=erff op1=3f800000 result=3f57bb3d.3a0 errno=0
-func=erff op1=bf800000 result=bf57bb3d.3a0 errno=0
diff --git a/pl/math/test/testcases/directed/log2.tst b/pl/math/test/testcases/directed/log2.tst
deleted file mode 100644
index 5d1eb9b877e804..00000000000000
--- a/pl/math/test/testcases/directed/log2.tst
+++ /dev/null
@@ -1,21 +0,0 @@
-; Directed test cases for log2
-;
-; Copyright (c) 2018-2023, Arm Limited.
-; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
-
-func=log2 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
-func=log2 op1=fff80000.00000001 result=7ff80000.00000001 errno=0
-func=log2 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
-func=log2 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
-func=log2 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
-func=log2 op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
-func=log2 op1=7fefffff.ffffffff result=408fffff.ffffffff.ffa errno=0
-func=log2 op1=ffefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i
-func=log2 op1=3ff00000.00000000 result=00000000.00000000 errno=0
-func=log2 op1=bff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
-func=log2 op1=00000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
-func=log2 op1=80000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
-func=log2 op1=00000000.00000001 result=c090c800.00000000 errno=0
-func=log2 op1=80000000.00000001 result=7ff80000.00000001 errno=EDOM status=i
-func=log2 op1=40000000.00000000 result=3ff00000.00000000 errno=0
-func=log2 op1=3fe00000.00000000 result=bff00000.00000000 errno=0
diff --git a/pl/math/test/testcases/directed/log2f.tst b/pl/math/test/testcases/directed/log2f.tst
deleted file mode 100644
index 4e08110878d69f..00000000000000
--- a/pl/math/test/testcases/directed/log2f.tst
+++ /dev/null
@@ -1,27 +0,0 @@
-; log2f.tst - Directed test cases for log2f
-;
-; Copyright (c) 2017-2023, Arm Limited.
-; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
-
-func=log2f op1=7fc00001 result=7fc00001 errno=0
-func=log2f op1=ffc00001 result=7fc00001 errno=0
-func=log2f op1=7f800001 result=7fc00001 errno=0 status=i
-func=log2f op1=ff800001 result=7fc00001 errno=0 status=i
-func=log2f op1=ff810000 result=7fc00001 errno=0 status=i
-func=log2f op1=7f800000 result=7f800000 errno=0
-func=log2f op1=ff800000 result=7fc00001 errno=EDOM status=i
-func=log2f op1=3f800000 result=00000000 errno=0
-func=log2f op1=00000000 result=ff800000 errno=ERANGE status=z
-func=log2f op1=80000000 result=ff800000 errno=ERANGE status=z
-func=log2f op1=80000001 result=7fc00001 errno=EDOM status=i
-
-func=log2f op1=3f7d70a4 result=bc6d8f8b.7d4 error=0
-func=log2f op1=3f604189 result=be4394c8.395 error=0
-func=log2f op1=3f278034 result=bf1caa73.88e error=0
-func=log2f op1=3edd3c36 result=bf9af3b9.619 error=0
-func=log2f op1=3e61259a result=c00bdb95.650 error=0
-func=log2f op1=3f8147ae result=3c6b3267.d6a error=0
-func=log2f op1=3f8fbe77 result=3e2b5fe2.a1c error=0
-func=log2f op1=3fac3eea result=3edb4d5e.1fc error=0
-func=log2f op1=3fd6e632 result=3f3f5d3a.827 error=0
-func=log2f op1=40070838 result=3f89e055.a0a error=0
diff --git a/pl/math/test/testcases/random/double.tst b/pl/math/test/testcases/random/double.tst
deleted file mode 100644
index d83283ef78649b..00000000000000
--- a/pl/math/test/testcases/random/double.tst
+++ /dev/null
@@ -1,6 +0,0 @@
-!! double.tst - Random test case specification for DP functions
-!!
-!! Copyright (c) 1999-2023, Arm Limited.
-!! SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
-
-test log10 10000
diff --git a/pl/math/test/testcases/random/float.tst b/pl/math/test/testcases/random/float.tst
deleted file mode 100644
index fa77efecfabb7a..00000000000000
--- a/pl/math/test/testcases/random/float.tst
+++ /dev/null
@@ -1,8 +0,0 @@
-!! float.tst - Random test case specification for SP functions
-!!
-!! Copyright (c) 2022-2023, Arm Limited.
-!! SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
-
-test erff 10000
-test log10f 10000
-test tanf 10000
diff --git a/pl/math/test/ulp_funcs.h b/pl/math/test/ulp_funcs.h
deleted file mode 100644
index 4929b481ffe1a2..00000000000000
--- a/pl/math/test/ulp_funcs.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Function entries for ulp.
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#if defined(__vpcs) && __aarch64__
-
-#define _ZVF1(f) ZVF1 (f)
-#define _ZVD1(f) ZVD1 (f)
-#define _ZVF2(f) ZVF2 (f)
-#define _ZVD2(f) ZVD2 (f)
-
-#else
-
-#define _ZVF1(f)
-#define _ZVD1(f)
-#define _ZVF2(f)
-#define _ZVD2(f)
-
-#endif
-
-#if WANT_SVE_MATH
-
-#define _ZSVF1(f) ZSVF1 (f)
-#define _ZSVF2(f) ZSVF2 (f)
-#define _ZSVD1(f) ZSVD1 (f)
-#define _ZSVD2(f) ZSVD2 (f)
-
-#else
-
-#define _ZSVF1(f)
-#define _ZSVF2(f)
-#define _ZSVD1(f)
-#define _ZSVD2(f)
-
-#endif
-
-#define _ZSF1(f) F1 (f)
-#define _ZSF2(f) F2 (f)
-#define _ZSD1(f) D1 (f)
-#define _ZSD2(f) D2 (f)
-
-#include "ulp_funcs_gen.h"
-
-F (_ZGVnN4v_sincosf_sin, v_sincosf_sin, sin, mpfr_sin, 1, 1, f1, 0)
-F (_ZGVnN4v_sincosf_cos, v_sincosf_cos, cos, mpfr_cos, 1, 1, f1, 0)
-F (_ZGVnN4v_cexpif_sin, v_cexpif_sin, sin, mpfr_sin, 1, 1, f1, 0)
-F (_ZGVnN4v_cexpif_cos, v_cexpif_cos, cos, mpfr_cos, 1, 1, f1, 0)
-
-F (_ZGVnN2v_sincos_sin, v_sincos_sin, sinl, mpfr_sin, 1, 0, d1, 0)
-F (_ZGVnN2v_sincos_cos, v_sincos_cos, cosl, mpfr_cos, 1, 0, d1, 0)
-F (_ZGVnN2v_cexpi_sin, v_cexpi_sin, sinl, mpfr_sin, 1, 0, d1, 0)
-F (_ZGVnN2v_cexpi_cos, v_cexpi_cos, cosl, mpfr_cos, 1, 0, d1, 0)
-
-#if WANT_SVE_MATH
-F (_ZGVsMxvv_powk, Z_sv_powk, ref_powi, mpfr_powi, 2, 0, d2, 0)
-F (_ZGVsMxvv_powi, Z_sv_powi, ref_powif, mpfr_powi, 2, 1, f2, 0)
-
-F (_ZGVsMxv_sincosf_sin, sv_sincosf_sin, sin, mpfr_sin, 1, 1, f1, 0)
-F (_ZGVsMxv_sincosf_cos, sv_sincosf_cos, cos, mpfr_cos, 1, 1, f1, 0)
-F (_ZGVsMxv_cexpif_sin, sv_cexpif_sin, sin, mpfr_sin, 1, 1, f1, 0)
-F (_ZGVsMxv_cexpif_cos, sv_cexpif_cos, cos, mpfr_cos, 1, 1, f1, 0)
-
-F (_ZGVsMxv_sincos_sin, sv_sincos_sin, sinl, mpfr_sin, 1, 0, d1, 0)
-F (_ZGVsMxv_sincos_cos, sv_sincos_cos, cosl, mpfr_cos, 1, 0, d1, 0)
-F (_ZGVsMxv_cexpi_sin, sv_cexpi_sin, sinl, mpfr_sin, 1, 0, d1, 0)
-F (_ZGVsMxv_cexpi_cos, sv_cexpi_cos, cosl, mpfr_cos, 1, 0, d1, 0)
-#endif
diff --git a/pl/math/test/ulp_wrappers.h b/pl/math/test/ulp_wrappers.h
deleted file mode 100644
index 0f7b68949c7bfb..00000000000000
--- a/pl/math/test/ulp_wrappers.h
+++ /dev/null
@@ -1,140 +0,0 @@
-// clang-format off
-/*
- * Function wrappers for ulp.
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#define _GNU_SOURCE
-#include <stdbool.h>
-#include <arm_neon.h>
-
-#if USE_MPFR
-static int sincos_mpfr_sin(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) {
-  mpfr_cos(y, x, r);
-  return mpfr_sin(y, x, r);
-}
-static int sincos_mpfr_cos(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) {
-  mpfr_sin(y, x, r);
-  return mpfr_cos(y, x, r);
-}
-static int wrap_mpfr_powi(mpfr_t ret, const mpfr_t x, const mpfr_t y, mpfr_rnd_t rnd) {
-  mpfr_t y2;
-  mpfr_init(y2);
-  mpfr_trunc(y2, y);
-  return mpfr_pow(ret, x, y2, rnd);
-}
-#endif
-
-/* Our implementations of powi/powk are too imprecise to verify
-   against any established pow implementation. Instead we have the
-   following simple implementation, against which it is enough to
-   maintain bitwise reproducibility. Note the test framework expects
-   the reference impl to be of higher precision than the function
-   under test. For instance this means that the reference for
-   double-precision powi will be passed a long double, so to check
-   bitwise reproducibility we have to cast it back down to
-   double. This is fine since a round-trip to higher precision and
-   back down is correctly rounded.  */
-#define DECL_POW_INT_REF(NAME, DBL_T, FLT_T, INT_T)                            \
-  static DBL_T __attribute__((unused)) NAME (DBL_T in_val, DBL_T y)            \
-  {                                                                            \
-    INT_T n = (INT_T) round (y);                                               \
-    FLT_T acc = 1.0;                                                           \
-    bool want_recip = n < 0;                                                   \
-    n = n < 0 ? -n : n;                                                        \
-                                                                               \
-    for (FLT_T c = in_val; n; c *= c, n >>= 1)                                 \
-      {                                                                        \
-        if (n & 0x1)                                                           \
-          {                                                                    \
-            acc *= c;                                                          \
-          }                                                                    \
-      }                                                                        \
-    if (want_recip)                                                            \
-      {                                                                        \
-        acc = 1.0 / acc;                                                       \
-      }                                                                        \
-    return acc;                                                                \
-  }
-
-DECL_POW_INT_REF(ref_powif, double, float, int)
-DECL_POW_INT_REF(ref_powi, long double, double, int)
-
-#define ZVF1_WRAP(func) static float Z_##func##f(float x) { return _ZGVnN4v_##func##f(argf(x))[0]; }
-#define ZVF2_WRAP(func) static float Z_##func##f(float x, float y) { return _ZGVnN4vv_##func##f(argf(x), argf(y))[0]; }
-#define ZVD1_WRAP(func) static double Z_##func(double x) { return _ZGVnN2v_##func(argd(x))[0]; }
-#define ZVD2_WRAP(func) static double Z_##func(double x, double y) { return _ZGVnN2vv_##func(argd(x), argd(y))[0]; }
-
-#if defined(__vpcs) && __aarch64__
-
-#define ZVNF1_WRAP(func) ZVF1_WRAP(func)
-#define ZVNF2_WRAP(func) ZVF2_WRAP(func)
-#define ZVND1_WRAP(func) ZVD1_WRAP(func)
-#define ZVND2_WRAP(func) ZVD2_WRAP(func)
-
-#else
-
-#define ZVNF1_WRAP(func)
-#define ZVNF2_WRAP(func)
-#define ZVND1_WRAP(func)
-#define ZVND2_WRAP(func)
-
-#endif
-
-#define ZSVF1_WRAP(func) static float Z_sv_##func##f(float x) { return svretf(_ZGVsMxv_##func##f(svargf(x), svptrue_b32())); }
-#define ZSVF2_WRAP(func) static float Z_sv_##func##f(float x, float y) { return svretf(_ZGVsMxvv_##func##f(svargf(x), svargf(y), svptrue_b32())); }
-#define ZSVD1_WRAP(func) static double Z_sv_##func(double x) { return svretd(_ZGVsMxv_##func(svargd(x), svptrue_b64())); }
-#define ZSVD2_WRAP(func) static double Z_sv_##func(double x, double y) { return svretd(_ZGVsMxvv_##func(svargd(x), svargd(y), svptrue_b64())); }
-
-#if WANT_SVE_MATH
-
-#define ZSVNF1_WRAP(func) ZSVF1_WRAP(func)
-#define ZSVNF2_WRAP(func) ZSVF2_WRAP(func)
-#define ZSVND1_WRAP(func) ZSVD1_WRAP(func)
-#define ZSVND2_WRAP(func) ZSVD2_WRAP(func)
-
-#else
-
-#define ZSVNF1_WRAP(func)
-#define ZSVNF2_WRAP(func)
-#define ZSVND1_WRAP(func)
-#define ZSVND2_WRAP(func)
-
-#endif
-
-/* No wrappers for scalar routines, but PL_SIG will emit them.  */
-#define ZSNF1_WRAP(func)
-#define ZSNF2_WRAP(func)
-#define ZSND1_WRAP(func)
-#define ZSND2_WRAP(func)
-
-#include "ulp_wrappers_gen.h"
-
-float v_sincosf_sin(float x) { float32x4_t s, c; _ZGVnN4vl4l4_sincosf(vdupq_n_f32(x), &s, &c); return s[0]; }
-float v_sincosf_cos(float x) { float32x4_t s, c; _ZGVnN4vl4l4_sincosf(vdupq_n_f32(x), &s, &c); return c[0]; }
-float v_cexpif_sin(float x) { return _ZGVnN4v_cexpif(vdupq_n_f32(x)).val[0][0]; }
-float v_cexpif_cos(float x) { return _ZGVnN4v_cexpif(vdupq_n_f32(x)).val[1][0]; }
-
-double v_sincos_sin(double x) { float64x2_t s, c; _ZGVnN2vl8l8_sincos(vdupq_n_f64(x), &s, &c); return s[0]; }
-double v_sincos_cos(double x) { float64x2_t s, c; _ZGVnN2vl8l8_sincos(vdupq_n_f64(x), &s, &c); return c[0]; }
-double v_cexpi_sin(double x) { return _ZGVnN2v_cexpi(vdupq_n_f64(x)).val[0][0]; }
-double v_cexpi_cos(double x) { return _ZGVnN2v_cexpi(vdupq_n_f64(x)).val[1][0]; }
-
-#if WANT_SVE_MATH
-static float Z_sv_powi(float x, float y) { return svretf(_ZGVsMxvv_powi(svargf(x), svdup_s32((int)round(y)), svptrue_b32())); }
-static double Z_sv_powk(double x, double y) { return svretd(_ZGVsMxvv_powk(svargd(x), svdup_s64((long)round(y)), svptrue_b64())); }
-
-float sv_sincosf_sin(float x) { float s[svcntw()], c[svcntw()]; _ZGVsMxvl4l4_sincosf(svdup_f32(x), s, c, svptrue_b32()); return s[0]; }
-float sv_sincosf_cos(float x) { float s[svcntw()], c[svcntw()]; _ZGVsMxvl4l4_sincosf(svdup_f32(x), s, c, svptrue_b32()); return c[0]; }
-float sv_cexpif_sin(float x) { return svretf(svget2(_ZGVsMxv_cexpif(svdup_f32(x), svptrue_b32()), 0)); }
-float sv_cexpif_cos(float x) { return svretf(svget2(_ZGVsMxv_cexpif(svdup_f32(x), svptrue_b32()), 1)); }
-
-double sv_sincos_sin(double x) { double s[svcntd()], c[svcntd()]; _ZGVsMxvl8l8_sincos(svdup_f64(x), s, c, svptrue_b64()); return s[0]; }
-double sv_sincos_cos(double x) { double s[svcntd()], c[svcntd()]; _ZGVsMxvl8l8_sincos(svdup_f64(x), s, c, svptrue_b64()); return c[0]; }
-double sv_cexpi_sin(double x) { return svretd(svget2(_ZGVsMxv_cexpi(svdup_f64(x), svptrue_b64()), 0)); }
-double sv_cexpi_cos(double x) { return svretd(svget2(_ZGVsMxv_cexpi(svdup_f64(x), svptrue_b64()), 1)); }
-
-#endif
-// clang-format on
diff --git a/pl/math/trigpi_references.c b/pl/math/trigpi_references.c
deleted file mode 100644
index 4b0514b6766a72..00000000000000
--- a/pl/math/trigpi_references.c
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Extended precision scalar reference functions for trigpi.
- *
- * Copyright (c) 2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#define _GNU_SOURCE
-#include "math_config.h"
-#include "mathlib.h"
-
-long double
-sinpil (long double x)
-{
-  /* sin(inf) should return nan, as defined by C23.  */
-  if (isinf (x))
-    return __math_invalid (x);
-
-  long double ax = fabsl (x);
-
-  /* Return 0 for all values above 2^64 to prevent
-     overflow when casting to uint64_t.  */
-  if (ax >= 0x1p64)
-    return 0;
-
-  /* All integer cases should return 0.  */
-  if (ax == (uint64_t) ax)
-    return 0;
-
-  return sinl (x * M_PIl);
-}
-
-long double
-cospil (long double x)
-{
-  /* cos(inf) should return nan, as defined by C23.  */
-  if (isinf (x))
-    return __math_invalid (x);
-
-  long double ax = fabsl (x);
-
-  if (ax >= 0x1p64)
-    return 1;
-
-  uint64_t m = (uint64_t) ax;
-
-  /* Integer values of cospi(x) should return +/-1.
-    The sign depends on if x is odd or even.  */
-  if (m == ax)
-    return (m & 1) ? -1 : 1;
-
-  /* Values of Integer + 0.5 should always return 0.  */
-  if (ax - 0.5 == m || ax + 0.5 == m)
-    return 0;
-
-  return cosl (ax * M_PIl);
-}
\ No newline at end of file
diff --git a/pl/math/v_asinh_3u5.c b/pl/math/v_asinh_3u5.c
deleted file mode 100644
index 4862bef948617d..00000000000000
--- a/pl/math/v_asinh_3u5.c
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * Double-precision vector asinh(x) function.
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "v_math.h"
-#include "poly_advsimd_f64.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
-#define A(i) v_f64 (__v_log_data.poly[i])
-#define N (1 << V_LOG_TABLE_BITS)
-
-const static struct data
-{
-  float64x2_t poly[18];
-  uint64x2_t off, huge_bound, abs_mask;
-  float64x2_t ln2, tiny_bound;
-} data = {
-  .off = V2 (0x3fe6900900000000),
-  .ln2 = V2 (0x1.62e42fefa39efp-1),
-  .huge_bound = V2 (0x5fe0000000000000),
-  .tiny_bound = V2 (0x1p-26),
-  .abs_mask = V2 (0x7fffffffffffffff),
-  /* Even terms of polynomial s.t. asinh(x) is approximated by
-     asinh(x) ~= x + x^3 * (C0 + C1 * x + C2 * x^2 + C3 * x^3 + ...).
-     Generated using Remez, f = (asinh(sqrt(x)) - sqrt(x))/x^(3/2).  */
-  .poly = { V2 (-0x1.55555555554a7p-3), V2 (0x1.3333333326c7p-4),
-	    V2 (-0x1.6db6db68332e6p-5), V2 (0x1.f1c71b26fb40dp-6),
-	    V2 (-0x1.6e8b8b654a621p-6), V2 (0x1.1c4daa9e67871p-6),
-	    V2 (-0x1.c9871d10885afp-7), V2 (0x1.7a16e8d9d2ecfp-7),
-	    V2 (-0x1.3ddca533e9f54p-7), V2 (0x1.0becef748dafcp-7),
-	    V2 (-0x1.b90c7099dd397p-8), V2 (0x1.541f2bb1ffe51p-8),
-	    V2 (-0x1.d217026a669ecp-9), V2 (0x1.0b5c7977aaf7p-9),
-	    V2 (-0x1.e0f37daef9127p-11), V2 (0x1.388b5fe542a6p-12),
-	    V2 (-0x1.021a48685e287p-14), V2 (0x1.93d4ba83d34dap-18) },
-};
-
-static float64x2_t NOINLINE VPCS_ATTR
-special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
-{
-  return v_call_f64 (asinh, x, y, special);
-}
-
-struct entry
-{
-  float64x2_t invc;
-  float64x2_t logc;
-};
-
-static inline struct entry
-lookup (uint64x2_t i)
-{
-  float64x2_t e0 = vld1q_f64 (
-      &__v_log_data.table[(i[0] >> (52 - V_LOG_TABLE_BITS)) & (N - 1)].invc);
-  float64x2_t e1 = vld1q_f64 (
-      &__v_log_data.table[(i[1] >> (52 - V_LOG_TABLE_BITS)) & (N - 1)].invc);
-  return (struct entry){ vuzp1q_f64 (e0, e1), vuzp2q_f64 (e0, e1) };
-}
-
-static inline float64x2_t
-log_inline (float64x2_t x, const struct data *d)
-{
-  /* Double-precision vector log, copied from ordinary vector log with some
-     cosmetic modification and special-cases removed.  */
-  uint64x2_t ix = vreinterpretq_u64_f64 (x);
-  uint64x2_t tmp = vsubq_u64 (ix, d->off);
-  int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52);
-  uint64x2_t iz
-      = vsubq_u64 (ix, vandq_u64 (tmp, vdupq_n_u64 (0xfffULL << 52)));
-  float64x2_t z = vreinterpretq_f64_u64 (iz);
-  struct entry e = lookup (tmp);
-  float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc);
-  float64x2_t kd = vcvtq_f64_s64 (k);
-  float64x2_t hi = vfmaq_f64 (vaddq_f64 (e.logc, r), kd, d->ln2);
-  float64x2_t r2 = vmulq_f64 (r, r);
-  float64x2_t y = vfmaq_f64 (A (2), A (3), r);
-  float64x2_t p = vfmaq_f64 (A (0), A (1), r);
-  y = vfmaq_f64 (y, A (4), r2);
-  y = vfmaq_f64 (p, y, r2);
-  y = vfmaq_f64 (hi, y, r2);
-  return y;
-}
-
-/* Double-precision implementation of vector asinh(x).
-   asinh is very sensitive around 1, so it is impractical to devise a single
-   low-cost algorithm which is sufficiently accurate on a wide range of input.
-   Instead we use two different algorithms:
-   asinh(x) = sign(x) * log(|x| + sqrt(x^2 + 1)      if |x| >= 1
-	    = sign(x) * (|x| + |x|^3 * P(x^2))       otherwise
-   where log(x) is an optimized log approximation, and P(x) is a polynomial
-   shared with the scalar routine. The greatest observed error 3.29 ULP, in
-   |x| >= 1:
-   __v_asinh(0x1.2cd9d717e2c9bp+0) got 0x1.ffffcfd0e234fp-1
-				  want 0x1.ffffcfd0e2352p-1.  */
-VPCS_ATTR float64x2_t V_NAME_D1 (asinh) (float64x2_t x)
-{
-  const struct data *d = ptr_barrier (&data);
-
-  float64x2_t ax = vabsq_f64 (x);
-  uint64x2_t iax = vreinterpretq_u64_f64 (ax);
-
-  uint64x2_t gt1 = vcgeq_f64 (ax, v_f64 (1));
-  uint64x2_t special = vcgeq_u64 (iax, d->huge_bound);
-
-#if WANT_SIMD_EXCEPT
-  uint64x2_t tiny = vcltq_f64 (ax, d->tiny_bound);
-  special = vorrq_u64 (special, tiny);
-#endif
-
-  /* Option 1: |x| >= 1.
-     Compute asinh(x) according by asinh(x) = log(x + sqrt(x^2 + 1)).
-     If WANT_SIMD_EXCEPT is enabled, sidestep special values, which will
-     overflow, by setting special lanes to 1. These will be fixed later.  */
-  float64x2_t option_1 = v_f64 (0);
-  if (likely (v_any_u64 (gt1)))
-    {
-#if WANT_SIMD_EXCEPT
-      float64x2_t xm = v_zerofy_f64 (ax, special);
-#else
-      float64x2_t xm = ax;
-#endif
-      option_1 = log_inline (
-	  vaddq_f64 (xm, vsqrtq_f64 (vfmaq_f64 (v_f64 (1), xm, xm))), d);
-    }
-
-  /* Option 2: |x| < 1.
-     Compute asinh(x) using a polynomial.
-     If WANT_SIMD_EXCEPT is enabled, sidestep special lanes, which will
-     overflow, and tiny lanes, which will underflow, by setting them to 0. They
-     will be fixed later, either by selecting x or falling back to the scalar
-     special-case. The largest observed error in this region is 1.47 ULPs:
-     __v_asinh(0x1.fdfcd00cc1e6ap-1) got 0x1.c1d6bf874019bp-1
-				    want 0x1.c1d6bf874019cp-1.  */
-  float64x2_t option_2 = v_f64 (0);
-  if (likely (v_any_u64 (vceqzq_u64 (gt1))))
-    {
-#if WANT_SIMD_EXCEPT
-      ax = v_zerofy_f64 (ax, vorrq_u64 (tiny, gt1));
-#endif
-      float64x2_t x2 = vmulq_f64 (ax, ax), x3 = vmulq_f64 (ax, x2),
-		  z2 = vmulq_f64 (x2, x2), z4 = vmulq_f64 (z2, z2),
-		  z8 = vmulq_f64 (z4, z4), z16 = vmulq_f64 (z8, z8);
-      float64x2_t p = v_estrin_17_f64 (x2, z2, z4, z8, z16, d->poly);
-      option_2 = vfmaq_f64 (ax, p, x3);
-#if WANT_SIMD_EXCEPT
-      option_2 = vbslq_f64 (tiny, x, option_2);
-#endif
-    }
-
-  /* Choose the right option for each lane.  */
-  float64x2_t y = vbslq_f64 (gt1, option_1, option_2);
-  /* Copy sign.  */
-  y = vbslq_f64 (d->abs_mask, y, x);
-
-  if (unlikely (v_any_u64 (special)))
-    return special_case (x, y, special);
-  return y;
-}
-
-PL_SIG (V, D, 1, asinh, -10.0, 10.0)
-PL_TEST_ULP (V_NAME_D1 (asinh), 2.80)
-PL_TEST_EXPECT_FENV (V_NAME_D1 (asinh), WANT_SIMD_EXCEPT)
-/* Test vector asinh 3 times, with control lane < 1, > 1 and special.
-   Ensures the v_sel is choosing the right option in all cases.  */
-#define V_ASINH_INTERVAL(lo, hi, n)                                           \
-  PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (asinh), lo, hi, n, 0.5)                  \
-  PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (asinh), lo, hi, n, 2)                    \
-  PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (asinh), lo, hi, n, 0x1p600)
-V_ASINH_INTERVAL (0, 0x1p-26, 50000)
-V_ASINH_INTERVAL (0x1p-26, 1, 50000)
-V_ASINH_INTERVAL (1, 0x1p511, 50000)
-V_ASINH_INTERVAL (0x1p511, inf, 40000)
diff --git a/pl/math/v_asinhf_2u7.c b/pl/math/v_asinhf_2u7.c
deleted file mode 100644
index 1723ba90d2f301..00000000000000
--- a/pl/math/v_asinhf_2u7.c
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Single-precision vector asinh(x) function.
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "v_log1pf_inline.h"
-
-#define SignMask v_u32 (0x80000000)
-
-const static struct data
-{
-  struct v_log1pf_data log1pf_consts;
-  uint32x4_t big_bound;
-#if WANT_SIMD_EXCEPT
-  uint32x4_t tiny_bound;
-#endif
-} data = {
-  .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE,
-  .big_bound = V4 (0x5f800000), /* asuint(0x1p64).  */
-#if WANT_SIMD_EXCEPT
-  .tiny_bound = V4 (0x30800000) /* asuint(0x1p-30).  */
-#endif
-};
-
-static float32x4_t NOINLINE VPCS_ATTR
-special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
-{
-  return v_call_f32 (asinhf, x, y, special);
-}
-
-/* Single-precision implementation of vector asinh(x), using vector log1p.
-   Worst-case error is 2.66 ULP, at roughly +/-0.25:
-   __v_asinhf(0x1.01b04p-2) got 0x1.fe163ep-3 want 0x1.fe1638p-3.  */
-VPCS_ATTR float32x4_t V_NAME_F1 (asinh) (float32x4_t x)
-{
-  const struct data *dat = ptr_barrier (&data);
-  uint32x4_t iax = vbicq_u32 (vreinterpretq_u32_f32 (x), SignMask);
-  float32x4_t ax = vreinterpretq_f32_u32 (iax);
-  uint32x4_t special = vcgeq_u32 (iax, dat->big_bound);
-  float32x4_t special_arg = x;
-
-#if WANT_SIMD_EXCEPT
-  /* Sidestep tiny and large values to avoid inadvertently triggering
-     under/overflow.  */
-  special = vorrq_u32 (special, vcltq_u32 (iax, dat->tiny_bound));
-  if (unlikely (v_any_u32 (special)))
-    {
-      ax = v_zerofy_f32 (ax, special);
-      x = v_zerofy_f32 (x, special);
-    }
-#endif
-
-  /* asinh(x) = log(x + sqrt(x * x + 1)).
-     For positive x, asinh(x) = log1p(x + x * x / (1 + sqrt(x * x + 1))).  */
-  float32x4_t d
-      = vaddq_f32 (v_f32 (1), vsqrtq_f32 (vfmaq_f32 (v_f32 (1), x, x)));
-  float32x4_t y = log1pf_inline (
-      vaddq_f32 (ax, vdivq_f32 (vmulq_f32 (ax, ax), d)), dat->log1pf_consts);
-
-  if (unlikely (v_any_u32 (special)))
-    return special_case (special_arg, vbslq_f32 (SignMask, x, y), special);
-  return vbslq_f32 (SignMask, x, y);
-}
-
-PL_SIG (V, F, 1, asinh, -10.0, 10.0)
-PL_TEST_ULP (V_NAME_F1 (asinh), 2.17)
-PL_TEST_EXPECT_FENV (V_NAME_F1 (asinh), WANT_SIMD_EXCEPT)
-PL_TEST_INTERVAL (V_NAME_F1 (asinh), 0, 0x1p-12, 40000)
-PL_TEST_INTERVAL (V_NAME_F1 (asinh), 0x1p-12, 1.0, 40000)
-PL_TEST_INTERVAL (V_NAME_F1 (asinh), 1.0, 0x1p11, 40000)
-PL_TEST_INTERVAL (V_NAME_F1 (asinh), 0x1p11, inf, 40000)
-PL_TEST_INTERVAL (V_NAME_F1 (asinh), -0, -0x1p-12, 20000)
-PL_TEST_INTERVAL (V_NAME_F1 (asinh), -0x1p-12, -1.0, 20000)
-PL_TEST_INTERVAL (V_NAME_F1 (asinh), -1.0, -0x1p11, 20000)
-PL_TEST_INTERVAL (V_NAME_F1 (asinh), -0x1p11, -inf, 20000)
diff --git a/pl/math/v_atan2_3u.c b/pl/math/v_atan2_3u.c
deleted file mode 100644
index f24667682dec00..00000000000000
--- a/pl/math/v_atan2_3u.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Double-precision vector atan2(x) function.
- *
- * Copyright (c) 2021-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_advsimd_f64.h"
-
-static const struct data
-{
-  float64x2_t pi_over_2;
-  float64x2_t poly[20];
-} data = {
-  /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
-     the interval [2**-1022, 1.0].  */
-  .poly = { V2 (-0x1.5555555555555p-2),	 V2 (0x1.99999999996c1p-3),
-	    V2 (-0x1.2492492478f88p-3),	 V2 (0x1.c71c71bc3951cp-4),
-	    V2 (-0x1.745d160a7e368p-4),	 V2 (0x1.3b139b6a88ba1p-4),
-	    V2 (-0x1.11100ee084227p-4),	 V2 (0x1.e1d0f9696f63bp-5),
-	    V2 (-0x1.aebfe7b418581p-5),	 V2 (0x1.842dbe9b0d916p-5),
-	    V2 (-0x1.5d30140ae5e99p-5),	 V2 (0x1.338e31eb2fbbcp-5),
-	    V2 (-0x1.00e6eece7de8p-5),	 V2 (0x1.860897b29e5efp-6),
-	    V2 (-0x1.0051381722a59p-6),	 V2 (0x1.14e9dc19a4a4ep-7),
-	    V2 (-0x1.d0062b42fe3bfp-9),	 V2 (0x1.17739e210171ap-10),
-	    V2 (-0x1.ab24da7be7402p-13), V2 (0x1.358851160a528p-16), },
-  .pi_over_2 = V2 (0x1.921fb54442d18p+0),
-};
-
-#define SignMask v_u64 (0x8000000000000000)
-
-/* Special cases i.e. 0, infinity, NaN (fall back to scalar calls).  */
-static float64x2_t VPCS_ATTR NOINLINE
-special_case (float64x2_t y, float64x2_t x, float64x2_t ret, uint64x2_t cmp)
-{
-  return v_call2_f64 (atan2, y, x, ret, cmp);
-}
-
-/* Returns 1 if input is the bit representation of 0, infinity or nan.  */
-static inline uint64x2_t
-zeroinfnan (uint64x2_t i)
-{
-  /* (2 * i - 1) >= (2 * asuint64 (INFINITY) - 1).  */
-  return vcgeq_u64 (vsubq_u64 (vaddq_u64 (i, i), v_u64 (1)),
-		    v_u64 (2 * asuint64 (INFINITY) - 1));
-}
-
-/* Fast implementation of vector atan2.
-   Maximum observed error is 2.8 ulps:
-   _ZGVnN2vv_atan2 (0x1.9651a429a859ap+5, 0x1.953075f4ee26p+5)
-	got 0x1.92d628ab678ccp-1
-       want 0x1.92d628ab678cfp-1.  */
-float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x)
-{
-  const struct data *data_ptr = ptr_barrier (&data);
-
-  uint64x2_t ix = vreinterpretq_u64_f64 (x);
-  uint64x2_t iy = vreinterpretq_u64_f64 (y);
-
-  uint64x2_t special_cases = vorrq_u64 (zeroinfnan (ix), zeroinfnan (iy));
-
-  uint64x2_t sign_x = vandq_u64 (ix, SignMask);
-  uint64x2_t sign_y = vandq_u64 (iy, SignMask);
-  uint64x2_t sign_xy = veorq_u64 (sign_x, sign_y);
-
-  float64x2_t ax = vabsq_f64 (x);
-  float64x2_t ay = vabsq_f64 (y);
-
-  uint64x2_t pred_xlt0 = vcltzq_f64 (x);
-  uint64x2_t pred_aygtax = vcgtq_f64 (ay, ax);
-
-  /* Set up z for call to atan.  */
-  float64x2_t n = vbslq_f64 (pred_aygtax, vnegq_f64 (ax), ay);
-  float64x2_t d = vbslq_f64 (pred_aygtax, ay, ax);
-  float64x2_t z = vdivq_f64 (n, d);
-
-  /* Work out the correct shift.  */
-  float64x2_t shift = vreinterpretq_f64_u64 (
-      vandq_u64 (pred_xlt0, vreinterpretq_u64_f64 (v_f64 (-2.0))));
-  shift = vbslq_f64 (pred_aygtax, vaddq_f64 (shift, v_f64 (1.0)), shift);
-  shift = vmulq_f64 (shift, data_ptr->pi_over_2);
-
-  /* Calculate the polynomial approximation.
-     Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of
-     full scheme to avoid underflow in x^16.
-     The order 19 polynomial P approximates
-     (atan(sqrt(x))-sqrt(x))/x^(3/2).  */
-  float64x2_t z2 = vmulq_f64 (z, z);
-  float64x2_t x2 = vmulq_f64 (z2, z2);
-  float64x2_t x4 = vmulq_f64 (x2, x2);
-  float64x2_t x8 = vmulq_f64 (x4, x4);
-  float64x2_t ret
-      = vfmaq_f64 (v_estrin_7_f64 (z2, x2, x4, data_ptr->poly),
-		   v_estrin_11_f64 (z2, x2, x4, x8, data_ptr->poly + 8), x8);
-
-  /* Finalize. y = shift + z + z^3 * P(z^2).  */
-  ret = vfmaq_f64 (z, ret, vmulq_f64 (z2, z));
-  ret = vaddq_f64 (ret, shift);
-
-  /* Account for the sign of x and y.  */
-  ret = vreinterpretq_f64_u64 (
-      veorq_u64 (vreinterpretq_u64_f64 (ret), sign_xy));
-
-  if (unlikely (v_any_u64 (special_cases)))
-    return special_case (y, x, ret, special_cases);
-
-  return ret;
-}
-
-/* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h.  */
-PL_SIG (V, D, 2, atan2)
-// TODO tighten this once __v_atan2 is fixed
-PL_TEST_ULP (V_NAME_D2 (atan2), 2.9)
-PL_TEST_INTERVAL (V_NAME_D2 (atan2), -10.0, 10.0, 50000)
-PL_TEST_INTERVAL (V_NAME_D2 (atan2), -1.0, 1.0, 40000)
-PL_TEST_INTERVAL (V_NAME_D2 (atan2), 0.0, 1.0, 40000)
-PL_TEST_INTERVAL (V_NAME_D2 (atan2), 1.0, 100.0, 40000)
-PL_TEST_INTERVAL (V_NAME_D2 (atan2), 1e6, 1e32, 40000)
diff --git a/pl/math/v_exp_data.c b/pl/math/v_exp_data.c
deleted file mode 100644
index fd01cf27606fa0..00000000000000
--- a/pl/math/v_exp_data.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Scale values for vector exp and exp2
- *
- * Copyright (c) 2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "math_config.h"
-
-/* 2^(j/N), j=0..N, N=2^7=128. Copied from math/v_exp_data.c.  */
-const uint64_t __v_exp_data[] = {
-  0x3ff0000000000000, 0x3feff63da9fb3335, 0x3fefec9a3e778061,
-  0x3fefe315e86e7f85, 0x3fefd9b0d3158574, 0x3fefd06b29ddf6de,
-  0x3fefc74518759bc8, 0x3fefbe3ecac6f383, 0x3fefb5586cf9890f,
-  0x3fefac922b7247f7, 0x3fefa3ec32d3d1a2, 0x3fef9b66affed31b,
-  0x3fef9301d0125b51, 0x3fef8abdc06c31cc, 0x3fef829aaea92de0,
-  0x3fef7a98c8a58e51, 0x3fef72b83c7d517b, 0x3fef6af9388c8dea,
-  0x3fef635beb6fcb75, 0x3fef5be084045cd4, 0x3fef54873168b9aa,
-  0x3fef4d5022fcd91d, 0x3fef463b88628cd6, 0x3fef3f49917ddc96,
-  0x3fef387a6e756238, 0x3fef31ce4fb2a63f, 0x3fef2b4565e27cdd,
-  0x3fef24dfe1f56381, 0x3fef1e9df51fdee1, 0x3fef187fd0dad990,
-  0x3fef1285a6e4030b, 0x3fef0cafa93e2f56, 0x3fef06fe0a31b715,
-  0x3fef0170fc4cd831, 0x3feefc08b26416ff, 0x3feef6c55f929ff1,
-  0x3feef1a7373aa9cb, 0x3feeecae6d05d866, 0x3feee7db34e59ff7,
-  0x3feee32dc313a8e5, 0x3feedea64c123422, 0x3feeda4504ac801c,
-  0x3feed60a21f72e2a, 0x3feed1f5d950a897, 0x3feece086061892d,
-  0x3feeca41ed1d0057, 0x3feec6a2b5c13cd0, 0x3feec32af0d7d3de,
-  0x3feebfdad5362a27, 0x3feebcb299fddd0d, 0x3feeb9b2769d2ca7,
-  0x3feeb6daa2cf6642, 0x3feeb42b569d4f82, 0x3feeb1a4ca5d920f,
-  0x3feeaf4736b527da, 0x3feead12d497c7fd, 0x3feeab07dd485429,
-  0x3feea9268a5946b7, 0x3feea76f15ad2148, 0x3feea5e1b976dc09,
-  0x3feea47eb03a5585, 0x3feea34634ccc320, 0x3feea23882552225,
-  0x3feea155d44ca973, 0x3feea09e667f3bcd, 0x3feea012750bdabf,
-  0x3fee9fb23c651a2f, 0x3fee9f7df9519484, 0x3fee9f75e8ec5f74,
-  0x3fee9f9a48a58174, 0x3fee9feb564267c9, 0x3feea0694fde5d3f,
-  0x3feea11473eb0187, 0x3feea1ed0130c132, 0x3feea2f336cf4e62,
-  0x3feea427543e1a12, 0x3feea589994cce13, 0x3feea71a4623c7ad,
-  0x3feea8d99b4492ed, 0x3feeaac7d98a6699, 0x3feeace5422aa0db,
-  0x3feeaf3216b5448c, 0x3feeb1ae99157736, 0x3feeb45b0b91ffc6,
-  0x3feeb737b0cdc5e5, 0x3feeba44cbc8520f, 0x3feebd829fde4e50,
-  0x3feec0f170ca07ba, 0x3feec49182a3f090, 0x3feec86319e32323,
-  0x3feecc667b5de565, 0x3feed09bec4a2d33, 0x3feed503b23e255d,
-  0x3feed99e1330b358, 0x3feede6b5579fdbf, 0x3feee36bbfd3f37a,
-  0x3feee89f995ad3ad, 0x3feeee07298db666, 0x3feef3a2b84f15fb,
-  0x3feef9728de5593a, 0x3feeff76f2fb5e47, 0x3fef05b030a1064a,
-  0x3fef0c1e904bc1d2, 0x3fef12c25bd71e09, 0x3fef199bdd85529c,
-  0x3fef20ab5fffd07a, 0x3fef27f12e57d14b, 0x3fef2f6d9406e7b5,
-  0x3fef3720dcef9069, 0x3fef3f0b555dc3fa, 0x3fef472d4a07897c,
-  0x3fef4f87080d89f2, 0x3fef5818dcfba487, 0x3fef60e316c98398,
-  0x3fef69e603db3285, 0x3fef7321f301b460, 0x3fef7c97337b9b5f,
-  0x3fef864614f5a129, 0x3fef902ee78b3ff6, 0x3fef9a51fbc74c83,
-  0x3fefa4afa2a490da, 0x3fefaf482d8e67f1, 0x3fefba1bee615a27,
-  0x3fefc52b376bba97, 0x3fefd0765b6e4540, 0x3fefdbfdad9cbe14,
-  0x3fefe7c1819e90d8, 0x3feff3c22b8f71f1,
-};
diff --git a/pl/math/v_exp_tail.h b/pl/math/v_exp_tail.h
deleted file mode 100644
index 903f1fd9571780..00000000000000
--- a/pl/math/v_exp_tail.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Constants for double-precision e^(x+tail) vector function.
- *
- * Copyright (c) 2019-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "math_config.h"
-
-#define C1_scal 0x1.fffffffffffd4p-2
-#define C2_scal 0x1.5555571d6b68cp-3
-#define C3_scal 0x1.5555576a59599p-5
-#define InvLn2_scal 0x1.71547652b82fep8 /* N/ln2.  */
-#define Ln2hi_scal 0x1.62e42fefa39efp-9 /* ln2/N.  */
-#define Ln2lo_scal 0x1.abc9e3b39803f3p-64
-
-#define N (1 << V_EXP_TAIL_TABLE_BITS)
-#define Tab __v_exp_tail_data
-#define IndexMask_scal (N - 1)
-#define Shift_scal 0x1.8p+52
-#define Thres_scal 704.0
diff --git a/pl/math/v_exp_tail_inline.h b/pl/math/v_exp_tail_inline.h
deleted file mode 100644
index 76ecc6b0a33a28..00000000000000
--- a/pl/math/v_exp_tail_inline.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Double-precision vector e^(x+tail) function.
- *
- * Copyright (c) 2019-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-#ifndef PL_MATH_V_EXP_TAIL_INLINE_H
-#define PL_MATH_V_EXP_TAIL_INLINE_H
-
-#include "v_math.h"
-#include "poly_advsimd_f64.h"
-
-#ifndef WANT_V_EXP_TAIL_SPECIALCASE
-#error                                                                         \
-  "Cannot use v_exp_tail_inline.h without specifying whether you need the special case computation."
-#endif
-
-#define N (1 << V_EXP_TAIL_TABLE_BITS)
-
-static const struct data
-{
-  float64x2_t poly[4];
-#if WANT_V_EXP_TAIL_SPECIALCASE
-  float64x2_t big_bound, huge_bound;
-#endif
-  float64x2_t shift, invln2, ln2_hi, ln2_lo;
-} data = {
-#if WANT_V_EXP_TAIL_SPECIALCASE
-  .big_bound = V2 (704.0),
-  .huge_bound = V2 (1280.0 * N),
-#endif
-  .shift = V2 (0x1.8p52),
-  .invln2 = V2 (0x1.71547652b82fep8),  /* N/ln2.  */
-  .ln2_hi = V2 (0x1.62e42fefa39efp-9), /* ln2/N.  */
-  .ln2_lo = V2 (0x1.abc9e3b39803f3p-64),
-  .poly = { V2 (1.0), V2 (0x1.fffffffffffd4p-2), V2 (0x1.5555571d6b68cp-3),
-	    V2 (0x1.5555576a59599p-5) },
-};
-
-static inline uint64x2_t
-lookup_sbits (uint64x2_t i)
-{
-  return (uint64x2_t){__v_exp_tail_data[i[0]], __v_exp_tail_data[i[1]]};
-}
-
-#if WANT_V_EXP_TAIL_SPECIALCASE
-#define SpecialOffset v_u64 (0x6000000000000000) /* 0x1p513.  */
-/* The following 2 bias when combined form the exponent bias:
-   SpecialBias1 - SpecialBias2 = asuint64(1.0).  */
-#define SpecialBias1 v_u64 (0x7000000000000000) /* 0x1p769.  */
-#define SpecialBias2 v_u64 (0x3010000000000000) /* 0x1p-254.  */
-static float64x2_t VPCS_ATTR
-v_exp_tail_special_case (float64x2_t s, float64x2_t y, float64x2_t n,
-			 const struct data *d)
-{
-  /* 2^(n/N) may overflow, break it up into s1*s2.  */
-  uint64x2_t b = vandq_u64 (vclezq_f64 (n), SpecialOffset);
-  float64x2_t s1 = vreinterpretq_f64_u64 (vsubq_u64 (SpecialBias1, b));
-  float64x2_t s2 = vreinterpretq_f64_u64 (
-    vaddq_u64 (vsubq_u64 (vreinterpretq_u64_f64 (s), SpecialBias2), b));
-  uint64x2_t oflow = vcagtq_f64 (n, d->huge_bound);
-  float64x2_t r0 = vmulq_f64 (vfmaq_f64 (s2, y, s2), s1);
-  float64x2_t r1 = vmulq_f64 (s1, s1);
-  return vbslq_f64 (oflow, r1, r0);
-}
-#endif
-
-static inline float64x2_t VPCS_ATTR
-v_exp_tail_inline (float64x2_t x, float64x2_t xtail)
-{
-  const struct data *d = ptr_barrier (&data);
-#if WANT_V_EXP_TAIL_SPECIALCASE
-  uint64x2_t special = vcgtq_f64 (vabsq_f64 (x), d->big_bound);
-#endif
-  /* n = round(x/(ln2/N)).  */
-  float64x2_t z = vfmaq_f64 (d->shift, x, d->invln2);
-  uint64x2_t u = vreinterpretq_u64_f64 (z);
-  float64x2_t n = vsubq_f64 (z, d->shift);
-
-  /* r = x - n*ln2/N.  */
-  float64x2_t r = x;
-  r = vfmsq_f64 (r, d->ln2_hi, n);
-  r = vfmsq_f64 (r, d->ln2_lo, n);
-
-  uint64x2_t e = vshlq_n_u64 (u, 52 - V_EXP_TAIL_TABLE_BITS);
-  uint64x2_t i = vandq_u64 (u, v_u64 (N - 1));
-
-  /* y = tail + exp(r) - 1 ~= r + C1 r^2 + C2 r^3 + C3 r^4, using Horner.  */
-  float64x2_t y = v_horner_3_f64 (r, d->poly);
-  y = vfmaq_f64 (xtail, y, r);
-
-  /* s = 2^(n/N).  */
-  u = lookup_sbits (i);
-  float64x2_t s = vreinterpretq_f64_u64 (vaddq_u64 (u, e));
-
-#if WANT_V_EXP_TAIL_SPECIALCASE
-  if (unlikely (v_any_u64 (special)))
-    return v_exp_tail_special_case (s, y, n, d);
-#endif
-  return vfmaq_f64 (s, y, s);
-}
-#endif // PL_MATH_V_EXP_TAIL_INLINE_H
diff --git a/pl/math/v_expf_inline.h b/pl/math/v_expf_inline.h
deleted file mode 100644
index 166683726b4db3..00000000000000
--- a/pl/math/v_expf_inline.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Helper for single-precision routines which calculate exp(x) and do not
- * need special-case handling
- *
- * Copyright (c) 2019-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#ifndef PL_MATH_V_EXPF_INLINE_H
-#define PL_MATH_V_EXPF_INLINE_H
-
-#include "v_math.h"
-
-struct v_expf_data
-{
-  float32x4_t poly[5];
-  float32x4_t shift, invln2_and_ln2;
-};
-
-/* maxerr: 1.45358 +0.5 ulp.  */
-#define V_EXPF_DATA                                                           \
-  {                                                                           \
-    .poly = { V4 (0x1.0e4020p-7f), V4 (0x1.573e2ep-5f), V4 (0x1.555e66p-3f),  \
-	      V4 (0x1.fffdb6p-2f), V4 (0x1.ffffecp-1f) },                     \
-    .shift = V4 (0x1.8p23f),                                                  \
-    .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 },   \
-  }
-
-#define ExponentBias v_u32 (0x3f800000) /* asuint(1.0f).  */
-#define C(i) d->poly[i]
-
-static inline float32x4_t
-v_expf_inline (float32x4_t x, const struct v_expf_data *d)
-{
-  /* Helper routine for calculating exp(x).
-     Copied from v_expf.c, with all special-case handling removed - the
-     calling routine should handle special values if required.  */
-
-  /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
-     x = ln2*n + r, with r in [-ln2/2, ln2/2].  */
-  float32x4_t n, r, z;
-  z = vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0);
-  n = vsubq_f32 (z, d->shift);
-  r = vfmsq_laneq_f32 (x, n, d->invln2_and_ln2, 1);
-  r = vfmsq_laneq_f32 (r, n, d->invln2_and_ln2, 2);
-  uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23);
-  float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias));
-
-  /* Custom order-4 Estrin avoids building high order monomial.  */
-  float32x4_t r2 = vmulq_f32 (r, r);
-  float32x4_t p, q, poly;
-  p = vfmaq_f32 (C (1), C (0), r);
-  q = vfmaq_f32 (C (3), C (2), r);
-  q = vfmaq_f32 (q, p, r2);
-  p = vmulq_f32 (C (4), r);
-  poly = vfmaq_f32 (p, q, r2);
-  return vfmaq_f32 (scale, poly, scale);
-}
-
-#endif // PL_MATH_V_EXPF_INLINE_H
diff --git a/pl/math/v_expm1_2u5.c b/pl/math/v_expm1_2u5.c
deleted file mode 100644
index dd255472cec0f9..00000000000000
--- a/pl/math/v_expm1_2u5.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Double-precision vector exp(x) - 1 function.
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "v_math.h"
-#include "poly_advsimd_f64.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
-static const struct data
-{
-  float64x2_t poly[11];
-  float64x2_t invln2, ln2, shift;
-  int64x2_t exponent_bias;
-#if WANT_SIMD_EXCEPT
-  uint64x2_t thresh, tiny_bound;
-#else
-  float64x2_t oflow_bound;
-#endif
-} data = {
-  /* Generated using fpminimax, with degree=12 in [log(2)/2, log(2)/2].  */
-  .poly = { V2 (0x1p-1), V2 (0x1.5555555555559p-3), V2 (0x1.555555555554bp-5),
-	    V2 (0x1.111111110f663p-7), V2 (0x1.6c16c16c1b5f3p-10),
-	    V2 (0x1.a01a01affa35dp-13), V2 (0x1.a01a018b4ecbbp-16),
-	    V2 (0x1.71ddf82db5bb4p-19), V2 (0x1.27e517fc0d54bp-22),
-	    V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29) },
-  .invln2 = V2 (0x1.71547652b82fep0),
-  .ln2 = { 0x1.62e42fefa39efp-1, 0x1.abc9e3b39803fp-56 },
-  .shift = V2 (0x1.8p52),
-  .exponent_bias = V2 (0x3ff0000000000000),
-#if WANT_SIMD_EXCEPT
-  /* asuint64(oflow_bound) - asuint64(0x1p-51), shifted left by 1 for abs
-     compare.  */
-  .thresh = V2 (0x78c56fa6d34b552),
-  /* asuint64(0x1p-51) << 1.  */
-  .tiny_bound = V2 (0x3cc0000000000000 << 1),
-#else
-  /* Value above which expm1(x) should overflow. Absolute value of the
-     underflow bound is greater than this, so it catches both cases - there is
-     a small window where fallbacks are triggered unnecessarily.  */
-  .oflow_bound = V2 (0x1.62b7d369a5aa9p+9),
-#endif
-};
-
-static float64x2_t VPCS_ATTR NOINLINE
-special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
-{
-  return v_call_f64 (expm1, x, y, special);
-}
-
-/* Double-precision vector exp(x) - 1 function.
-   The maximum error observed error is 2.18 ULP:
-   _ZGVnN2v_expm1 (0x1.634ba0c237d7bp-2) got 0x1.a8b9ea8d66e22p-2
-					want 0x1.a8b9ea8d66e2p-2.  */
-float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x)
-{
-  const struct data *d = ptr_barrier (&data);
-
-  uint64x2_t ix = vreinterpretq_u64_f64 (x);
-
-#if WANT_SIMD_EXCEPT
-  /* If fp exceptions are to be triggered correctly, fall back to scalar for
-     |x| < 2^-51, |x| > oflow_bound, Inf & NaN. Add ix to itself for
-     shift-left by 1, and compare with thresh which was left-shifted offline -
-     this is effectively an absolute compare.  */
-  uint64x2_t special
-      = vcgeq_u64 (vsubq_u64 (vaddq_u64 (ix, ix), d->tiny_bound), d->thresh);
-  if (unlikely (v_any_u64 (special)))
-    x = v_zerofy_f64 (x, special);
-#else
-  /* Large input, NaNs and Infs.  */
-  uint64x2_t special = vcageq_f64 (x, d->oflow_bound);
-#endif
-
-  /* Reduce argument to smaller range:
-     Let i = round(x / ln2)
-     and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
-     exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
-     where 2^i is exact because i is an integer.  */
-  float64x2_t n = vsubq_f64 (vfmaq_f64 (d->shift, d->invln2, x), d->shift);
-  int64x2_t i = vcvtq_s64_f64 (n);
-  float64x2_t f = vfmsq_laneq_f64 (x, n, d->ln2, 0);
-  f = vfmsq_laneq_f64 (f, n, d->ln2, 1);
-
-  /* Approximate expm1(f) using polynomial.
-     Taylor expansion for expm1(x) has the form:
-	 x + ax^2 + bx^3 + cx^4 ....
-     So we calculate the polynomial P(f) = a + bf + cf^2 + ...
-     and assemble the approximation expm1(f) ~= f + f^2 * P(f).  */
-  float64x2_t f2 = vmulq_f64 (f, f);
-  float64x2_t f4 = vmulq_f64 (f2, f2);
-  float64x2_t f8 = vmulq_f64 (f4, f4);
-  float64x2_t p = vfmaq_f64 (f, f2, v_estrin_10_f64 (f, f2, f4, f8, d->poly));
-
-  /* Assemble the result.
-     expm1(x) ~= 2^i * (p + 1) - 1
-     Let t = 2^i.  */
-  int64x2_t u = vaddq_s64 (vshlq_n_s64 (i, 52), d->exponent_bias);
-  float64x2_t t = vreinterpretq_f64_s64 (u);
-
-  if (unlikely (v_any_u64 (special)))
-    return special_case (vreinterpretq_f64_u64 (ix),
-			 vfmaq_f64 (vsubq_f64 (t, v_f64 (1.0)), p, t),
-			 special);
-
-  /* expm1(x) ~= p * t + (t - 1).  */
-  return vfmaq_f64 (vsubq_f64 (t, v_f64 (1.0)), p, t);
-}
-
-PL_SIG (V, D, 1, expm1, -9.9, 9.9)
-PL_TEST_ULP (V_NAME_D1 (expm1), 1.68)
-PL_TEST_EXPECT_FENV (V_NAME_D1 (expm1), WANT_SIMD_EXCEPT)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (expm1), 0, 0x1p-51, 1000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (expm1), 0x1p-51, 0x1.62b7d369a5aa9p+9, 100000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (expm1), 0x1.62b7d369a5aa9p+9, inf, 100)
diff --git a/pl/math/v_expm1f_1u6.c b/pl/math/v_expm1f_1u6.c
deleted file mode 100644
index 6b282d0cc00f3b..00000000000000
--- a/pl/math/v_expm1f_1u6.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Single-precision vector exp(x) - 1 function.
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "v_math.h"
-#include "poly_advsimd_f32.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
-static const struct data
-{
-  float32x4_t poly[5];
-  float32x4_t invln2_and_ln2;
-  float32x4_t shift;
-  int32x4_t exponent_bias;
-#if WANT_SIMD_EXCEPT
-  uint32x4_t thresh;
-#else
-  float32x4_t oflow_bound;
-#endif
-} data = {
-  /* Generated using fpminimax with degree=5 in [-log(2)/2, log(2)/2].  */
-  .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5),
-	    V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) },
-  /* Stores constants: invln2, ln2_hi, ln2_lo, 0.  */
-  .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 },
-  .shift = V4 (0x1.8p23f),
-  .exponent_bias = V4 (0x3f800000),
-#if !WANT_SIMD_EXCEPT
-  /* Value above which expm1f(x) should overflow. Absolute value of the
-     underflow bound is greater than this, so it catches both cases - there is
-     a small window where fallbacks are triggered unnecessarily.  */
-  .oflow_bound = V4 (0x1.5ebc4p+6),
-#else
-  /* asuint(oflow_bound) - asuint(0x1p-23), shifted left by 1 for absolute
-     compare.  */
-  .thresh = V4 (0x1d5ebc40),
-#endif
-};
-
-/* asuint(0x1p-23), shifted by 1 for abs compare.  */
-#define TinyBound v_u32 (0x34000000 << 1)
-
-static float32x4_t VPCS_ATTR NOINLINE
-special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
-{
-  return v_call_f32 (expm1f, x, y, special);
-}
-
-/* Single-precision vector exp(x) - 1 function.
-   The maximum error is 1.51 ULP:
-   _ZGVnN4v_expm1f (0x1.8baa96p-2) got 0x1.e2fb9p-2
-				  want 0x1.e2fb94p-2.  */
-float32x4_t VPCS_ATTR V_NAME_F1 (expm1) (float32x4_t x)
-{
-  const struct data *d = ptr_barrier (&data);
-  uint32x4_t ix = vreinterpretq_u32_f32 (x);
-
-#if WANT_SIMD_EXCEPT
-  /* If fp exceptions are to be triggered correctly, fall back to scalar for
-     |x| < 2^-23, |x| > oflow_bound, Inf & NaN. Add ix to itself for
-     shift-left by 1, and compare with thresh which was left-shifted offline -
-     this is effectively an absolute compare.  */
-  uint32x4_t special
-      = vcgeq_u32 (vsubq_u32 (vaddq_u32 (ix, ix), TinyBound), d->thresh);
-  if (unlikely (v_any_u32 (special)))
-    x = v_zerofy_f32 (x, special);
-#else
-  /* Handles very large values (+ve and -ve), +/-NaN, +/-Inf.  */
-  uint32x4_t special = vcagtq_f32 (x, d->oflow_bound);
-#endif
-
-  /* Reduce argument to smaller range:
-     Let i = round(x / ln2)
-     and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
-     exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
-     where 2^i is exact because i is an integer.  */
-  float32x4_t j = vsubq_f32 (
-      vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0), d->shift);
-  int32x4_t i = vcvtq_s32_f32 (j);
-  float32x4_t f = vfmsq_laneq_f32 (x, j, d->invln2_and_ln2, 1);
-  f = vfmsq_laneq_f32 (f, j, d->invln2_and_ln2, 2);
-
-  /* Approximate expm1(f) using polynomial.
-     Taylor expansion for expm1(x) has the form:
-	 x + ax^2 + bx^3 + cx^4 ....
-     So we calculate the polynomial P(f) = a + bf + cf^2 + ...
-     and assemble the approximation expm1(f) ~= f + f^2 * P(f).  */
-  float32x4_t p = v_horner_4_f32 (f, d->poly);
-  p = vfmaq_f32 (f, vmulq_f32 (f, f), p);
-
-  /* Assemble the result.
-     expm1(x) ~= 2^i * (p + 1) - 1
-     Let t = 2^i.  */
-  int32x4_t u = vaddq_s32 (vshlq_n_s32 (i, 23), d->exponent_bias);
-  float32x4_t t = vreinterpretq_f32_s32 (u);
-
-  if (unlikely (v_any_u32 (special)))
-    return special_case (vreinterpretq_f32_u32 (ix),
-			 vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t),
-			 special);
-
-  /* expm1(x) ~= p * t + (t - 1).  */
-  return vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t);
-}
-
-PL_SIG (V, F, 1, expm1, -9.9, 9.9)
-PL_TEST_ULP (V_NAME_F1 (expm1), 1.02)
-PL_TEST_EXPECT_FENV (V_NAME_F1 (expm1), WANT_SIMD_EXCEPT)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (expm1), 0, 0x1p-23, 1000)
-PL_TEST_INTERVAL (V_NAME_F1 (expm1), -0x1p-23, 0x1.5ebc4p+6, 1000000)
-PL_TEST_INTERVAL (V_NAME_F1 (expm1), -0x1p-23, -0x1.9bbabcp+6, 1000000)
-PL_TEST_INTERVAL (V_NAME_F1 (expm1), 0x1.5ebc4p+6, inf, 1000)
-PL_TEST_INTERVAL (V_NAME_F1 (expm1), -0x1.9bbabcp+6, -inf, 1000)
diff --git a/pl/math/v_expm1f_inline.h b/pl/math/v_expm1f_inline.h
deleted file mode 100644
index 6ae94c452de2f1..00000000000000
--- a/pl/math/v_expm1f_inline.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Helper for single-precision routines which calculate exp(x) - 1 and do not
- * need special-case handling
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#ifndef PL_MATH_V_EXPM1F_INLINE_H
-#define PL_MATH_V_EXPM1F_INLINE_H
-
-#include "v_math.h"
-#include "math_config.h"
-#include "poly_advsimd_f32.h"
-
-struct v_expm1f_data
-{
-  float32x4_t poly[5];
-  float32x4_t invln2_and_ln2, shift;
-  int32x4_t exponent_bias;
-};
-
-/* Coefficients generated using fpminimax with degree=5 in [-log(2)/2,
-   log(2)/2]. Exponent bias is asuint(1.0f).
-   invln2_and_ln2 Stores constants: invln2, ln2_lo, ln2_hi, 0.  */
-#define V_EXPM1F_DATA                                                         \
-  {                                                                           \
-    .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5),     \
-	      V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) },                      \
-    .shift = V4 (0x1.8p23f), .exponent_bias = V4 (0x3f800000),                \
-    .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 },   \
-  }
-
-static inline float32x4_t
-expm1f_inline (float32x4_t x, const struct v_expm1f_data *d)
-{
-  /* Helper routine for calculating exp(x) - 1.
-     Copied from v_expm1f_1u6.c, with all special-case handling removed - the
-     calling routine should handle special values if required.  */
-
-  /* Reduce argument: f in [-ln2/2, ln2/2], i is exact.  */
-  float32x4_t j = vsubq_f32 (
-      vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0), d->shift);
-  int32x4_t i = vcvtq_s32_f32 (j);
-  float32x4_t f = vfmsq_laneq_f32 (x, j, d->invln2_and_ln2, 1);
-  f = vfmsq_laneq_f32 (f, j, d->invln2_and_ln2, 2);
-
-  /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f).
-     Uses Estrin scheme, where the main _ZGVnN4v_expm1f routine uses
-     Horner.  */
-  float32x4_t f2 = vmulq_f32 (f, f);
-  float32x4_t f4 = vmulq_f32 (f2, f2);
-  float32x4_t p = v_estrin_4_f32 (f, f2, f4, d->poly);
-  p = vfmaq_f32 (f, f2, p);
-
-  /* t = 2^i.  */
-  int32x4_t u = vaddq_s32 (vshlq_n_s32 (i, 23), d->exponent_bias);
-  float32x4_t t = vreinterpretq_f32_s32 (u);
-  /* expm1(x) ~= p * t + (t - 1).  */
-  return vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t);
-}
-
-#endif // PL_MATH_V_EXPM1F_INLINE_H
diff --git a/pl/math/v_log10_2u5.c b/pl/math/v_log10_2u5.c
deleted file mode 100644
index 35dd62fe5e3ef8..00000000000000
--- a/pl/math/v_log10_2u5.c
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Double-precision vector log10(x) function.
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_advsimd_f64.h"
-
-#define N (1 << V_LOG10_TABLE_BITS)
-
-static const struct data
-{
-  uint64x2_t min_norm;
-  uint32x4_t special_bound;
-  float64x2_t poly[5];
-  float64x2_t invln10, log10_2, ln2;
-  uint64x2_t sign_exp_mask;
-} data = {
-  /* Computed from log coefficients divided by log(10) then rounded to double
-     precision.  */
-  .poly = { V2 (-0x1.bcb7b1526e506p-3), V2 (0x1.287a7636be1d1p-3),
-	    V2 (-0x1.bcb7b158af938p-4), V2 (0x1.63c78734e6d07p-4),
-	    V2 (-0x1.287461742fee4p-4) },
-  .ln2 = V2 (0x1.62e42fefa39efp-1),
-  .invln10 = V2 (0x1.bcb7b1526e50ep-2),
-  .log10_2 = V2 (0x1.34413509f79ffp-2),
-  .min_norm = V2 (0x0010000000000000), /* asuint64(0x1p-1022).  */
-  .special_bound = V4 (0x7fe00000),    /* asuint64(inf) - min_norm.  */
-  .sign_exp_mask = V2 (0xfff0000000000000),
-};
-
-#define Off v_u64 (0x3fe6900900000000)
-#define IndexMask (N - 1)
-
-#define T(s, i) __v_log10_data.s[i]
-
-struct entry
-{
-  float64x2_t invc;
-  float64x2_t log10c;
-};
-
-static inline struct entry
-lookup (uint64x2_t i)
-{
-  struct entry e;
-  uint64_t i0 = (i[0] >> (52 - V_LOG10_TABLE_BITS)) & IndexMask;
-  uint64_t i1 = (i[1] >> (52 - V_LOG10_TABLE_BITS)) & IndexMask;
-  float64x2_t e0 = vld1q_f64 (&__v_log10_data.table[i0].invc);
-  float64x2_t e1 = vld1q_f64 (&__v_log10_data.table[i1].invc);
-  e.invc = vuzp1q_f64 (e0, e1);
-  e.log10c = vuzp2q_f64 (e0, e1);
-  return e;
-}
-
-static float64x2_t VPCS_ATTR NOINLINE
-special_case (float64x2_t x, float64x2_t y, float64x2_t hi, float64x2_t r2,
-	      uint32x2_t special)
-{
-  return v_call_f64 (log10, x, vfmaq_f64 (hi, r2, y), vmovl_u32 (special));
-}
-
-/* Fast implementation of double-precision vector log10
-   is a slight modification of double-precision vector log.
-   Max ULP error: < 2.5 ulp (nearest rounding.)
-   Maximum measured at 2.46 ulp for x in [0.96, 0.97]
-   _ZGVnN2v_log10(0x1.13192407fcb46p+0) got 0x1.fff6be3cae4bbp-6
-				       want 0x1.fff6be3cae4b9p-6.  */
-float64x2_t VPCS_ATTR V_NAME_D1 (log10) (float64x2_t x)
-{
-  const struct data *d = ptr_barrier (&data);
-  uint64x2_t ix = vreinterpretq_u64_f64 (x);
-  uint32x2_t special = vcge_u32 (vsubhn_u64 (ix, d->min_norm),
-				 vget_low_u32 (d->special_bound));
-
-  /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
-     The range is split into N subintervals.
-     The ith subinterval contains z and c is near its center.  */
-  uint64x2_t tmp = vsubq_u64 (ix, Off);
-  int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52);
-  uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, d->sign_exp_mask));
-  float64x2_t z = vreinterpretq_f64_u64 (iz);
-
-  struct entry e = lookup (tmp);
-
-  /* log10(x) = log1p(z/c-1)/log(10) + log10(c) + k*log10(2).  */
-  float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc);
-  float64x2_t kd = vcvtq_f64_s64 (k);
-
-  /* hi = r / log(10) + log10(c) + k*log10(2).
-     Constants in v_log10_data.c are computed (in extended precision) as
-     e.log10c := e.logc * ivln10.  */
-  float64x2_t w = vfmaq_f64 (e.log10c, r, d->invln10);
-
-  /* y = log10(1+r) + n * log10(2).  */
-  float64x2_t hi = vfmaq_f64 (w, kd, d->log10_2);
-
-  /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi.  */
-  float64x2_t r2 = vmulq_f64 (r, r);
-  float64x2_t y = v_pw_horner_4_f64 (r, r2, d->poly);
-
-  if (unlikely (v_any_u32h (special)))
-    return special_case (x, y, hi, r2, special);
-  return vfmaq_f64 (hi, r2, y);
-}
-
-PL_SIG (V, D, 1, log10, 0.01, 11.1)
-PL_TEST_ULP (V_NAME_D1 (log10), 1.97)
-PL_TEST_EXPECT_FENV_ALWAYS (V_NAME_D1 (log10))
-PL_TEST_INTERVAL (V_NAME_D1 (log10), -0.0, -inf, 1000)
-PL_TEST_INTERVAL (V_NAME_D1 (log10), 0, 0x1p-149, 1000)
-PL_TEST_INTERVAL (V_NAME_D1 (log10), 0x1p-149, 0x1p-126, 4000)
-PL_TEST_INTERVAL (V_NAME_D1 (log10), 0x1p-126, 0x1p-23, 50000)
-PL_TEST_INTERVAL (V_NAME_D1 (log10), 0x1p-23, 1.0, 50000)
-PL_TEST_INTERVAL (V_NAME_D1 (log10), 1.0, 100, 50000)
-PL_TEST_INTERVAL (V_NAME_D1 (log10), 100, inf, 50000)
diff --git a/pl/math/v_log10f_3u5.c b/pl/math/v_log10f_3u5.c
deleted file mode 100644
index 92bc50ba5bd93a..00000000000000
--- a/pl/math/v_log10f_3u5.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Single-precision vector log10 function.
- *
- * Copyright (c) 2020-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "v_math.h"
-#include "poly_advsimd_f32.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
-static const struct data
-{
-  uint32x4_t min_norm;
-  uint16x8_t special_bound;
-  float32x4_t poly[8];
-  float32x4_t inv_ln10, ln2;
-  uint32x4_t off, mantissa_mask;
-} data = {
-  /* Use order 9 for log10(1+x), i.e. order 8 for log10(1+x)/x, with x in
-      [-1/3, 1/3] (offset=2/3). Max. relative error: 0x1.068ee468p-25.  */
-  .poly = { V4 (-0x1.bcb79cp-3f), V4 (0x1.2879c8p-3f), V4 (-0x1.bcd472p-4f),
-	    V4 (0x1.6408f8p-4f), V4 (-0x1.246f8p-4f), V4 (0x1.f0e514p-5f),
-	    V4 (-0x1.0fc92cp-4f), V4 (0x1.f5f76ap-5f) },
-  .ln2 = V4 (0x1.62e43p-1f),
-  .inv_ln10 = V4 (0x1.bcb7b2p-2f),
-  .min_norm = V4 (0x00800000),
-  .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm.  */
-  .off = V4 (0x3f2aaaab),	/* 0.666667.  */
-  .mantissa_mask = V4 (0x007fffff),
-};
-
-static float32x4_t VPCS_ATTR NOINLINE
-special_case (float32x4_t x, float32x4_t y, float32x4_t p, float32x4_t r2,
-	      uint16x4_t cmp)
-{
-  /* Fall back to scalar code.  */
-  return v_call_f32 (log10f, x, vfmaq_f32 (y, p, r2), vmovl_u16 (cmp));
-}
-
-/* Fast implementation of AdvSIMD log10f,
-   uses a similar approach as AdvSIMD logf with the same offset (i.e., 2/3) and
-   an order 9 polynomial.
-   Maximum error: 3.305ulps (nearest rounding.)
-   _ZGVnN4v_log10f(0x1.555c16p+0) got 0x1.ffe2fap-4
-				 want 0x1.ffe2f4p-4.  */
-float32x4_t VPCS_ATTR V_NAME_F1 (log10) (float32x4_t x)
-{
-  const struct data *d = ptr_barrier (&data);
-  uint32x4_t u = vreinterpretq_u32_f32 (x);
-  uint16x4_t special = vcge_u16 (vsubhn_u32 (u, d->min_norm),
-				 vget_low_u16 (d->special_bound));
-
-  /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
-  u = vsubq_u32 (u, d->off);
-  float32x4_t n = vcvtq_f32_s32 (
-      vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend.  */
-  u = vaddq_u32 (vandq_u32 (u, d->mantissa_mask), d->off);
-  float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f));
-
-  /* y = log10(1+r) + n * log10(2).  */
-  float32x4_t r2 = vmulq_f32 (r, r);
-  float32x4_t poly = v_pw_horner_7_f32 (r, r2, d->poly);
-  /* y = Log10(2) * n + poly * InvLn(10).  */
-  float32x4_t y = vfmaq_f32 (r, d->ln2, n);
-  y = vmulq_f32 (y, d->inv_ln10);
-
-  if (unlikely (v_any_u16h (special)))
-    return special_case (x, y, poly, r2, special);
-  return vfmaq_f32 (y, poly, r2);
-}
-
-PL_SIG (V, F, 1, log10, 0.01, 11.1)
-PL_TEST_ULP (V_NAME_F1 (log10), 2.81)
-PL_TEST_EXPECT_FENV_ALWAYS (V_NAME_F1 (log10))
-PL_TEST_INTERVAL (V_NAME_F1 (log10), -0.0, -inf, 100)
-PL_TEST_INTERVAL (V_NAME_F1 (log10), 0, 0x1p-126, 100)
-PL_TEST_INTERVAL (V_NAME_F1 (log10), 0x1p-126, 0x1p-23, 50000)
-PL_TEST_INTERVAL (V_NAME_F1 (log10), 0x1p-23, 1.0, 50000)
-PL_TEST_INTERVAL (V_NAME_F1 (log10), 1.0, 100, 50000)
-PL_TEST_INTERVAL (V_NAME_F1 (log10), 100, inf, 50000)
diff --git a/pl/math/v_log1p_2u5.c b/pl/math/v_log1p_2u5.c
deleted file mode 100644
index face02ddc6c388..00000000000000
--- a/pl/math/v_log1p_2u5.c
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Double-precision vector log(1+x) function.
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "v_math.h"
-#include "poly_advsimd_f64.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
-const static struct data
-{
-  float64x2_t poly[19], ln2[2];
-  uint64x2_t hf_rt2_top, one_m_hf_rt2_top, umask, inf, minus_one;
-  int64x2_t one_top;
-} data = {
-  /* Generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1].  */
-  .poly = { V2 (-0x1.ffffffffffffbp-2), V2 (0x1.55555555551a9p-2),
-	    V2 (-0x1.00000000008e3p-2), V2 (0x1.9999999a32797p-3),
-	    V2 (-0x1.555555552fecfp-3), V2 (0x1.249248e071e5ap-3),
-	    V2 (-0x1.ffffff8bf8482p-4), V2 (0x1.c71c8f07da57ap-4),
-	    V2 (-0x1.9999ca4ccb617p-4), V2 (0x1.7459ad2e1dfa3p-4),
-	    V2 (-0x1.554d2680a3ff2p-4), V2 (0x1.3b4c54d487455p-4),
-	    V2 (-0x1.2548a9ffe80e6p-4), V2 (0x1.0f389a24b2e07p-4),
-	    V2 (-0x1.eee4db15db335p-5), V2 (0x1.e95b494d4a5ddp-5),
-	    V2 (-0x1.15fdf07cb7c73p-4), V2 (0x1.0310b70800fcfp-4),
-	    V2 (-0x1.cfa7385bdb37ep-6) },
-  .ln2 = { V2 (0x1.62e42fefa3800p-1), V2 (0x1.ef35793c76730p-45) },
-  /* top32(asuint64(sqrt(2)/2)) << 32.  */
-  .hf_rt2_top = V2 (0x3fe6a09e00000000),
-  /* (top32(asuint64(1)) - top32(asuint64(sqrt(2)/2))) << 32.  */
-  .one_m_hf_rt2_top = V2 (0x00095f6200000000),
-  .umask = V2 (0x000fffff00000000),
-  .one_top = V2 (0x3ff),
-  .inf = V2 (0x7ff0000000000000),
-  .minus_one = V2 (0xbff0000000000000)
-};
-
-#define BottomMask v_u64 (0xffffffff)
-
-static float64x2_t VPCS_ATTR NOINLINE
-special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
-{
-  return v_call_f64 (log1p, x, y, special);
-}
-
-/* Vector log1p approximation using polynomial on reduced interval. Routine is
-   a modification of the algorithm used in scalar log1p, with no shortcut for
-   k=0 and no narrowing for f and k. Maximum observed error is 2.45 ULP:
-   _ZGVnN2v_log1p(0x1.658f7035c4014p+11) got 0x1.fd61d0727429dp+2
-					want 0x1.fd61d0727429fp+2 .  */
-VPCS_ATTR float64x2_t V_NAME_D1 (log1p) (float64x2_t x)
-{
-  const struct data *d = ptr_barrier (&data);
-  uint64x2_t ix = vreinterpretq_u64_f64 (x);
-  uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x));
-  uint64x2_t special = vcgeq_u64 (ia, d->inf);
-
-#if WANT_SIMD_EXCEPT
-  special = vorrq_u64 (special,
-		       vcgeq_u64 (ix, vreinterpretq_u64_f64 (v_f64 (-1))));
-  if (unlikely (v_any_u64 (special)))
-    x = v_zerofy_f64 (x, special);
-#else
-  special = vorrq_u64 (special, vcleq_f64 (x, v_f64 (-1)));
-#endif
-
-  /* With x + 1 = t * 2^k (where t = f + 1 and k is chosen such that f
-			   is in [sqrt(2)/2, sqrt(2)]):
-     log1p(x) = k*log(2) + log1p(f).
-
-     f may not be representable exactly, so we need a correction term:
-     let m = round(1 + x), c = (1 + x) - m.
-     c << m: at very small x, log1p(x) ~ x, hence:
-     log(1+x) - log(m) ~ c/m.
-
-     We therefore calculate log1p(x) by k*log2 + log1p(f) + c/m.  */
-
-  /* Obtain correctly scaled k by manipulation in the exponent.
-     The scalar algorithm casts down to 32-bit at this point to calculate k and
-     u_red. We stay in double-width to obtain f and k, using the same constants
-     as the scalar algorithm but shifted left by 32.  */
-  float64x2_t m = vaddq_f64 (x, v_f64 (1));
-  uint64x2_t mi = vreinterpretq_u64_f64 (m);
-  uint64x2_t u = vaddq_u64 (mi, d->one_m_hf_rt2_top);
-
-  int64x2_t ki
-      = vsubq_s64 (vreinterpretq_s64_u64 (vshrq_n_u64 (u, 52)), d->one_top);
-  float64x2_t k = vcvtq_f64_s64 (ki);
-
-  /* Reduce x to f in [sqrt(2)/2, sqrt(2)].  */
-  uint64x2_t utop = vaddq_u64 (vandq_u64 (u, d->umask), d->hf_rt2_top);
-  uint64x2_t u_red = vorrq_u64 (utop, vandq_u64 (mi, BottomMask));
-  float64x2_t f = vsubq_f64 (vreinterpretq_f64_u64 (u_red), v_f64 (1));
-
-  /* Correction term c/m.  */
-  float64x2_t cm = vdivq_f64 (vsubq_f64 (x, vsubq_f64 (m, v_f64 (1))), m);
-
-  /* Approximate log1p(x) on the reduced input using a polynomial. Because
-     log1p(0)=0 we choose an approximation of the form:
-       x + C0*x^2 + C1*x^3 + C2x^4 + ...
-     Hence approximation has the form f + f^2 * P(f)
-      where P(x) = C0 + C1*x + C2x^2 + ...
-     Assembling this all correctly is dealt with at the final step.  */
-  float64x2_t f2 = vmulq_f64 (f, f);
-  float64x2_t p = v_pw_horner_18_f64 (f, f2, d->poly);
-
-  float64x2_t ylo = vfmaq_f64 (cm, k, d->ln2[1]);
-  float64x2_t yhi = vfmaq_f64 (f, k, d->ln2[0]);
-  float64x2_t y = vaddq_f64 (ylo, yhi);
-
-  if (unlikely (v_any_u64 (special)))
-    return special_case (vreinterpretq_f64_u64 (ix), vfmaq_f64 (y, f2, p),
-			 special);
-
-  return vfmaq_f64 (y, f2, p);
-}
-
-PL_SIG (V, D, 1, log1p, -0.9, 10.0)
-PL_TEST_ULP (V_NAME_D1 (log1p), 1.97)
-PL_TEST_EXPECT_FENV (V_NAME_D1 (log1p), WANT_SIMD_EXCEPT)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (log1p), 0.0, 0x1p-23, 50000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (log1p), 0x1p-23, 0.001, 50000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (log1p), 0.001, 1.0, 50000)
-PL_TEST_INTERVAL (V_NAME_D1 (log1p), 1, inf, 40000)
-PL_TEST_INTERVAL (V_NAME_D1 (log1p), -1.0, -inf, 500)
diff --git a/pl/math/v_log1p_inline.h b/pl/math/v_log1p_inline.h
deleted file mode 100644
index bd57bfc6fe6e84..00000000000000
--- a/pl/math/v_log1p_inline.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Helper for vector double-precision routines which calculate log(1 + x) and do
- * not need special-case handling
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-#ifndef PL_MATH_V_LOG1P_INLINE_H
-#define PL_MATH_V_LOG1P_INLINE_H
-
-#include "v_math.h"
-#include "poly_advsimd_f64.h"
-
-struct v_log1p_data
-{
-  float64x2_t poly[19], ln2[2];
-  uint64x2_t hf_rt2_top, one_m_hf_rt2_top, umask;
-  int64x2_t one_top;
-};
-
-/* Coefficients generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1].  */
-#define V_LOG1P_CONSTANTS_TABLE                                               \
-  {                                                                           \
-    .poly = { V2 (-0x1.ffffffffffffbp-2), V2 (0x1.55555555551a9p-2),          \
-	      V2 (-0x1.00000000008e3p-2), V2 (0x1.9999999a32797p-3),          \
-	      V2 (-0x1.555555552fecfp-3), V2 (0x1.249248e071e5ap-3),          \
-	      V2 (-0x1.ffffff8bf8482p-4), V2 (0x1.c71c8f07da57ap-4),          \
-	      V2 (-0x1.9999ca4ccb617p-4), V2 (0x1.7459ad2e1dfa3p-4),          \
-	      V2 (-0x1.554d2680a3ff2p-4), V2 (0x1.3b4c54d487455p-4),          \
-	      V2 (-0x1.2548a9ffe80e6p-4), V2 (0x1.0f389a24b2e07p-4),          \
-	      V2 (-0x1.eee4db15db335p-5), V2 (0x1.e95b494d4a5ddp-5),          \
-	      V2 (-0x1.15fdf07cb7c73p-4), V2 (0x1.0310b70800fcfp-4),          \
-	      V2 (-0x1.cfa7385bdb37ep-6) },                                   \
-    .ln2 = { V2 (0x1.62e42fefa3800p-1), V2 (0x1.ef35793c76730p-45) },         \
-    .hf_rt2_top = V2 (0x3fe6a09e00000000),                                    \
-    .one_m_hf_rt2_top = V2 (0x00095f6200000000),                              \
-    .umask = V2 (0x000fffff00000000), .one_top = V2 (0x3ff)                   \
-  }
-
-#define BottomMask v_u64 (0xffffffff)
-
-static inline float64x2_t
-log1p_inline (float64x2_t x, const struct v_log1p_data *d)
-{
-  /* Helper for calculating log(x + 1). Copied from v_log1p_2u5.c, with several
-     modifications:
-     - No special-case handling - this should be dealt with by the caller.
-     - Pairwise Horner polynomial evaluation for improved accuracy.
-     - Optionally simulate the shortcut for k=0, used in the scalar routine,
-       using v_sel, for improved accuracy when the argument to log1p is close to
-       0. This feature is enabled by defining WANT_V_LOG1P_K0_SHORTCUT as 1 in
-       the source of the caller before including this file.
-     See v_log1pf_2u1.c for details of the algorithm.  */
-  float64x2_t m = vaddq_f64 (x, v_f64 (1));
-  uint64x2_t mi = vreinterpretq_u64_f64 (m);
-  uint64x2_t u = vaddq_u64 (mi, d->one_m_hf_rt2_top);
-
-  int64x2_t ki
-      = vsubq_s64 (vreinterpretq_s64_u64 (vshrq_n_u64 (u, 52)), d->one_top);
-  float64x2_t k = vcvtq_f64_s64 (ki);
-
-  /* Reduce x to f in [sqrt(2)/2, sqrt(2)].  */
-  uint64x2_t utop = vaddq_u64 (vandq_u64 (u, d->umask), d->hf_rt2_top);
-  uint64x2_t u_red = vorrq_u64 (utop, vandq_u64 (mi, BottomMask));
-  float64x2_t f = vsubq_f64 (vreinterpretq_f64_u64 (u_red), v_f64 (1));
-
-  /* Correction term c/m.  */
-  float64x2_t cm = vdivq_f64 (vsubq_f64 (x, vsubq_f64 (m, v_f64 (1))), m);
-
-#ifndef WANT_V_LOG1P_K0_SHORTCUT
-#error                                                                         \
-  "Cannot use v_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0"
-#elif WANT_V_LOG1P_K0_SHORTCUT
-  /* Shortcut if k is 0 - set correction term to 0 and f to x. The result is
-     that the approximation is solely the polynomial.  */
-  uint64x2_t k0 = vceqzq_f64 (k);
-  cm = v_zerofy_f64 (cm, k0);
-  f = vbslq_f64 (k0, x, f);
-#endif
-
-  /* Approximate log1p(f) on the reduced input using a polynomial.  */
-  float64x2_t f2 = vmulq_f64 (f, f);
-  float64x2_t p = v_pw_horner_18_f64 (f, f2, d->poly);
-
-  /* Assemble log1p(x) = k * log2 + log1p(f) + c/m.  */
-  float64x2_t ylo = vfmaq_f64 (cm, k, d->ln2[1]);
-  float64x2_t yhi = vfmaq_f64 (f, k, d->ln2[0]);
-  return vfmaq_f64 (vaddq_f64 (ylo, yhi), f2, p);
-}
-
-#endif // PL_MATH_V_LOG1P_INLINE_H
diff --git a/pl/math/v_log1pf_2u1.c b/pl/math/v_log1pf_2u1.c
deleted file mode 100644
index 153c88da9c888d..00000000000000
--- a/pl/math/v_log1pf_2u1.c
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Single-precision vector log(1+x) function.
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_advsimd_f32.h"
-
-const static struct data
-{
-  float32x4_t poly[8], ln2;
-  uint32x4_t tiny_bound, minus_one, four, thresh;
-  int32x4_t three_quarters;
-} data = {
-  .poly = { /* Generated using FPMinimax in [-0.25, 0.5]. First two coefficients
-	       (1, -0.5) are not stored as they can be generated more
-	       efficiently.  */
-	    V4 (0x1.5555aap-2f), V4 (-0x1.000038p-2f), V4 (0x1.99675cp-3f),
-	    V4 (-0x1.54ef78p-3f), V4 (0x1.28a1f4p-3f), V4 (-0x1.0da91p-3f),
-	    V4 (0x1.abcb6p-4f), V4 (-0x1.6f0d5ep-5f) },
-  .ln2 = V4 (0x1.62e43p-1f),
-  .tiny_bound = V4 (0x34000000), /* asuint32(0x1p-23). ulp=0.5 at 0x1p-23.  */
-  .thresh = V4 (0x4b800000), /* asuint32(INFINITY) - tiny_bound.  */
-  .minus_one = V4 (0xbf800000),
-  .four = V4 (0x40800000),
-  .three_quarters = V4 (0x3f400000)
-};
-
-static inline float32x4_t
-eval_poly (float32x4_t m, const float32x4_t *p)
-{
-  /* Approximate log(1+m) on [-0.25, 0.5] using split Estrin scheme.  */
-  float32x4_t p_12 = vfmaq_f32 (v_f32 (-0.5), m, p[0]);
-  float32x4_t p_34 = vfmaq_f32 (p[1], m, p[2]);
-  float32x4_t p_56 = vfmaq_f32 (p[3], m, p[4]);
-  float32x4_t p_78 = vfmaq_f32 (p[5], m, p[6]);
-
-  float32x4_t m2 = vmulq_f32 (m, m);
-  float32x4_t p_02 = vfmaq_f32 (m, m2, p_12);
-  float32x4_t p_36 = vfmaq_f32 (p_34, m2, p_56);
-  float32x4_t p_79 = vfmaq_f32 (p_78, m2, p[7]);
-
-  float32x4_t m4 = vmulq_f32 (m2, m2);
-  float32x4_t p_06 = vfmaq_f32 (p_02, m4, p_36);
-  return vfmaq_f32 (p_06, m4, vmulq_f32 (m4, p_79));
-}
-
-static float32x4_t NOINLINE VPCS_ATTR
-special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
-{
-  return v_call_f32 (log1pf, x, y, special);
-}
-
-/* Vector log1pf approximation using polynomial on reduced interval. Accuracy
-   is roughly 2.02 ULP:
-   log1pf(0x1.21e13ap-2) got 0x1.fe8028p-3 want 0x1.fe802cp-3.  */
-VPCS_ATTR float32x4_t V_NAME_F1 (log1p) (float32x4_t x)
-{
-  const struct data *d = ptr_barrier (&data);
-
-  uint32x4_t ix = vreinterpretq_u32_f32 (x);
-  uint32x4_t ia = vreinterpretq_u32_f32 (vabsq_f32 (x));
-  uint32x4_t special_cases
-      = vorrq_u32 (vcgeq_u32 (vsubq_u32 (ia, d->tiny_bound), d->thresh),
-		   vcgeq_u32 (ix, d->minus_one));
-  float32x4_t special_arg = x;
-
-#if WANT_SIMD_EXCEPT
-  if (unlikely (v_any_u32 (special_cases)))
-    /* Side-step special lanes so fenv exceptions are not triggered
-       inadvertently.  */
-    x = v_zerofy_f32 (x, special_cases);
-#endif
-
-  /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
-			   is in [-0.25, 0.5]):
-     log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
-
-     We approximate log1p(m) with a polynomial, then scale by
-     k*log(2). Instead of doing this directly, we use an intermediate
-     scale factor s = 4*k*log(2) to ensure the scale is representable
-     as a normalised fp32 number.  */
-
-  float32x4_t m = vaddq_f32 (x, v_f32 (1.0f));
-
-  /* Choose k to scale x to the range [-1/4, 1/2].  */
-  int32x4_t k
-      = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d->three_quarters),
-		   v_s32 (0xff800000));
-  uint32x4_t ku = vreinterpretq_u32_s32 (k);
-
-  /* Scale x by exponent manipulation.  */
-  float32x4_t m_scale
-      = vreinterpretq_f32_u32 (vsubq_u32 (vreinterpretq_u32_f32 (x), ku));
-
-  /* Scale up to ensure that the scale factor is representable as normalised
-     fp32 number, and scale m down accordingly.  */
-  float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d->four, ku));
-  m_scale = vaddq_f32 (m_scale, vfmaq_f32 (v_f32 (-1.0f), v_f32 (0.25f), s));
-
-  /* Evaluate polynomial on the reduced interval.  */
-  float32x4_t p = eval_poly (m_scale, d->poly);
-
-  /* The scale factor to be applied back at the end - by multiplying float(k)
-     by 2^-23 we get the unbiased exponent of k.  */
-  float32x4_t scale_back = vcvtq_f32_s32 (vshrq_n_s32 (k, 23));
-
-  /* Apply the scaling back.  */
-  float32x4_t y = vfmaq_f32 (p, scale_back, d->ln2);
-
-  if (unlikely (v_any_u32 (special_cases)))
-    return special_case (special_arg, y, special_cases);
-  return y;
-}
-
-PL_SIG (V, F, 1, log1p, -0.9, 10.0)
-PL_TEST_ULP (V_NAME_F1 (log1p), 1.53)
-PL_TEST_EXPECT_FENV (V_NAME_F1 (log1p), WANT_SIMD_EXCEPT)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (log1p), 0.0, 0x1p-23, 30000)
-PL_TEST_SYM_INTERVAL (V_NAME_F1 (log1p), 0x1p-23, 1, 50000)
-PL_TEST_INTERVAL (V_NAME_F1 (log1p), 1, inf, 50000)
-PL_TEST_INTERVAL (V_NAME_F1 (log1p), -1.0, -inf, 1000)
diff --git a/pl/math/v_log1pf_inline.h b/pl/math/v_log1pf_inline.h
deleted file mode 100644
index c654c6bad08fd7..00000000000000
--- a/pl/math/v_log1pf_inline.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Helper for single-precision routines which calculate log(1 + x) and do not
- * need special-case handling
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#ifndef PL_MATH_V_LOG1PF_INLINE_H
-#define PL_MATH_V_LOG1PF_INLINE_H
-
-#include "v_math.h"
-#include "poly_advsimd_f32.h"
-
-struct v_log1pf_data
-{
-  float32x4_t poly[8], ln2;
-  uint32x4_t four;
-  int32x4_t three_quarters;
-};
-
-/* Polynomial generated using FPMinimax in [-0.25, 0.5]. First two coefficients
-   (1, -0.5) are not stored as they can be generated more efficiently.  */
-#define V_LOG1PF_CONSTANTS_TABLE                                              \
-  {                                                                           \
-    .poly                                                                     \
-	= { V4 (0x1.5555aap-2f),  V4 (-0x1.000038p-2f), V4 (0x1.99675cp-3f),  \
-	    V4 (-0x1.54ef78p-3f), V4 (0x1.28a1f4p-3f),	V4 (-0x1.0da91p-3f),  \
-	    V4 (0x1.abcb6p-4f),	  V4 (-0x1.6f0d5ep-5f) },                     \
-	.ln2 = V4 (0x1.62e43p-1f), .four = V4 (0x40800000),                   \
-	.three_quarters = V4 (0x3f400000)                                     \
-  }
-
-static inline float32x4_t
-eval_poly (float32x4_t m, const float32x4_t *c)
-{
-  /* Approximate log(1+m) on [-0.25, 0.5] using pairwise Horner (main routine
-     uses split Estrin, but this way reduces register pressure in the calling
-     routine).  */
-  float32x4_t q = vfmaq_f32 (v_f32 (-0.5), m, c[0]);
-  float32x4_t m2 = vmulq_f32 (m, m);
-  q = vfmaq_f32 (m, m2, q);
-  float32x4_t p = v_pw_horner_6_f32 (m, m2, c + 1);
-  p = vmulq_f32 (m2, p);
-  return vfmaq_f32 (q, m2, p);
-}
-
-static inline float32x4_t
-log1pf_inline (float32x4_t x, const struct v_log1pf_data d)
-{
-  /* Helper for calculating log(x + 1). Copied from log1pf_2u1.c, with no
-     special-case handling. See that file for details of the algorithm.  */
-  float32x4_t m = vaddq_f32 (x, v_f32 (1.0f));
-  int32x4_t k
-      = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d.three_quarters),
-		   v_s32 (0xff800000));
-  uint32x4_t ku = vreinterpretq_u32_s32 (k);
-  float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d.four, ku));
-  float32x4_t m_scale
-      = vreinterpretq_f32_u32 (vsubq_u32 (vreinterpretq_u32_f32 (x), ku));
-  m_scale = vaddq_f32 (m_scale, vfmaq_f32 (v_f32 (-1.0f), v_f32 (0.25f), s));
-  float32x4_t p = eval_poly (m_scale, d.poly);
-  float32x4_t scale_back = vmulq_f32 (vcvtq_f32_s32 (k), v_f32 (0x1.0p-23f));
-  return vfmaq_f32 (p, scale_back, d.ln2);
-}
-
-#endif //  PL_MATH_V_LOG1PF_INLINE_H
diff --git a/pl/math/v_log2_3u.c b/pl/math/v_log2_3u.c
deleted file mode 100644
index 2dd2c34b7c97f0..00000000000000
--- a/pl/math/v_log2_3u.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Double-precision vector log2 function.
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "v_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_advsimd_f64.h"
-
-#define N (1 << V_LOG2_TABLE_BITS)
-
-static const struct data
-{
-  uint64x2_t min_norm;
-  uint32x4_t special_bound;
-  float64x2_t poly[5];
-  float64x2_t invln2;
-  uint64x2_t sign_exp_mask;
-} data = {
-  /* Each coefficient was generated to approximate log(r) for |r| < 0x1.fp-9
-     and N = 128, then scaled by log2(e) in extended precision and rounded back
-     to double precision.  */
-  .poly = { V2 (-0x1.71547652b83p-1), V2 (0x1.ec709dc340953p-2),
-	    V2 (-0x1.71547651c8f35p-2), V2 (0x1.2777ebe12dda5p-2),
-	    V2 (-0x1.ec738d616fe26p-3) },
-  .invln2 = V2 (0x1.71547652b82fep0),
-  .min_norm = V2 (0x0010000000000000), /* asuint64(0x1p-1022).  */
-  .special_bound = V4 (0x7fe00000),    /* asuint64(inf) - min_norm.  */
-  .sign_exp_mask = V2 (0xfff0000000000000),
-};
-
-#define Off v_u64 (0x3fe6900900000000)
-#define IndexMask (N - 1)
-
-struct entry
-{
-  float64x2_t invc;
-  float64x2_t log2c;
-};
-
-static inline struct entry
-lookup (uint64x2_t i)
-{
-  struct entry e;
-  uint64_t i0 = (i[0] >> (52 - V_LOG2_TABLE_BITS)) & IndexMask;
-  uint64_t i1 = (i[1] >> (52 - V_LOG2_TABLE_BITS)) & IndexMask;
-  float64x2_t e0 = vld1q_f64 (&__v_log2_data.table[i0].invc);
-  float64x2_t e1 = vld1q_f64 (&__v_log2_data.table[i1].invc);
-  e.invc = vuzp1q_f64 (e0, e1);
-  e.log2c = vuzp2q_f64 (e0, e1);
-  return e;
-}
-
-static float64x2_t VPCS_ATTR NOINLINE
-special_case (float64x2_t x, float64x2_t y, float64x2_t w, float64x2_t r2,
-	      uint32x2_t special)
-{
-  return v_call_f64 (log2, x, vfmaq_f64 (w, r2, y), vmovl_u32 (special));
-}
-
-/* Double-precision vector log2 routine. Implements the same algorithm as
-   vector log10, with coefficients and table entries scaled in extended
-   precision. The maximum observed error is 2.58 ULP:
-   _ZGVnN2v_log2(0x1.0b556b093869bp+0) got 0x1.fffb34198d9dap-5
-				      want 0x1.fffb34198d9ddp-5.  */
-float64x2_t VPCS_ATTR V_NAME_D1 (log2) (float64x2_t x)
-{
-  const struct data *d = ptr_barrier (&data);
-  uint64x2_t ix = vreinterpretq_u64_f64 (x);
-  uint32x2_t special = vcge_u32 (vsubhn_u64 (ix, d->min_norm),
-				 vget_low_u32 (d->special_bound));
-
-  /* x = 2^k z; where z is in range [Off,2*Off) and exact.
-     The range is split into N subintervals.
-     The ith subinterval contains z and c is near its center.  */
-  uint64x2_t tmp = vsubq_u64 (ix, Off);
-  int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52);
-  uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, d->sign_exp_mask));
-  float64x2_t z = vreinterpretq_f64_u64 (iz);
-
-  struct entry e = lookup (tmp);
-
-  /* log2(x) = log1p(z/c-1)/log(2) + log2(c) + k.  */
-
-  float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc);
-  float64x2_t kd = vcvtq_f64_s64 (k);
-  float64x2_t w = vfmaq_f64 (e.log2c, r, d->invln2);
-
-  float64x2_t r2 = vmulq_f64 (r, r);
-  float64x2_t y = v_pw_horner_4_f64 (r, r2, d->poly);
-  w = vaddq_f64 (kd, w);
-
-  if (unlikely (v_any_u32h (special)))
-    return special_case (x, y, w, r2, special);
-  return vfmaq_f64 (w, r2, y);
-}
-
-PL_SIG (V, D, 1, log2, 0.01, 11.1)
-PL_TEST_ULP (V_NAME_D1 (log2), 2.09)
-PL_TEST_EXPECT_FENV_ALWAYS (V_NAME_D1 (log2))
-PL_TEST_INTERVAL (V_NAME_D1 (log2), -0.0, -0x1p126, 100)
-PL_TEST_INTERVAL (V_NAME_D1 (log2), 0x1p-149, 0x1p-126, 4000)
-PL_TEST_INTERVAL (V_NAME_D1 (log2), 0x1p-126, 0x1p-23, 50000)
-PL_TEST_INTERVAL (V_NAME_D1 (log2), 0x1p-23, 1.0, 50000)
-PL_TEST_INTERVAL (V_NAME_D1 (log2), 1.0, 100, 50000)
-PL_TEST_INTERVAL (V_NAME_D1 (log2), 100, inf, 50000)
diff --git a/pl/math/v_log2f_2u5.c b/pl/math/v_log2f_2u5.c
deleted file mode 100644
index c64d88742136e1..00000000000000
--- a/pl/math/v_log2f_2u5.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Single-precision vector log2 function.
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "v_math.h"
-#include "poly_advsimd_f32.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
-static const struct data
-{
-  uint32x4_t min_norm;
-  uint16x8_t special_bound;
-  uint32x4_t off, mantissa_mask;
-  float32x4_t poly[9];
-} data = {
-  /* Coefficients generated using Remez algorithm approximate
-     log2(1+r)/r for r in [ -1/3, 1/3 ].
-     rel error: 0x1.c4c4b0cp-26.  */
-  .poly = { V4 (0x1.715476p0f), /* (float)(1 / ln(2)).  */
-	    V4 (-0x1.715458p-1f), V4 (0x1.ec701cp-2f), V4 (-0x1.7171a4p-2f),
-	    V4 (0x1.27a0b8p-2f), V4 (-0x1.e5143ep-3f), V4 (0x1.9d8ecap-3f),
-	    V4 (-0x1.c675bp-3f), V4 (0x1.9e495p-3f) },
-  .min_norm = V4 (0x00800000),
-  .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm.  */
-  .off = V4 (0x3f2aaaab),	/* 0.666667.  */
-  .mantissa_mask = V4 (0x007fffff),
-};
-
-static float32x4_t VPCS_ATTR NOINLINE
-special_case (float32x4_t x, float32x4_t n, float32x4_t p, float32x4_t r,
-	      uint16x4_t cmp)
-{
-  /* Fall back to scalar code.  */
-  return v_call_f32 (log2f, x, vfmaq_f32 (n, p, r), vmovl_u16 (cmp));
-}
-
-/* Fast implementation for single precision AdvSIMD log2,
-   relies on same argument reduction as AdvSIMD logf.
-   Maximum error: 2.48 ULPs
-   _ZGVnN4v_log2f(0x1.558174p+0) got 0x1.a9be84p-2
-				want 0x1.a9be8p-2.  */
-float32x4_t VPCS_ATTR V_NAME_F1 (log2) (float32x4_t x)
-{
-  const struct data *d = ptr_barrier (&data);
-  uint32x4_t u = vreinterpretq_u32_f32 (x);
-  uint16x4_t special = vcge_u16 (vsubhn_u32 (u, d->min_norm),
-				 vget_low_u16 (d->special_bound));
-
-  /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
-  u = vsubq_u32 (u, d->off);
-  float32x4_t n = vcvtq_f32_s32 (
-      vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend.  */
-  u = vaddq_u32 (vandq_u32 (u, d->mantissa_mask), d->off);
-  float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f));
-
-  /* y = log2(1+r) + n.  */
-  float32x4_t r2 = vmulq_f32 (r, r);
-  float32x4_t p = v_pw_horner_8_f32 (r, r2, d->poly);
-
-  if (unlikely (v_any_u16h (special)))
-    return special_case (x, n, p, r, special);
-  return vfmaq_f32 (n, p, r);
-}
-
-PL_SIG (V, F, 1, log2, 0.01, 11.1)
-PL_TEST_ULP (V_NAME_F1 (log2), 1.99)
-PL_TEST_EXPECT_FENV_ALWAYS (V_NAME_F1 (log2))
-PL_TEST_INTERVAL (V_NAME_F1 (log2), -0.0, -0x1p126, 100)
-PL_TEST_INTERVAL (V_NAME_F1 (log2), 0x1p-149, 0x1p-126, 4000)
-PL_TEST_INTERVAL (V_NAME_F1 (log2), 0x1p-126, 0x1p-23, 50000)
-PL_TEST_INTERVAL (V_NAME_F1 (log2), 0x1p-23, 1.0, 50000)
-PL_TEST_INTERVAL (V_NAME_F1 (log2), 1.0, 100, 50000)
-PL_TEST_INTERVAL (V_NAME_F1 (log2), 100, inf, 50000)
diff --git a/pl/math/v_log_data.c b/pl/math/v_log_data.c
deleted file mode 100644
index a26e8a051d973b..00000000000000
--- a/pl/math/v_log_data.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Lookup table for double-precision log(x) vector function.
- *
- * Copyright (c) 2019-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "math_config.h"
-
-const struct v_log_data __v_log_data = {
-  /* Worst-case error: 1.17 + 0.5 ulp.
-     Rel error: 0x1.6272e588p-56 in [ -0x1.fc1p-9 0x1.009p-8 ].  */
-  .poly = { -0x1.ffffffffffff7p-2, 0x1.55555555170d4p-2, -0x1.0000000399c27p-2,
-	    0x1.999b2e90e94cap-3, -0x1.554e550bd501ep-3 },
-  .ln2 = 0x1.62e42fefa39efp-1,
-  /* Algorithm:
-
-	x = 2^k z
-	log(x) = k ln2 + log(c) + poly(z/c - 1)
-
-     where z is in [a;2a) which is split into N subintervals (a=0x1.69009p-1,
-     N=128) and log(c) and 1/c for the ith subinterval comes from two lookup
-     tables:
-
-	table[i].invc = 1/c
-	table[i].logc = (double)log(c)
-
-     where c is near the center of the subinterval and is chosen by trying
-     several floating point invc candidates around 1/center and selecting one
-     for which the error in (double)log(c) is minimized (< 0x1p-74), except the
-     subinterval that contains 1 and the previous one got tweaked to avoid
-     cancellation.  */
-  .table = { { 0x1.6a133d0dec120p+0, -0x1.62fe995eb963ap-2 },
-	     { 0x1.6815f2f3e42edp+0, -0x1.5d5a48dad6b67p-2 },
-	     { 0x1.661e39be1ac9ep+0, -0x1.57bde257d2769p-2 },
-	     { 0x1.642bfa30ac371p+0, -0x1.52294fbf2af55p-2 },
-	     { 0x1.623f1d916f323p+0, -0x1.4c9c7b598aa38p-2 },
-	     { 0x1.60578da220f65p+0, -0x1.47174fc5ff560p-2 },
-	     { 0x1.5e75349dea571p+0, -0x1.4199b7fa7b5cap-2 },
-	     { 0x1.5c97fd387a75ap+0, -0x1.3c239f48cfb99p-2 },
-	     { 0x1.5abfd2981f200p+0, -0x1.36b4f154d2aebp-2 },
-	     { 0x1.58eca051dc99cp+0, -0x1.314d9a0ff32fbp-2 },
-	     { 0x1.571e526d9df12p+0, -0x1.2bed85cca3cffp-2 },
-	     { 0x1.5554d555b3fcbp+0, -0x1.2694a11421af9p-2 },
-	     { 0x1.539015e2a20cdp+0, -0x1.2142d8d014fb2p-2 },
-	     { 0x1.51d0014ee0164p+0, -0x1.1bf81a2c77776p-2 },
-	     { 0x1.50148538cd9eep+0, -0x1.16b452a39c6a4p-2 },
-	     { 0x1.4e5d8f9f698a1p+0, -0x1.11776ffa6c67ep-2 },
-	     { 0x1.4cab0edca66bep+0, -0x1.0c416035020e0p-2 },
-	     { 0x1.4afcf1a9db874p+0, -0x1.071211aa10fdap-2 },
-	     { 0x1.495327136e16fp+0, -0x1.01e972e293b1bp-2 },
-	     { 0x1.47ad9e84af28fp+0, -0x1.f98ee587fd434p-3 },
-	     { 0x1.460c47b39ae15p+0, -0x1.ef5800ad716fbp-3 },
-	     { 0x1.446f12b278001p+0, -0x1.e52e160484698p-3 },
-	     { 0x1.42d5efdd720ecp+0, -0x1.db1104b19352ep-3 },
-	     { 0x1.4140cfe001a0fp+0, -0x1.d100ac59e0bd6p-3 },
-	     { 0x1.3fafa3b421f69p+0, -0x1.c6fced287c3bdp-3 },
-	     { 0x1.3e225c9c8ece5p+0, -0x1.bd05a7b317c29p-3 },
-	     { 0x1.3c98ec29a211ap+0, -0x1.b31abd229164fp-3 },
-	     { 0x1.3b13442a413fep+0, -0x1.a93c0edadb0a3p-3 },
-	     { 0x1.399156baa3c54p+0, -0x1.9f697ee30d7ddp-3 },
-	     { 0x1.38131639b4cdbp+0, -0x1.95a2efa9aa40ap-3 },
-	     { 0x1.36987540fbf53p+0, -0x1.8be843d796044p-3 },
-	     { 0x1.352166b648f61p+0, -0x1.82395ecc477edp-3 },
-	     { 0x1.33adddb3eb575p+0, -0x1.7896240966422p-3 },
-	     { 0x1.323dcd99fc1d3p+0, -0x1.6efe77aca8c55p-3 },
-	     { 0x1.30d129fefc7d2p+0, -0x1.65723e117ec5cp-3 },
-	     { 0x1.2f67e6b72fe7dp+0, -0x1.5bf15c0955706p-3 },
-	     { 0x1.2e01f7cf8b187p+0, -0x1.527bb6c111da1p-3 },
-	     { 0x1.2c9f518ddc86ep+0, -0x1.491133c939f8fp-3 },
-	     { 0x1.2b3fe86e5f413p+0, -0x1.3fb1b90c7fc58p-3 },
-	     { 0x1.29e3b1211b25cp+0, -0x1.365d2cc485f8dp-3 },
-	     { 0x1.288aa08b373cfp+0, -0x1.2d13758970de7p-3 },
-	     { 0x1.2734abcaa8467p+0, -0x1.23d47a721fd47p-3 },
-	     { 0x1.25e1c82459b81p+0, -0x1.1aa0229f25ec2p-3 },
-	     { 0x1.2491eb1ad59c5p+0, -0x1.117655ddebc3bp-3 },
-	     { 0x1.23450a54048b5p+0, -0x1.0856fbf83ab6bp-3 },
-	     { 0x1.21fb1bb09e578p+0, -0x1.fe83fabbaa106p-4 },
-	     { 0x1.20b415346d8f7p+0, -0x1.ec6e8507a56cdp-4 },
-	     { 0x1.1f6fed179a1acp+0, -0x1.da6d68c7cc2eap-4 },
-	     { 0x1.1e2e99b93c7b3p+0, -0x1.c88078462be0cp-4 },
-	     { 0x1.1cf011a7a882ap+0, -0x1.b6a786a423565p-4 },
-	     { 0x1.1bb44b97dba5ap+0, -0x1.a4e2676ac7f85p-4 },
-	     { 0x1.1a7b3e66cdd4fp+0, -0x1.9330eea777e76p-4 },
-	     { 0x1.1944e11dc56cdp+0, -0x1.8192f134d5ad9p-4 },
-	     { 0x1.18112aebb1a6ep+0, -0x1.70084464f0538p-4 },
-	     { 0x1.16e013231b7e9p+0, -0x1.5e90bdec5cb1fp-4 },
-	     { 0x1.15b1913f156cfp+0, -0x1.4d2c3433c5536p-4 },
-	     { 0x1.14859cdedde13p+0, -0x1.3bda7e219879ap-4 },
-	     { 0x1.135c2dc68cfa4p+0, -0x1.2a9b732d27194p-4 },
-	     { 0x1.12353bdb01684p+0, -0x1.196eeb2b10807p-4 },
-	     { 0x1.1110bf25b85b4p+0, -0x1.0854be8ef8a7ep-4 },
-	     { 0x1.0feeafd2f8577p+0, -0x1.ee998cb277432p-5 },
-	     { 0x1.0ecf062c51c3bp+0, -0x1.ccadb79919fb9p-5 },
-	     { 0x1.0db1baa076c8bp+0, -0x1.aae5b1d8618b0p-5 },
-	     { 0x1.0c96c5bb3048ep+0, -0x1.89413015d7442p-5 },
-	     { 0x1.0b7e20263e070p+0, -0x1.67bfe7bf158dep-5 },
-	     { 0x1.0a67c2acd0ce3p+0, -0x1.46618f83941bep-5 },
-	     { 0x1.0953a6391e982p+0, -0x1.2525df1b0618ap-5 },
-	     { 0x1.0841c3caea380p+0, -0x1.040c8e2f77c6ap-5 },
-	     { 0x1.07321489b13eap+0, -0x1.c62aad39f738ap-6 },
-	     { 0x1.062491aee9904p+0, -0x1.847fe3bdead9cp-6 },
-	     { 0x1.05193497a7cc5p+0, -0x1.43183683400acp-6 },
-	     { 0x1.040ff6b5f5e9fp+0, -0x1.01f31c4e1d544p-6 },
-	     { 0x1.0308d19aa6127p+0, -0x1.82201d1e6b69ap-7 },
-	     { 0x1.0203beedb0c67p+0, -0x1.00dd0f3e1bfd6p-7 },
-	     { 0x1.010037d38bcc2p+0, -0x1.ff6fe1feb4e53p-9 },
-	     { 1.0, 0.0 },
-	     { 0x1.fc06d493cca10p-1, 0x1.fe91885ec8e20p-8 },
-	     { 0x1.f81e6ac3b918fp-1, 0x1.fc516f716296dp-7 },
-	     { 0x1.f44546ef18996p-1, 0x1.7bb4dd70a015bp-6 },
-	     { 0x1.f07b10382c84bp-1, 0x1.f84c99b34b674p-6 },
-	     { 0x1.ecbf7070e59d4p-1, 0x1.39f9ce4fb2d71p-5 },
-	     { 0x1.e91213f715939p-1, 0x1.7756c0fd22e78p-5 },
-	     { 0x1.e572a9a75f7b7p-1, 0x1.b43ee82db8f3ap-5 },
-	     { 0x1.e1e0e2c530207p-1, 0x1.f0b3fced60034p-5 },
-	     { 0x1.de5c72d8a8be3p-1, 0x1.165bd78d4878ep-4 },
-	     { 0x1.dae50fa5658ccp-1, 0x1.3425d2715ebe6p-4 },
-	     { 0x1.d77a71145a2dap-1, 0x1.51b8bd91b7915p-4 },
-	     { 0x1.d41c51166623ep-1, 0x1.6f15632c76a47p-4 },
-	     { 0x1.d0ca6ba0bb29fp-1, 0x1.8c3c88ecbe503p-4 },
-	     { 0x1.cd847e8e59681p-1, 0x1.a92ef077625dap-4 },
-	     { 0x1.ca4a499693e00p-1, 0x1.c5ed5745fa006p-4 },
-	     { 0x1.c71b8e399e821p-1, 0x1.e27876de1c993p-4 },
-	     { 0x1.c3f80faf19077p-1, 0x1.fed104fce4cdcp-4 },
-	     { 0x1.c0df92dc2b0ecp-1, 0x1.0d7bd9c17d78bp-3 },
-	     { 0x1.bdd1de3cbb542p-1, 0x1.1b76986cef97bp-3 },
-	     { 0x1.baceb9e1007a3p-1, 0x1.295913d24f750p-3 },
-	     { 0x1.b7d5ef543e55ep-1, 0x1.37239fa295d17p-3 },
-	     { 0x1.b4e749977d953p-1, 0x1.44d68dd78714bp-3 },
-	     { 0x1.b20295155478ep-1, 0x1.52722ebe5d780p-3 },
-	     { 0x1.af279f8e82be2p-1, 0x1.5ff6d12671f98p-3 },
-	     { 0x1.ac5638197fdf3p-1, 0x1.6d64c2389484bp-3 },
-	     { 0x1.a98e2f102e087p-1, 0x1.7abc4da40fddap-3 },
-	     { 0x1.a6cf5606d05c1p-1, 0x1.87fdbda1e8452p-3 },
-	     { 0x1.a4197fc04d746p-1, 0x1.95295b06a5f37p-3 },
-	     { 0x1.a16c80293dc01p-1, 0x1.a23f6d34abbc5p-3 },
-	     { 0x1.9ec82c4dc5bc9p-1, 0x1.af403a28e04f2p-3 },
-	     { 0x1.9c2c5a491f534p-1, 0x1.bc2c06a85721ap-3 },
-	     { 0x1.9998e1480b618p-1, 0x1.c903161240163p-3 },
-	     { 0x1.970d9977c6c2dp-1, 0x1.d5c5aa93287ebp-3 },
-	     { 0x1.948a5c023d212p-1, 0x1.e274051823fa9p-3 },
-	     { 0x1.920f0303d6809p-1, 0x1.ef0e656300c16p-3 },
-	     { 0x1.8f9b698a98b45p-1, 0x1.fb9509f05aa2ap-3 },
-	     { 0x1.8d2f6b81726f6p-1, 0x1.04041821f37afp-2 },
-	     { 0x1.8acae5bb55badp-1, 0x1.0a340a49b3029p-2 },
-	     { 0x1.886db5d9275b8p-1, 0x1.105a7918a126dp-2 },
-	     { 0x1.8617ba567c13cp-1, 0x1.1677819812b84p-2 },
-	     { 0x1.83c8d27487800p-1, 0x1.1c8b405b40c0ep-2 },
-	     { 0x1.8180de3c5dbe7p-1, 0x1.2295d16cfa6b1p-2 },
-	     { 0x1.7f3fbe71cdb71p-1, 0x1.28975066318a2p-2 },
-	     { 0x1.7d055498071c1p-1, 0x1.2e8fd855d86fcp-2 },
-	     { 0x1.7ad182e54f65ap-1, 0x1.347f83d605e59p-2 },
-	     { 0x1.78a42c3c90125p-1, 0x1.3a666d1244588p-2 },
-	     { 0x1.767d342f76944p-1, 0x1.4044adb6f8ec4p-2 },
-	     { 0x1.745c7ef26b00ap-1, 0x1.461a5f077558cp-2 },
-	     { 0x1.7241f15769d0fp-1, 0x1.4be799e20b9c8p-2 },
-	     { 0x1.702d70d396e41p-1, 0x1.51ac76a6b79dfp-2 },
-	     { 0x1.6e1ee3700cd11p-1, 0x1.57690d5744a45p-2 },
-	     { 0x1.6c162fc9cbe02p-1, 0x1.5d1d758e45217p-2 } }
-};
diff --git a/pl/math/v_sinh_3u.c b/pl/math/v_sinh_3u.c
deleted file mode 100644
index a644f54b4a0f75..00000000000000
--- a/pl/math/v_sinh_3u.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Double-precision vector sinh(x) function.
- *
- * Copyright (c) 2022-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "v_math.h"
-#include "poly_advsimd_f64.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
-static const struct data
-{
-  float64x2_t poly[11];
-  float64x2_t inv_ln2, m_ln2, shift;
-  uint64x2_t halff;
-  int64x2_t onef;
-#if WANT_SIMD_EXCEPT
-  uint64x2_t tiny_bound, thresh;
-#else
-  uint64x2_t large_bound;
-#endif
-} data = {
-  /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2].  */
-  .poly = { V2 (0x1p-1), V2 (0x1.5555555555559p-3), V2 (0x1.555555555554bp-5),
-	    V2 (0x1.111111110f663p-7), V2 (0x1.6c16c16c1b5f3p-10),
-	    V2 (0x1.a01a01affa35dp-13), V2 (0x1.a01a018b4ecbbp-16),
-	    V2 (0x1.71ddf82db5bb4p-19), V2 (0x1.27e517fc0d54bp-22),
-	    V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29), },
-
-  .inv_ln2 = V2 (0x1.71547652b82fep0),
-  .m_ln2 = (float64x2_t) {-0x1.62e42fefa39efp-1, -0x1.abc9e3b39803fp-56},
-  .shift = V2 (0x1.8p52),
-
-  .halff = V2 (0x3fe0000000000000),
-  .onef = V2 (0x3ff0000000000000),
-#if WANT_SIMD_EXCEPT
-  /* 2^-26, below which sinh(x) rounds to x.  */
-  .tiny_bound = V2 (0x3e50000000000000),
-  /* asuint(large_bound) - asuint(tiny_bound).  */
-  .thresh = V2 (0x0230000000000000),
-#else
-/* 2^9. expm1 helper overflows for large input.  */
-  .large_bound = V2 (0x4080000000000000),
-#endif
-};
-
-static inline float64x2_t
-expm1_inline (float64x2_t x)
-{
-  const struct data *d = ptr_barrier (&data);
-
-  /* Reduce argument:
-     exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
-     where i = round(x / ln2)
-     and   f = x - i * ln2 (f in [-ln2/2, ln2/2]).  */
-  float64x2_t j = vsubq_f64 (vfmaq_f64 (d->shift, d->inv_ln2, x), d->shift);
-  int64x2_t i = vcvtq_s64_f64 (j);
-  float64x2_t f = vfmaq_laneq_f64 (x, j, d->m_ln2, 0);
-  f = vfmaq_laneq_f64 (f, j, d->m_ln2, 1);
-  /* Approximate expm1(f) using polynomial.  */
-  float64x2_t f2 = vmulq_f64 (f, f);
-  float64x2_t f4 = vmulq_f64 (f2, f2);
-  float64x2_t f8 = vmulq_f64 (f4, f4);
-  float64x2_t p = vfmaq_f64 (f, f2, v_estrin_10_f64 (f, f2, f4, f8, d->poly));
-  /* t = 2^i.  */
-  float64x2_t t = vreinterpretq_f64_u64 (
-      vreinterpretq_u64_s64 (vaddq_s64 (vshlq_n_s64 (i, 52), d->onef)));
-  /* expm1(x) ~= p * t + (t - 1).  */
-  return vfmaq_f64 (vsubq_f64 (t, v_f64 (1.0)), p, t);
-}
-
-static float64x2_t NOINLINE VPCS_ATTR
-special_case (float64x2_t x)
-{
-  return v_call_f64 (sinh, x, x, v_u64 (-1));
-}
-
-/* Approximation for vector double-precision sinh(x) using expm1.
-   sinh(x) = (exp(x) - exp(-x)) / 2.
-   The greatest observed error is 2.57 ULP:
-   _ZGVnN2v_sinh (0x1.9fb1d49d1d58bp-2) got 0x1.ab34e59d678dcp-2
-				       want 0x1.ab34e59d678d9p-2.  */
-float64x2_t VPCS_ATTR V_NAME_D1 (sinh) (float64x2_t x)
-{
-  const struct data *d = ptr_barrier (&data);
-
-  float64x2_t ax = vabsq_f64 (x);
-  uint64x2_t sign
-      = veorq_u64 (vreinterpretq_u64_f64 (x), vreinterpretq_u64_f64 (ax));
-  float64x2_t halfsign = vreinterpretq_f64_u64 (vorrq_u64 (sign, d->halff));
-
-#if WANT_SIMD_EXCEPT
-  uint64x2_t special = vcgeq_u64 (
-      vsubq_u64 (vreinterpretq_u64_f64 (ax), d->tiny_bound), d->thresh);
-#else
-  uint64x2_t special = vcgeq_u64 (vreinterpretq_u64_f64 (ax), d->large_bound);
-#endif
-
-  /* Fall back to scalar variant for all lanes if any of them are special.  */
-  if (unlikely (v_any_u64 (special)))
-    return special_case (x);
-
-  /* Up to the point that expm1 overflows, we can use it to calculate sinh
-     using a slight rearrangement of the definition of sinh. This allows us to
-     retain acceptable accuracy for very small inputs.  */
-  float64x2_t t = expm1_inline (ax);
-  t = vaddq_f64 (t, vdivq_f64 (t, vaddq_f64 (t, v_f64 (1.0))));
-  return vmulq_f64 (t, halfsign);
-}
-
-PL_SIG (V, D, 1, sinh, -10.0, 10.0)
-PL_TEST_ULP (V_NAME_D1 (sinh), 2.08)
-PL_TEST_EXPECT_FENV (V_NAME_D1 (sinh), WANT_SIMD_EXCEPT)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (sinh), 0, 0x1p-26, 1000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (sinh), 0x1p-26, 0x1p9, 500000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (sinh), 0x1p9, inf, 1000)
diff --git a/pl/math/v_tanh_3u.c b/pl/math/v_tanh_3u.c
deleted file mode 100644
index 5de85c68da2cd3..00000000000000
--- a/pl/math/v_tanh_3u.c
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Double-precision vector tanh(x) function.
- * Copyright (c) 2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#include "v_math.h"
-#include "poly_advsimd_f64.h"
-#include "mathlib.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-
-static const struct data
-{
-  float64x2_t poly[11];
-  float64x2_t inv_ln2, ln2_hi, ln2_lo, shift;
-  uint64x2_t onef;
-  uint64x2_t thresh, tiny_bound;
-} data = {
-  /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2].  */
-  .poly = { V2 (0x1p-1), V2 (0x1.5555555555559p-3), V2 (0x1.555555555554bp-5),
-	    V2 (0x1.111111110f663p-7), V2 (0x1.6c16c16c1b5f3p-10),
-	    V2 (0x1.a01a01affa35dp-13), V2 (0x1.a01a018b4ecbbp-16),
-	    V2 (0x1.71ddf82db5bb4p-19), V2 (0x1.27e517fc0d54bp-22),
-	    V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29), },
-
-  .inv_ln2 = V2 (0x1.71547652b82fep0),
-  .ln2_hi = V2 (-0x1.62e42fefa39efp-1),
-  .ln2_lo = V2 (-0x1.abc9e3b39803fp-56),
-  .shift = V2 (0x1.8p52),
-
-  .onef = V2 (0x3ff0000000000000),
-  .tiny_bound = V2 (0x3e40000000000000), /* asuint64 (0x1p-27).  */
-  /* asuint64(0x1.241bf835f9d5fp+4) - asuint64(tiny_bound).  */
-  .thresh = V2 (0x01f241bf835f9d5f),
-};
-
-static inline float64x2_t
-expm1_inline (float64x2_t x, const struct data *d)
-{
-  /* Helper routine for calculating exp(x) - 1. Vector port of the helper from
-     the scalar variant of tanh.  */
-
-  /* Reduce argument: f in [-ln2/2, ln2/2], i is exact.  */
-  float64x2_t j = vsubq_f64 (vfmaq_f64 (d->shift, d->inv_ln2, x), d->shift);
-  int64x2_t i = vcvtq_s64_f64 (j);
-  float64x2_t f = vfmaq_f64 (x, j, d->ln2_hi);
-  f = vfmaq_f64 (f, j, d->ln2_lo);
-
-  /* Approximate expm1(f) using polynomial.  */
-  float64x2_t f2 = vmulq_f64 (f, f);
-  float64x2_t f4 = vmulq_f64 (f2, f2);
-  float64x2_t p = vfmaq_f64 (
-      f, f2, v_estrin_10_f64 (f, f2, f4, vmulq_f64 (f4, f4), d->poly));
-
-  /* t = 2 ^ i.  */
-  float64x2_t t = vreinterpretq_f64_u64 (
-      vaddq_u64 (vreinterpretq_u64_s64 (i << 52), d->onef));
-  /* expm1(x) = p * t + (t - 1).  */
-  return vfmaq_f64 (vsubq_f64 (t, v_f64 (1)), p, t);
-}
-
-static float64x2_t NOINLINE VPCS_ATTR
-special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
-{
-  return v_call_f64 (tanh, x, y, special);
-}
-
-/* Vector approximation for double-precision tanh(x), using a simplified
-   version of expm1. The greatest observed error is 2.77 ULP:
-   _ZGVnN2v_tanh(-0x1.c4a4ca0f9f3b7p-3) got -0x1.bd6a21a163627p-3
-				       want -0x1.bd6a21a163624p-3.  */
-float64x2_t VPCS_ATTR V_NAME_D1 (tanh) (float64x2_t x)
-{
-  const struct data *d = ptr_barrier (&data);
-
-  uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x));
-
-  float64x2_t u = x;
-
-  /* Trigger special-cases for tiny, boring and infinity/NaN.  */
-  uint64x2_t special = vcgtq_u64 (vsubq_u64 (ia, d->tiny_bound), d->thresh);
-#if WANT_SIMD_EXCEPT
-  /* To trigger fp exceptions correctly, set special lanes to a neutral value.
-     They will be fixed up later by the special-case handler.  */
-  if (unlikely (v_any_u64 (special)))
-    u = v_zerofy_f64 (u, special);
-#endif
-
-  u = vaddq_f64 (u, u);
-
-  /* tanh(x) = (e^2x - 1) / (e^2x + 1).  */
-  float64x2_t q = expm1_inline (u, d);
-  float64x2_t qp2 = vaddq_f64 (q, v_f64 (2));
-
-  if (unlikely (v_any_u64 (special)))
-    return special_case (x, vdivq_f64 (q, qp2), special);
-  return vdivq_f64 (q, qp2);
-}
-
-PL_SIG (V, D, 1, tanh, -10.0, 10.0)
-PL_TEST_ULP (V_NAME_D1 (tanh), 2.27)
-PL_TEST_EXPECT_FENV (V_NAME_D1 (tanh), WANT_SIMD_EXCEPT)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (tanh), 0, 0x1p-27, 5000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (tanh), 0x1p-27, 0x1.241bf835f9d5fp+4, 50000)
-PL_TEST_SYM_INTERVAL (V_NAME_D1 (tanh), 0x1.241bf835f9d5fp+4, inf, 1000)
diff --git a/string/Dir.mk b/string/Dir.mk
index 40ff5acc093e9d..dd8283ec4977a4 100644
--- a/string/Dir.mk
+++ b/string/Dir.mk
@@ -13,9 +13,12 @@ all-string bench-string check-string install-string clean-string:
 else
 
 string-lib-srcs := $(wildcard $(S)/$(ARCH)/*.[cS])
+string-lib-srcs += $(wildcard $(S)/$(ARCH)/experimental/*.[cS])
 string-test-srcs := $(wildcard $(S)/test/*.c)
 string-bench-srcs := $(wildcard $(S)/bench/*.c)
 
+string-arch-include-dir := $(wildcard $(S)/$(ARCH))
+string-arch-includes := $(wildcard $(S)/$(ARCH)/*.h)
 string-includes := $(patsubst $(S)/%,build/%,$(wildcard $(S)/include/*.h))
 
 string-libs := \
@@ -43,6 +46,7 @@ string-tests := \
 
 string-benches := \
 	build/bin/bench/memcpy \
+	build/bin/bench/memset \
 	build/bin/bench/strlen
 
 string-lib-objs := $(patsubst $(S)/%,$(B)/%.o,$(basename $(string-lib-srcs)))
@@ -64,8 +68,8 @@ string-files := \
 
 all-string: $(string-libs) $(string-tests) $(string-benches) $(string-includes)
 
-$(string-objs): $(string-includes)
-$(string-objs): CFLAGS_ALL += $(string-cflags)
+$(string-objs): $(string-includes) $(string-arch-includes)
+$(string-objs): CFLAGS_ALL += $(string-cflags) -I$(string-arch-include-dir)
 
 $(string-test-objs): CFLAGS_ALL += -D_GNU_SOURCE
 
@@ -101,6 +105,7 @@ check-string: $(string-tests-out)
 bench-string: $(string-benches)
 	$(EMULATOR) build/bin/bench/strlen
 	$(EMULATOR) build/bin/bench/memcpy
+	$(EMULATOR) build/bin/bench/memset
 
 install-string: \
  $(string-libs:build/lib/%=$(DESTDIR)$(libdir)/%) \
diff --git a/string/aarch64/__mtag_tag_region.S b/string/aarch64/__mtag_tag_region.S
index 207e22950c6d3c..34b5789240dacb 100644
--- a/string/aarch64/__mtag_tag_region.S
+++ b/string/aarch64/__mtag_tag_region.S
@@ -27,9 +27,6 @@
 #define zva_val	x4
 
 ENTRY (__mtag_tag_region)
-	PTR_ARG (0)
-	SIZE_ARG (1)
-
 	add	dstend, dstin, count
 
 	cmp	count, 96
diff --git a/string/aarch64/__mtag_tag_zero_region.S b/string/aarch64/__mtag_tag_zero_region.S
index 44b8e0114f4265..2fa248e25621eb 100644
--- a/string/aarch64/__mtag_tag_zero_region.S
+++ b/string/aarch64/__mtag_tag_zero_region.S
@@ -27,9 +27,6 @@
 #define zva_val	x4
 
 ENTRY (__mtag_tag_zero_region)
-	PTR_ARG (0)
-	SIZE_ARG (1)
-
 	add	dstend, dstin, count
 
 	cmp	count, 96
diff --git a/string/aarch64/asmdefs.h b/string/aarch64/asmdefs.h
index 131b95e1fea98f..90166676977a95 100644
--- a/string/aarch64/asmdefs.h
+++ b/string/aarch64/asmdefs.h
@@ -21,19 +21,6 @@
 #define FEATURE_1_PAC 2
 
 /* Add a NT_GNU_PROPERTY_TYPE_0 note.  */
-#ifdef __ILP32__
-#define GNU_PROPERTY(type, value)	\
-  .section .note.gnu.property, "a";	\
-  .p2align 2;				\
-  .word 4;				\
-  .word 12;				\
-  .word 5;				\
-  .asciz "GNU";				\
-  .word type;				\
-  .word 4;				\
-  .word value;				\
-  .text
-#else
 #define GNU_PROPERTY(type, value)	\
   .section .note.gnu.property, "a";	\
   .p2align 3;				\
@@ -46,7 +33,6 @@
   .word value;				\
   .word 0;				\
   .text
-#endif
 
 /* If set then the GNU Property Note section will be added to
    mark objects to support BTI and PAC-RET.  */
@@ -80,27 +66,4 @@ GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_PAC)
 
 #define L(l) .L ## l
 
-#ifdef __ILP32__
-  /* Sanitize padding bits of pointer arguments as per aapcs64 */
-#define PTR_ARG(n)  mov w##n, w##n
-#else
-#define PTR_ARG(n)
-#endif
-
-#ifdef __ILP32__
-  /* Sanitize padding bits of size arguments as per aapcs64 */
-#define SIZE_ARG(n)  mov w##n, w##n
-#else
-#define SIZE_ARG(n)
-#endif
-
-/* Compiler supports SVE instructions  */
-#ifndef HAVE_SVE
-# if __aarch64__ && (__GNUC__ >= 8 || __clang_major__ >= 5)
-#   define HAVE_SVE 1
-# else
-#   define HAVE_SVE 0
-# endif
-#endif
-
 #endif
diff --git a/string/aarch64/memchr-sve.S b/string/aarch64/experimental/memchr-sve.S
similarity index 96%
rename from string/aarch64/memchr-sve.S
rename to string/aarch64/experimental/memchr-sve.S
index b851cf31f2383e..b314551f3e0fea 100644
--- a/string/aarch64/memchr-sve.S
+++ b/string/aarch64/experimental/memchr-sve.S
@@ -7,7 +7,8 @@
 
 #include "asmdefs.h"
 
-#if __ARM_FEATURE_SVE
+.arch armv8-a+sve
+
 /* Assumptions:
  *
  * ARMv8-a, AArch64
@@ -15,8 +16,6 @@
  */
 
 ENTRY (__memchr_aarch64_sve)
-	PTR_ARG (0)
-	SIZE_ARG (2)
 	dup	z1.b, w1			/* duplicate c to a vector */
 	setffr					/* initialize FFR */
 	mov	x3, 0				/* initialize off */
@@ -59,6 +58,3 @@ ENTRY (__memchr_aarch64_sve)
 	ret
 
 END (__memchr_aarch64_sve)
-
-#endif
-
diff --git a/string/aarch64/memcmp-sve.S b/string/aarch64/experimental/memcmp-sve.S
similarity index 93%
rename from string/aarch64/memcmp-sve.S
rename to string/aarch64/experimental/memcmp-sve.S
index d52ce4555344e5..ad3534836d046f 100644
--- a/string/aarch64/memcmp-sve.S
+++ b/string/aarch64/experimental/memcmp-sve.S
@@ -7,7 +7,8 @@
 
 #include "asmdefs.h"
 
-#if __ARM_FEATURE_SVE
+.arch armv8-a+sve
+
 /* Assumptions:
  *
  * ARMv8-a, AArch64
@@ -15,9 +16,6 @@
  */
 
 ENTRY (__memcmp_aarch64_sve)
-	PTR_ARG (0)
-	PTR_ARG (1)
-	SIZE_ARG (2)
 	mov	x3, 0			/* initialize off */
 
 0:	whilelo	p0.b, x3, x2		/* while off < max */
@@ -46,6 +44,3 @@ ENTRY (__memcmp_aarch64_sve)
 	ret
 
 END (__memcmp_aarch64_sve)
-
-#endif
-
diff --git a/string/aarch64/stpcpy-sve.S b/string/aarch64/experimental/stpcpy-sve.S
similarity index 100%
rename from string/aarch64/stpcpy-sve.S
rename to string/aarch64/experimental/stpcpy-sve.S
diff --git a/string/aarch64/strchr-sve.S b/string/aarch64/experimental/strchr-sve.S
similarity index 97%
rename from string/aarch64/strchr-sve.S
rename to string/aarch64/experimental/strchr-sve.S
index ff075167bfefb7..7d74ae9ff232cd 100644
--- a/string/aarch64/strchr-sve.S
+++ b/string/aarch64/experimental/strchr-sve.S
@@ -7,7 +7,8 @@
 
 #include "asmdefs.h"
 
-#if __ARM_FEATURE_SVE
+.arch armv8-a+sve
+
 /* Assumptions:
  *
  * ARMv8-a, AArch64
@@ -22,7 +23,6 @@
 #endif
 
 ENTRY (FUNC)
-	PTR_ARG (0)
 	dup	z1.b, w1		/* replicate byte across vector */
 	setffr				/* initialize FFR */
 	ptrue	p1.b			/* all ones; loop invariant */
@@ -65,6 +65,3 @@ ENTRY (FUNC)
 	b	0b
 
 END (FUNC)
-
-#endif
-
diff --git a/string/aarch64/strchrnul-sve.S b/string/aarch64/experimental/strchrnul-sve.S
similarity index 100%
rename from string/aarch64/strchrnul-sve.S
rename to string/aarch64/experimental/strchrnul-sve.S
diff --git a/string/aarch64/strcmp-sve.S b/string/aarch64/experimental/strcmp-sve.S
similarity index 96%
rename from string/aarch64/strcmp-sve.S
rename to string/aarch64/experimental/strcmp-sve.S
index eaf909a378f1f5..b6c24958853457 100644
--- a/string/aarch64/strcmp-sve.S
+++ b/string/aarch64/experimental/strcmp-sve.S
@@ -7,7 +7,8 @@
 
 #include "asmdefs.h"
 
-#if __ARM_FEATURE_SVE
+.arch armv8-a+sve
+
 /* Assumptions:
  *
  * ARMv8-a, AArch64
@@ -15,8 +16,6 @@
  */
 
 ENTRY (__strcmp_aarch64_sve)
-	PTR_ARG (0)
-	PTR_ARG (1)
 	setffr				/* initialize FFR */
 	ptrue	p1.b, all		/* all ones; loop invariant */
 	mov	x2, 0			/* initialize offset */
@@ -54,6 +53,3 @@ ENTRY (__strcmp_aarch64_sve)
 	b	1b
 
 END (__strcmp_aarch64_sve)
-
-#endif
-
diff --git a/string/aarch64/strcpy-sve.S b/string/aarch64/experimental/strcpy-sve.S
similarity index 96%
rename from string/aarch64/strcpy-sve.S
rename to string/aarch64/experimental/strcpy-sve.S
index 00e72dce4451b3..57b77c8a00e7aa 100644
--- a/string/aarch64/strcpy-sve.S
+++ b/string/aarch64/experimental/strcpy-sve.S
@@ -7,7 +7,8 @@
 
 #include "asmdefs.h"
 
-#if __ARM_FEATURE_SVE
+.arch armv8-a+sve
+
 /* Assumptions:
  *
  * ARMv8-a, AArch64
@@ -22,8 +23,6 @@
 #endif
 
 ENTRY (FUNC)
-	PTR_ARG (0)
-	PTR_ARG (1)
 	setffr				/* initialize FFR */
 	ptrue	p2.b, all		/* all ones; loop invariant */
 	mov	x2, 0			/* initialize offset */
@@ -66,6 +65,3 @@ ENTRY (FUNC)
 	ret
 
 END (FUNC)
-
-#endif
-
diff --git a/string/aarch64/strlen-sve.S b/string/aarch64/experimental/strlen-sve.S
similarity index 96%
rename from string/aarch64/strlen-sve.S
rename to string/aarch64/experimental/strlen-sve.S
index 12ebbdba5c93ae..c83155052c07d2 100644
--- a/string/aarch64/strlen-sve.S
+++ b/string/aarch64/experimental/strlen-sve.S
@@ -7,7 +7,8 @@
 
 #include "asmdefs.h"
 
-#if __ARM_FEATURE_SVE
+.arch armv8-a+sve
+
 /* Assumptions:
  *
  * ARMv8-a, AArch64
@@ -15,7 +16,6 @@
  */
 
 ENTRY (__strlen_aarch64_sve)
-	PTR_ARG (0)
 	setffr			/* initialize FFR */
 	ptrue	p2.b		/* all ones; loop invariant */
 	mov	x1, 0		/* initialize length */
@@ -50,6 +50,3 @@ ENTRY (__strlen_aarch64_sve)
 	b	0b
 
 END (__strlen_aarch64_sve)
-
-#endif
-
diff --git a/string/aarch64/strncmp-sve.S b/string/aarch64/experimental/strncmp-sve.S
similarity index 95%
rename from string/aarch64/strncmp-sve.S
rename to string/aarch64/experimental/strncmp-sve.S
index 6a9e9f7b6437fd..a281e642d8aaba 100644
--- a/string/aarch64/strncmp-sve.S
+++ b/string/aarch64/experimental/strncmp-sve.S
@@ -7,7 +7,8 @@
 
 #include "asmdefs.h"
 
-#if __ARM_FEATURE_SVE
+.arch armv8-a+sve
+
 /* Assumptions:
  *
  * ARMv8-a, AArch64
@@ -15,9 +16,6 @@
  */
 
 ENTRY (__strncmp_aarch64_sve)
-	PTR_ARG (0)
-	PTR_ARG (1)
-	SIZE_ARG (2)
 	setffr				/* initialize FFR */
 	mov	x3, 0			/* initialize off */
 
@@ -64,6 +62,3 @@ ENTRY (__strncmp_aarch64_sve)
 	ret
 
 END (__strncmp_aarch64_sve)
-
-#endif
-
diff --git a/string/aarch64/strnlen-sve.S b/string/aarch64/experimental/strnlen-sve.S
similarity index 96%
rename from string/aarch64/strnlen-sve.S
rename to string/aarch64/experimental/strnlen-sve.S
index 6c43dc427da7a9..11d835a1b13ce9 100644
--- a/string/aarch64/strnlen-sve.S
+++ b/string/aarch64/experimental/strnlen-sve.S
@@ -7,7 +7,8 @@
 
 #include "asmdefs.h"
 
-#if __ARM_FEATURE_SVE
+.arch armv8-a+sve
+
 /* Assumptions:
  *
  * ARMv8-a, AArch64
@@ -15,8 +16,6 @@
  */
 
 ENTRY (__strnlen_aarch64_sve)
-	PTR_ARG (0)
-	SIZE_ARG (1)
 	setffr				/* initialize FFR */
 	mov	x2, 0			/* initialize len */
 	b	1f
@@ -69,6 +68,3 @@ ENTRY (__strnlen_aarch64_sve)
 	ret
 
 END (__strnlen_aarch64_sve)
-
-#endif
-
diff --git a/string/aarch64/strrchr-sve.S b/string/aarch64/experimental/strrchr-sve.S
similarity index 98%
rename from string/aarch64/strrchr-sve.S
rename to string/aarch64/experimental/strrchr-sve.S
index 825a7384cfc118..731edaddf15650 100644
--- a/string/aarch64/strrchr-sve.S
+++ b/string/aarch64/experimental/strrchr-sve.S
@@ -7,7 +7,8 @@
 
 #include "asmdefs.h"
 
-#if __ARM_FEATURE_SVE
+.arch armv8-a+sve
+
 /* Assumptions:
  *
  * ARMv8-a, AArch64
@@ -15,7 +16,6 @@
  */
 
 ENTRY (__strrchr_aarch64_sve)
-	PTR_ARG (0)
 	dup	z1.b, w1		/* replicate byte across vector */
 	setffr				/* initialize FFR */
 	ptrue	p1.b			/* all ones; loop invariant */
@@ -79,6 +79,3 @@ ENTRY (__strrchr_aarch64_sve)
 	ret
 
 END (__strrchr_aarch64_sve)
-
-#endif
-
diff --git a/string/aarch64/memchr-mte.S b/string/aarch64/memchr-mte.S
index 948c3cbc7dd43a..68bd0af9a8c5fa 100644
--- a/string/aarch64/memchr-mte.S
+++ b/string/aarch64/memchr-mte.S
@@ -40,8 +40,6 @@
    exactly which byte matched.  */
 
 ENTRY (__memchr_aarch64_mte)
-	PTR_ARG (0)
-	SIZE_ARG (2)
 	bic	src, srcin, 15
 	cbz	cntin, L(nomatch)
 	ld1	{vdata.16b}, [src]
diff --git a/string/aarch64/memchr.S b/string/aarch64/memchr.S
index fe6cfe2bc0e28d..d12a38abbc3009 100644
--- a/string/aarch64/memchr.S
+++ b/string/aarch64/memchr.S
@@ -47,8 +47,6 @@
  */
 
 ENTRY (__memchr_aarch64)
-	PTR_ARG (0)
-	SIZE_ARG (2)
 	/* Do not dereference srcin if no bytes to compare.  */
 	cbz	cntin, L(zero_length)
 	/*
diff --git a/string/aarch64/memcmp.S b/string/aarch64/memcmp.S
index 35135e72cc8e53..43439de4db69d5 100644
--- a/string/aarch64/memcmp.S
+++ b/string/aarch64/memcmp.S
@@ -30,10 +30,6 @@
 
 
 ENTRY (__memcmp_aarch64)
-	PTR_ARG (0)
-	PTR_ARG (1)
-	SIZE_ARG (2)
-
 	cmp	limit, 16
 	b.lo	L(less16)
 	ldp	data1, data3, [src1]
diff --git a/string/aarch64/memcpy-advsimd.S b/string/aarch64/memcpy-advsimd.S
index 9d3027d4d3cdee..cbf4c581500e40 100644
--- a/string/aarch64/memcpy-advsimd.S
+++ b/string/aarch64/memcpy-advsimd.S
@@ -52,9 +52,6 @@
 
 ENTRY_ALIAS (__memmove_aarch64_simd)
 ENTRY (__memcpy_aarch64_simd)
-	PTR_ARG (0)
-	PTR_ARG (1)
-	SIZE_ARG (2)
 	add	srcend, src, count
 	cmp	count, 128
 	b.hi	L(copy_long)
diff --git a/string/aarch64/memcpy-mops.S b/string/aarch64/memcpy-mops.S
index b45c31418717cd..03ae95570c0404 100644
--- a/string/aarch64/memcpy-mops.S
+++ b/string/aarch64/memcpy-mops.S
@@ -8,10 +8,6 @@
 #include "asmdefs.h"
 
 ENTRY (__memcpy_aarch64_mops)
-	PTR_ARG (0)
-	PTR_ARG (1)
-	SIZE_ARG (2)
-
 	mov	x3, x0
 	.inst	0x19010443	/* cpyfp   [x3]!, [x1]!, x2!  */
 	.inst	0x19410443	/* cpyfm   [x3]!, [x1]!, x2!  */
diff --git a/string/aarch64/memcpy-sve.S b/string/aarch64/memcpy-sve.S
index e8a946d7db37f4..9b05cb2a58eebe 100644
--- a/string/aarch64/memcpy-sve.S
+++ b/string/aarch64/memcpy-sve.S
@@ -13,8 +13,6 @@
 
 #include "asmdefs.h"
 
-#ifdef HAVE_SVE
-
 .arch armv8-a+sve
 
 #define dstin	x0
@@ -51,10 +49,6 @@
 
 ENTRY_ALIAS (__memmove_aarch64_sve)
 ENTRY (__memcpy_aarch64_sve)
-	PTR_ARG (0)
-	PTR_ARG (1)
-	SIZE_ARG (2)
-
 	cmp	count, 128
 	b.hi	L(copy_long)
 	cntb	vlen
@@ -173,5 +167,3 @@ L(return):
 	ret
 
 END (__memcpy_aarch64_sve)
-
-#endif
diff --git a/string/aarch64/memcpy.S b/string/aarch64/memcpy.S
index 7c0606e2104a04..351f1a11f09728 100644
--- a/string/aarch64/memcpy.S
+++ b/string/aarch64/memcpy.S
@@ -55,9 +55,6 @@
 
 ENTRY_ALIAS (__memmove_aarch64)
 ENTRY (__memcpy_aarch64)
-	PTR_ARG (0)
-	PTR_ARG (1)
-	SIZE_ARG (2)
 	add	srcend, src, count
 	add	dstend, dstin, count
 	cmp	count, 128
diff --git a/string/aarch64/memmove-mops.S b/string/aarch64/memmove-mops.S
index 6c73017bb16f00..d9839f86e9b4f6 100644
--- a/string/aarch64/memmove-mops.S
+++ b/string/aarch64/memmove-mops.S
@@ -8,10 +8,6 @@
 #include "asmdefs.h"
 
 ENTRY (__memmove_aarch64_mops)
-	PTR_ARG (0)
-	PTR_ARG (1)
-	SIZE_ARG (2)
-
 	mov	x3, x0
 	.inst	0x1d010443	/* cpyp    [x3]!, [x1]!, x2!  */
 	.inst	0x1d410443	/* cpym    [x3]!, [x1]!, x2!  */
diff --git a/string/aarch64/memrchr.S b/string/aarch64/memrchr.S
index 6418bdf56f4148..ed38478a6faad8 100644
--- a/string/aarch64/memrchr.S
+++ b/string/aarch64/memrchr.S
@@ -42,7 +42,6 @@
    exactly which byte matched.  */
 
 ENTRY (__memrchr_aarch64)
-	PTR_ARG (0)
 	add	end, srcin, cntin
 	sub	endm1, end, 1
 	bic	src, endm1, 15
diff --git a/string/aarch64/memset-mops.S b/string/aarch64/memset-mops.S
index ec791493bae9c0..00d8e7d2c05f00 100644
--- a/string/aarch64/memset-mops.S
+++ b/string/aarch64/memset-mops.S
@@ -8,9 +8,6 @@
 #include "asmdefs.h"
 
 ENTRY (__memset_aarch64_mops)
-	PTR_ARG (0)
-	SIZE_ARG (2)
-
 	mov     x3, x0
 	.inst   0x19c10443	/* setp    [x3]!, x2!, x1  */
 	.inst   0x19c14443	/* setm    [x3]!, x2!, x1  */
diff --git a/string/aarch64/memset-sve.S b/string/aarch64/memset-sve.S
new file mode 100644
index 00000000000000..efaeaece284e85
--- /dev/null
+++ b/string/aarch64/memset-sve.S
@@ -0,0 +1,114 @@
+/*
+ * memset - fill memory with a constant byte
+ *
+ * Copyright (c) 2024-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, Advanced SIMD, SVE, unaligned accesses.
+ *
+ */
+
+#include "asmdefs.h"
+
+.arch armv8-a+sve
+
+#define dstin	x0	/* Start of the buffer; never modified below.  */
+#define val	x1	/* 64-bit view of the fill value; not referenced below.  */
+#define valw	w1	/* Fill value; only its low byte is used.  */
+#define count	x2	/* Byte count; reused as the loop counter.  */
+#define dst	x3	/* Aligned store cursor; shares x3 with off.  */
+#define dstend	x4	/* dstin + count.  */
+#define zva_val	x5	/* Scratch for the DCZID_EL0 check.  */
+#define vlen	x5	/* Not referenced in this file.  */
+#define off	x3	/* Only used on the 16..63 byte path.  */
+#define dstend2 x5	/* dstend - off on the 16..63 byte path.  */
+
+ENTRY (__memset_aarch64_sve)
+	dup	v0.16B, valw		/* Fill byte in all 16 lanes of q0.  */
+	cmp	count, 16
+	b.lo	L(set_16)		/* 0..15 bytes.  */
+
+	add	dstend, dstin, count
+	cmp	count, 64
+	b.hs	L(set_128)		/* 64 bytes or more.  */
+
+	/* Set 16..63 bytes.  */
+	mov	off, 16
+	and	off, off, count, lsr 1	/* off = 16 if count >= 32, else 0.  */
+	sub	dstend2, dstend, off
+	str	q0, [dstin]
+	str	q0, [dstin, off]
+	str	q0, [dstend2, -16]
+	str	q0, [dstend, -16]	/* The four stores may overlap.  */
+	ret
+
+	.p2align 4
+L(set_16):
+	whilelo p0.b, xzr, count	/* Enable the first count byte lanes.  */
+	st1b	z0.b, p0, [dstin]	/* Predicated; stores nothing if count is 0.  */
+	ret
+
+	.p2align 4
+L(set_128):
+	bic	dst, dstin, 15		/* dst = dstin aligned down to 16.  */
+	cmp	count, 128
+	b.hi	L(set_long)
+	stp	q0, q0, [dstin]		/* 64..128 bytes: 64 from each end.  */
+	stp	q0, q0, [dstin, 32]
+	stp	q0, q0, [dstend, -64]
+	stp	q0, q0, [dstend, -32]
+	ret
+
+	.p2align 4
+L(set_long):
+	cmp	count, 256
+	b.lo	L(no_zva)		/* DC ZVA only for count >= 256.  */
+	tst	valw, 255
+	b.ne	L(no_zva)		/* Fill byte is non-zero.  */
+
+#ifndef SKIP_ZVA_CHECK
+	mrs	zva_val, dczid_el0
+	and	zva_val, zva_val, 31
+	cmp	zva_val, 4		/* ZVA size is 64 bytes.  */
+	b.ne	L(no_zva)
+#endif
+	str	q0, [dstin]
+	str	q0, [dst, 16]
+	bic	dst, dstin, 31
+	stp	q0, q0, [dst, 32]	/* Head is now set up to the first ZVA block.  */
+	bic	dst, dstin, 63		/* dst = dstin aligned down to 64.  */
+	sub	count, dstend, dst	/* Count is now 64 too large.  */
+	sub	count, count, 128	/* Adjust count and bias for loop.  */
+
+	sub	x8, dstend, 1		/* Write last bytes before ZVA loop.  */
+	bic	x8, x8, 15		/* x8 = last byte's address aligned down to 16.  */
+	stp	q0, q0, [x8, -48]
+	str	q0, [x8, -16]
+	str	q0, [dstend, -16]
+
+	.p2align 4
+L(zva64_loop):
+	add	dst, dst, 64
+	dc	zva, dst		/* Zero the 64-byte block at dst.  */
+	subs	count, count, 64
+	b.hi	L(zva64_loop)
+	ret
+
+L(no_zva):
+	str	q0, [dstin]
+	sub	count, dstend, dst	/* Count is 16 too large.  */
+	sub	count, count, 64 + 16	/* Adjust count and bias for loop.  */
+L(no_zva_loop):
+	stp	q0, q0, [dst, 16]	/* 64 bytes per iteration.  */
+	stp	q0, q0, [dst, 48]
+	add	dst, dst, 64
+	subs	count, count, 64
+	b.hi	L(no_zva_loop)
+	stp	q0, q0, [dstend, -64]	/* Last 64 bytes; may overlap the loop's.  */
+	stp	q0, q0, [dstend, -32]
+	ret
+
+END (__memset_aarch64_sve)
diff --git a/string/aarch64/memset.S b/string/aarch64/memset.S
index 553b0fcaefea5e..906a4dcf46c643 100644
--- a/string/aarch64/memset.S
+++ b/string/aarch64/memset.S
@@ -1,7 +1,7 @@
 /*
  * memset - fill memory with a constant byte
  *
- * Copyright (c) 2012-2022, Arm Limited.
+ * Copyright (c) 2012-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
@@ -20,93 +20,98 @@
 #define dst	x3
 #define dstend	x4
 #define zva_val	x5
+#define off	x3
+#define dstend2	x5
 
 ENTRY (__memset_aarch64)
-	PTR_ARG (0)
-	SIZE_ARG (2)
-
 	dup	v0.16B, valw
-	add	dstend, dstin, count
-
-	cmp	count, 96
-	b.hi	L(set_long)
 	cmp	count, 16
-	b.hs	L(set_medium)
-	mov	val, v0.D[0]
+	b.lo	L(set_small)
 
-	/* Set 0..15 bytes.  */
-	tbz	count, 3, 1f
-	str	val, [dstin]
-	str	val, [dstend, -8]
+	add	dstend, dstin, count
+	cmp	count, 64
+	b.hs	L(set_128)
+
+	/* Set 16..63 bytes.  */
+	mov	off, 16
+	and	off, off, count, lsr 1
+	sub	dstend2, dstend, off
+	str	q0, [dstin]
+	str	q0, [dstin, off]
+	str	q0, [dstend2, -16]
+	str	q0, [dstend, -16]
 	ret
+
 	.p2align 4
-1:	tbz	count, 2, 2f
-	str	valw, [dstin]
-	str	valw, [dstend, -4]
+	/* Set 0..15 bytes.  */
+L(set_small):
+	add	dstend, dstin, count
+	cmp	count, 4
+	b.lo	2f
+	lsr	off, count, 3
+	sub	dstend2, dstend, off, lsl 2
+	str	s0, [dstin]
+	str	s0, [dstin, off, lsl 2]
+	str	s0, [dstend2, -4]
+	str	s0, [dstend, -4]
 	ret
+
+	/* Set 0..3 bytes.  */
 2:	cbz	count, 3f
+	lsr	off, count, 1
 	strb	valw, [dstin]
-	tbz	count, 1, 3f
-	strh	valw, [dstend, -2]
+	strb	valw, [dstin, off]
+	strb	valw, [dstend, -1]
 3:	ret
 
-	/* Set 17..96 bytes.  */
-L(set_medium):
-	str	q0, [dstin]
-	tbnz	count, 6, L(set96)
-	str	q0, [dstend, -16]
-	tbz	count, 5, 1f
-	str	q0, [dstin, 16]
-	str	q0, [dstend, -32]
-1:	ret
-
 	.p2align 4
-	/* Set 64..96 bytes.  Write 64 bytes from the start and
-	   32 bytes from the end.  */
-L(set96):
-	str	q0, [dstin, 16]
+L(set_128):
+	bic	dst, dstin, 15
+	cmp	count, 128
+	b.hi	L(set_long)
+	stp	q0, q0, [dstin]
 	stp	q0, q0, [dstin, 32]
+	stp	q0, q0, [dstend, -64]
 	stp	q0, q0, [dstend, -32]
 	ret
 
 	.p2align 4
 L(set_long):
-	and	valw, valw, 255
-	bic	dst, dstin, 15
 	str	q0, [dstin]
-	cmp	count, 160
-	ccmp	valw, 0, 0, hs
+	str	q0, [dst, 16]
+	tst	valw, 255
 	b.ne	L(no_zva)
-
 #ifndef SKIP_ZVA_CHECK
 	mrs	zva_val, dczid_el0
 	and	zva_val, zva_val, 31
 	cmp	zva_val, 4		/* ZVA size is 64 bytes.  */
 	b.ne	L(no_zva)
 #endif
-	str	q0, [dst, 16]
 	stp	q0, q0, [dst, 32]
-	bic	dst, dst, 63
+	bic	dst, dstin, 63
 	sub	count, dstend, dst	/* Count is now 64 too large.  */
-	sub	count, count, 128	/* Adjust count and bias for loop.  */
+	sub	count, count, 64 + 64	/* Adjust count and bias for loop.  */
+
+	/* Write last bytes before ZVA loop.  */
+	stp	q0, q0, [dstend, -64]
+	stp	q0, q0, [dstend, -32]
 
 	.p2align 4
-L(zva_loop):
+L(zva64_loop):
 	add	dst, dst, 64
 	dc	zva, dst
 	subs	count, count, 64
-	b.hi	L(zva_loop)
-	stp	q0, q0, [dstend, -64]
-	stp	q0, q0, [dstend, -32]
+	b.hi	L(zva64_loop)
 	ret
 
+	.p2align 3
 L(no_zva):
-	sub	count, dstend, dst	/* Count is 16 too large.  */
-	sub	dst, dst, 16		/* Dst is biased by -32.  */
-	sub	count, count, 64 + 16	/* Adjust count and bias for loop.  */
+	sub	count, dstend, dst	/* Count is 32 too large.  */
+	sub	count, count, 64 + 32	/* Adjust count and bias for loop.  */
 L(no_zva_loop):
 	stp	q0, q0, [dst, 32]
-	stp	q0, q0, [dst, 64]!
+	stp	q0, q0, [dst, 64]
+	add	dst, dst, 64
 	subs	count, count, 64
 	b.hi	L(no_zva_loop)
 	stp	q0, q0, [dstend, -64]
@@ -114,4 +119,3 @@ L(no_zva_loop):
 	ret
 
 END (__memset_aarch64)
-
diff --git a/string/aarch64/strchr-mte.S b/string/aarch64/strchr-mte.S
index 6ec08f7acc766b..42b747311bc6f5 100644
--- a/string/aarch64/strchr-mte.S
+++ b/string/aarch64/strchr-mte.S
@@ -39,7 +39,6 @@
    If it is not a multiple of 4, there was no match.  */
 
 ENTRY (__strchr_aarch64_mte)
-	PTR_ARG (0)
 	bic	src, srcin, 15
 	dup	vrepchr.16b, chrin
 	ld1	{vdata.16b}, [src]
diff --git a/string/aarch64/strchr.S b/string/aarch64/strchr.S
index 37193bd947a73d..c1d01e9635b6c1 100644
--- a/string/aarch64/strchr.S
+++ b/string/aarch64/strchr.S
@@ -51,7 +51,6 @@
 /* Locals and temporaries.  */
 
 ENTRY (__strchr_aarch64)
-	PTR_ARG (0)
 	/* Magic constant 0xc0300c03 to allow us to identify which lane
 	   matches the requested byte.  Even bits are set if the character
 	   matches, odd bits if either the char is NUL or matches.  */
diff --git a/string/aarch64/strchrnul-mte.S b/string/aarch64/strchrnul-mte.S
index 543ee88bb28585..b3180cdf9e2cec 100644
--- a/string/aarch64/strchrnul-mte.S
+++ b/string/aarch64/strchrnul-mte.S
@@ -38,7 +38,6 @@
    exactly which byte matched.  */
 
 ENTRY (__strchrnul_aarch64_mte)
-	PTR_ARG (0)
 	bic	src, srcin, 15
 	dup	vrepchr.16b, chrin
 	ld1	{vdata.16b}, [src]
diff --git a/string/aarch64/strchrnul.S b/string/aarch64/strchrnul.S
index 666e8d0304c16d..0a32c46c30c558 100644
--- a/string/aarch64/strchrnul.S
+++ b/string/aarch64/strchrnul.S
@@ -47,7 +47,6 @@
 /* Locals and temporaries.  */
 
 ENTRY (__strchrnul_aarch64)
-	PTR_ARG (0)
 	/* Magic constant 0x40100401 to allow us to identify which lane
 	   matches the termination condition.  */
 	mov	wtmp2, #0x0401
diff --git a/string/aarch64/strcmp.S b/string/aarch64/strcmp.S
index 137a9aa06681a3..7c0d0485a89ba1 100644
--- a/string/aarch64/strcmp.S
+++ b/string/aarch64/strcmp.S
@@ -51,8 +51,6 @@
 
 
 ENTRY (__strcmp_aarch64)
-	PTR_ARG (0)
-	PTR_ARG (1)
 	sub	off2, src2, src1
 	mov	zeroones, REP8_01
 	and	tmp, src1, 7
diff --git a/string/aarch64/strcpy.S b/string/aarch64/strcpy.S
index 97ae37ea422973..5852616e602494 100644
--- a/string/aarch64/strcpy.S
+++ b/string/aarch64/strcpy.S
@@ -52,8 +52,6 @@
    exactly which byte matched.  */
 
 ENTRY (STRCPY)
-	PTR_ARG (0)
-	PTR_ARG (1)
 	bic	src, srcin, 15
 	ld1	{vdata.16b}, [src]
 	cmeq	vhas_nul.16b, vdata.16b, 0
diff --git a/string/aarch64/strlen-mte.S b/string/aarch64/strlen-mte.S
index 77235797f7c54f..afa72eed9a43f7 100644
--- a/string/aarch64/strlen-mte.S
+++ b/string/aarch64/strlen-mte.S
@@ -33,7 +33,6 @@
    identifies the first zero byte.  */
 
 ENTRY (__strlen_aarch64_mte)
-	PTR_ARG (0)
 	bic	src, srcin, 15
 	ld1	{vdata.16b}, [src]
 	cmeq	vhas_nul.16b, vdata.16b, 0
@@ -41,37 +40,50 @@ ENTRY (__strlen_aarch64_mte)
 	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
 	fmov	synd, dend
 	lsr	synd, synd, shift
-	cbz	synd, L(loop)
+	cbz	synd, L(next16)
 
 	rbit	synd, synd
 	clz	result, synd
 	lsr	result, result, 2
 	ret
 
+L(next16):
+	ldr	data, [src, 16]
+	cmeq	vhas_nul.16b, vdata.16b, 0
+	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
+	fmov	synd, dend
+	cbz	synd, L(loop)
+	add	src, src, 16
+#ifndef __AARCH64EB__
+	rbit	synd, synd
+#endif
+	sub	result, src, srcin
+	clz	tmp, synd
+	add	result, result, tmp, lsr 2
+	ret
+
 	.p2align 5
 L(loop):
-	ldr	data, [src, 16]
+	ldr	data, [src, 32]!
 	cmeq	vhas_nul.16b, vdata.16b, 0
-	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
+	addhn	vend.8b, vhas_nul.8h, vhas_nul.8h
 	fmov	synd, dend
 	cbnz	synd, L(loop_end)
-	ldr	data, [src, 32]!
+	ldr	data, [src, 16]
 	cmeq	vhas_nul.16b, vdata.16b, 0
-	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
+	addhn	vend.8b, vhas_nul.8h, vhas_nul.8h
 	fmov	synd, dend
 	cbz	synd, L(loop)
-	sub	src, src, 16
+	add	src, src, 16
 L(loop_end):
-	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
-	sub	result, src, srcin
-	fmov	synd, dend
+	sub	result, shift, src, lsl 2	/* (srcin - src) << 2.  */
 #ifndef __AARCH64EB__
 	rbit	synd, synd
+	sub	result, result, 3
 #endif
-	add	result, result, 16
 	clz	tmp, synd
-	add	result, result, tmp, lsr 2
+	sub	result, tmp, result
+	lsr	result, result, 2
 	ret
 
 END (__strlen_aarch64_mte)
-
diff --git a/string/aarch64/strlen.S b/string/aarch64/strlen.S
index 6f6f08f636b248..0ebb26be844c1a 100644
--- a/string/aarch64/strlen.S
+++ b/string/aarch64/strlen.S
@@ -75,7 +75,6 @@
    character, return the length, if not, continue in the main loop.  */
 
 ENTRY (__strlen_aarch64)
-	PTR_ARG (0)
 	and	tmp1, srcin, MIN_PAGE_SIZE - 1
 	cmp	tmp1, MIN_PAGE_SIZE - 32
 	b.hi	L(page_cross)
diff --git a/string/aarch64/strncmp.S b/string/aarch64/strncmp.S
index 128a10c52bb175..493a0f06ed1d00 100644
--- a/string/aarch64/strncmp.S
+++ b/string/aarch64/strncmp.S
@@ -55,9 +55,6 @@
 #endif
 
 ENTRY (__strncmp_aarch64)
-	PTR_ARG (0)
-	PTR_ARG (1)
-	SIZE_ARG (2)
 	cbz	limit, L(ret0)
 	eor	tmp1, src1, src2
 	mov	zeroones, #REP8_01
diff --git a/string/aarch64/strnlen.S b/string/aarch64/strnlen.S
index f2090a7485a564..6a96ec268f1a6d 100644
--- a/string/aarch64/strnlen.S
+++ b/string/aarch64/strnlen.S
@@ -36,8 +36,6 @@
    identifies the first zero byte.  */
 
 ENTRY (__strnlen_aarch64)
-	PTR_ARG (0)
-	SIZE_ARG (1)
 	bic	src, srcin, 15
 	cbz	cntin, L(nomatch)
 	ld1	{vdata.16b}, [src]
diff --git a/string/aarch64/strrchr-mte.S b/string/aarch64/strrchr-mte.S
index bb61ab9ad4e7c5..8668ce6d291620 100644
--- a/string/aarch64/strrchr-mte.S
+++ b/string/aarch64/strrchr-mte.S
@@ -42,7 +42,6 @@
    if the relevant byte matched the NUL end of string.  */
 
 ENTRY (__strrchr_aarch64_mte)
-	PTR_ARG (0)
 	bic	src, srcin, 15
 	dup	vrepchr.16b, chrin
 	movi	vrepmask.16b, 0x33
diff --git a/string/aarch64/strrchr.S b/string/aarch64/strrchr.S
index bf9cb297b6cb3f..f5713f4260fbca 100644
--- a/string/aarch64/strrchr.S
+++ b/string/aarch64/strrchr.S
@@ -55,7 +55,6 @@
    identify exactly which byte is causing the termination, and why.  */
 
 ENTRY (__strrchr_aarch64)
-	PTR_ARG (0)
 	/* Magic constant 0x40100401 to allow us to identify which lane
 	   matches the requested byte.  Magic constant 0x80200802 used
 	   similarly for NUL termination.  */
diff --git a/string/bench/memcpy.c b/string/bench/memcpy.c
index b628f9b60d96e0..583fa505db754c 100644
--- a/string/bench/memcpy.c
+++ b/string/bench/memcpy.c
@@ -20,35 +20,18 @@
 #define MIN_SIZE 32768
 #define MAX_SIZE (1024 * 1024)
 
-static uint8_t a[MAX_SIZE + 4096 + 64] __attribute__((__aligned__(64)));
-static uint8_t b[MAX_SIZE + 4096 + 64] __attribute__((__aligned__(64)));
-
-#define F(x) {#x, x},
-
-static const struct fun
-{
-  const char *name;
-  void *(*fun)(void *, const void *, size_t);
-} funtab[] =
-{
-#if __aarch64__
-  F(__memcpy_aarch64)
-# if __ARM_NEON
-  F(__memcpy_aarch64_simd)
-# endif
-# if __ARM_FEATURE_SVE
-  F(__memcpy_aarch64_sve)
-# endif
-# if WANT_MOPS
-  F(__memcpy_aarch64_mops)
-# endif
-#elif __arm__
-  F(__memcpy_arm)
-#endif
-  F(memcpy)
-#undef F
-  {0, 0}
-};
+static uint8_t a[MAX_SIZE + 4096 + 64] __attribute__((__aligned__(4096)));
+static uint8_t b[MAX_SIZE + 4096 + 64] __attribute__((__aligned__(4096)));
+
+#define DOTEST(STR,TESTFN)			\
+  printf (STR);					\
+  RUN (TESTFN, memcpy);				\
+  RUNA64 (TESTFN, __memcpy_aarch64);		\
+  RUNA64 (TESTFN, __memcpy_aarch64_simd);	\
+  RUNSVE (TESTFN, __memcpy_aarch64_sve);	\
+  RUNMOPS (TESTFN, __memcpy_aarch64_mops);	\
+  RUNA32 (TESTFN, __memcpy_arm);		\
+  printf ("\n");
 
 typedef struct { uint16_t size; uint16_t freq; } freq_data_t;
 typedef struct { uint8_t align; uint16_t freq; } align_data_t;
@@ -160,183 +143,125 @@ init_copies (size_t max_size)
   return total;
 }
 
-int main (void)
+static void inline __attribute ((always_inline))
+memcpy_random (const char *name, void *(*fn)(void *, const void *, size_t))
 {
-  init_copy_distribution ();
-
-  memset (a, 1, sizeof (a));
-  memset (b, 2, sizeof (b));
-
-  printf("Random memcpy (bytes/ns):\n");
-  for (int f = 0; funtab[f].name != 0; f++)
-    {
-      size_t total = 0;
-      uint64_t tsum = 0;
-      printf ("%22s ", funtab[f].name);
-      rand32 (0x12345678);
-
-      for (int size = MIN_SIZE; size <= MAX_SIZE; size *= 2)
-	{
-	  size_t copy_size = init_copies (size) * ITERS;
-
-	  for (int c = 0; c < NUM_TESTS; c++)
-	    funtab[f].fun (b + test_arr[c].dst, a + test_arr[c].src,
-			   test_arr[c].len);
-
-	  uint64_t t = clock_get_ns ();
-	  for (int i = 0; i < ITERS; i++)
-	    for (int c = 0; c < NUM_TESTS; c++)
-	      funtab[f].fun (b + test_arr[c].dst, a + test_arr[c].src,
-			     test_arr[c].len);
-	  t = clock_get_ns () - t;
-	  total += copy_size;
-	  tsum += t;
-	  printf ("%dK: %.2f ", size / 1024, (double)copy_size / t);
-	}
-      printf( "avg %.2f\n", (double)total / tsum);
-    }
-
-  size_t total = 0;
-  uint64_t tsum = 0;
-  printf ("%22s ", "memcpy_call");
-  rand32 (0x12345678);
-
+  printf ("%22s ", name);
+  uint64_t total = 0, tsum = 0;
   for (int size = MIN_SIZE; size <= MAX_SIZE; size *= 2)
     {
-      size_t copy_size = init_copies (size) * ITERS;
+      uint64_t copy_size = init_copies (size) * ITERS;
 
       for (int c = 0; c < NUM_TESTS; c++)
-	memcpy (b + test_arr[c].dst, a + test_arr[c].src, test_arr[c].len);
+	fn (b + test_arr[c].dst, a + test_arr[c].src, test_arr[c].len);
 
       uint64_t t = clock_get_ns ();
       for (int i = 0; i < ITERS; i++)
 	for (int c = 0; c < NUM_TESTS; c++)
-	  memcpy (b + test_arr[c].dst, a + test_arr[c].src, test_arr[c].len);
+	  fn (b + test_arr[c].dst, a + test_arr[c].src, test_arr[c].len);
       t = clock_get_ns () - t;
       total += copy_size;
       tsum += t;
-      printf ("%dK: %.2f ", size / 1024, (double)copy_size / t);
+      printf ("%dK: %5.2f ", size / 1024, (double)copy_size / t);
     }
-  printf( "avg %.2f\n", (double)total / tsum);
-
+  printf( "avg %5.2f\n", (double)total / tsum);
+}
 
-  printf ("\nAligned medium memcpy (bytes/ns):\n");
-  for (int f = 0; funtab[f].name != 0; f++)
-    {
-      printf ("%22s ", funtab[f].name);
-
-      for (int size = 8; size <= 512; size *= 2)
-	{
-	  uint64_t t = clock_get_ns ();
-	  for (int i = 0; i < ITERS2; i++)
-	    funtab[f].fun (b, a, size);
-	  t = clock_get_ns () - t;
-	  printf ("%dB: %.2f ", size, (double)size * ITERS2 / t);
-	}
-      printf ("\n");
-    }
+static void inline __attribute ((always_inline))
+memcpy_medium_aligned (const char *name, void *(*fn)(void *, const void *, size_t))
+{
+  printf ("%22s ", name);
 
-  printf ("%22s ", "memcpy_call");
   for (int size = 8; size <= 512; size *= 2)
     {
       uint64_t t = clock_get_ns ();
       for (int i = 0; i < ITERS2; i++)
-	memcpy (b, a, size);
+	fn (b, a, size);
       t = clock_get_ns () - t;
-      printf ("%dB: %.2f ", size, (double)size * ITERS2 / t);
+      printf ("%dB: %5.2f ", size, (double)size * ITERS2 / t);
     }
   printf ("\n");
+}
 
+static void inline __attribute ((always_inline))
+memcpy_medium_unaligned (const char *name, void *(*fn)(void *, const void *, size_t))
+{
+  printf ("%22s ", name);
 
-  printf ("\nUnaligned medium memcpy (bytes/ns):\n");
-  for (int f = 0; funtab[f].name != 0; f++)
-    {
-      printf ("%22s ", funtab[f].name);
-
-      for (int size = 8; size <= 512; size *= 2)
-	{
-	  uint64_t t = clock_get_ns ();
-	  for (int i = 0; i < ITERS2; i++)
-	    funtab[f].fun (b + 3, a + 1, size);
-	  t = clock_get_ns () - t;
-	  printf ("%dB: %.2f ", size, (double)size * ITERS2 / t);
-	}
-      printf ("\n");
-    }
-
-  printf ("%22s ", "memcpy_call");
   for (int size = 8; size <= 512; size *= 2)
     {
       uint64_t t = clock_get_ns ();
       for (int i = 0; i < ITERS2; i++)
-	memcpy (b + 3, a + 1, size);
+	fn (b + 3, a + 1, size);
       t = clock_get_ns () - t;
-      printf ("%dB: %.2f ", size, (double)size * ITERS2 / t);
+      printf ("%dB: %5.2f ", size, (double)size * ITERS2 / t);
     }
   printf ("\n");
+}
 
+static void inline __attribute ((always_inline))
+memcpy_large (const char *name, void *(*fn)(void *, const void *, size_t))
+{
+  printf ("%22s ", name);
 
-  printf ("\nLarge memcpy (bytes/ns):\n");
-  for (int f = 0; funtab[f].name != 0; f++)
-    {
-      printf ("%22s ", funtab[f].name);
-
-      for (int size = 1024; size <= 65536; size *= 2)
-	{
-	  uint64_t t = clock_get_ns ();
-	  for (int i = 0; i < ITERS3; i++)
-	    funtab[f].fun (b, a, size);
-	  t = clock_get_ns () - t;
-	  printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t);
-	}
-      printf ("\n");
-    }
-
-  printf ("%22s ", "memcpy_call");
   for (int size = 1024; size <= 65536; size *= 2)
     {
       uint64_t t = clock_get_ns ();
       for (int i = 0; i < ITERS3; i++)
-	memcpy (b, a, size);
+	fn (b, a, size);
       t = clock_get_ns () - t;
-      printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t);
+      printf ("%dK: %5.2f ", size / 1024, (double)size * ITERS3 / t);
     }
   printf ("\n");
+}
 
+static void inline __attribute ((always_inline))
+memmove_forward_unaligned (const char *name, void *(*fn)(void *, const void *, size_t))
+{
+  printf ("%22s ", name);
 
-  printf ("\nUnaligned forwards memmove (bytes/ns):\n");
-  for (int f = 0; funtab[f].name != 0; f++)
+  for (int size = 1024; size <= 65536; size *= 2)
     {
-      printf ("%22s ", funtab[f].name);
-
-      for (int size = 1024; size <= 65536; size *= 2)
-	{
-	  uint64_t t = clock_get_ns ();
-	  for (int i = 0; i < ITERS3; i++)
-	    funtab[f].fun (a, a + 256 + (i & 31), size);
-	  t = clock_get_ns () - t;
-	  printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t);
-	}
-      printf ("\n");
+      uint64_t t = clock_get_ns ();
+      for (int i = 0; i < ITERS3; i++)
+        fn (a, a + 256 + (i & 31), size);
+      t = clock_get_ns () - t;
+      printf ("%dK: %5.2f ", size / 1024, (double)size * ITERS3 / t);
     }
 
+  printf ("\n");
+}
+
+static void inline __attribute ((always_inline))
+memmove_backward_unaligned (const char *name, void *(*fn)(void *, const void *, size_t))
+{
+  printf ("%22s ", name);
 
-  printf ("\nUnaligned backwards memmove (bytes/ns):\n");
-  for (int f = 0; funtab[f].name != 0; f++)
+  for (int size = 1024; size <= 65536; size *= 2)
     {
-      printf ("%22s ", funtab[f].name);
-
-      for (int size = 1024; size <= 65536; size *= 2)
-	{
-	  uint64_t t = clock_get_ns ();
-	  for (int i = 0; i < ITERS3; i++)
-	    funtab[f].fun (a + 256 + (i & 31), a, size);
-	  t = clock_get_ns () - t;
-	  printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t);
-	}
-      printf ("\n");
+      uint64_t t = clock_get_ns ();
+      for (int i = 0; i < ITERS3; i++)
+	fn (a + 256 + (i & 31), a, size);
+      t = clock_get_ns () - t;
+      printf ("%dK: %5.2f ", size / 1024, (double)size * ITERS3 / t);
     }
+
   printf ("\n");
+}
+
+int main (void)
+{
+  init_copy_distribution ();
+
+  memset (a, 1, sizeof (a));
+  memset (b, 2, sizeof (b));
+
+  DOTEST ("Random memcpy (bytes/ns):\n", memcpy_random);
+  DOTEST ("Medium memcpy aligned (bytes/ns):\n", memcpy_medium_aligned);
+  DOTEST ("Medium memcpy unaligned (bytes/ns):\n", memcpy_medium_unaligned);
+  DOTEST ("Large memcpy (bytes/ns):\n", memcpy_large);
+  DOTEST ("Forwards memmove unaligned (bytes/ns):\n", memmove_forward_unaligned);
+  DOTEST ("Backwards memmove unaligned (bytes/ns):\n", memmove_backward_unaligned);
 
   return 0;
 }
diff --git a/string/bench/memset.c b/string/bench/memset.c
index 990e23ba9a368b..07474e46914625 100644
--- a/string/bench/memset.c
+++ b/string/bench/memset.c
@@ -20,25 +20,16 @@
 #define MIN_SIZE 32768
 #define MAX_SIZE (1024 * 1024)
 
-static uint8_t a[MAX_SIZE + 4096] __attribute__((__aligned__(64)));
+static uint8_t a[MAX_SIZE + 4096] __attribute__((__aligned__(4096)));
 
-#define F(x) {#x, x},
-
-static const struct fun
-{
-  const char *name;
-  void *(*fun)(void *, int, size_t);
-} funtab[] =
-{
-#if __aarch64__
-  F(__memset_aarch64)
-#elif __arm__
-  F(__memset_arm)
-#endif
-  F(memset)
-#undef F
-  {0, 0}
-};
+#define DOTEST(STR,TESTFN) /* Print STR, run TESTFN on each memset.  */ \
+  printf (STR);					\
+  RUN (TESTFN, memset);				\
+  RUNA64 (TESTFN, __memset_aarch64);		\
+  RUNSVE (TESTFN, __memset_aarch64_sve);	\
+  RUNMOPS (TESTFN, __memset_aarch64_mops);	\
+  RUNA32 (TESTFN, __memset_arm);		\
+  printf ("\n");
 
 typedef struct { uint32_t offset : 20, len : 12; } memset_test_t;
 static memset_test_t test_arr[NUM_TESTS];
@@ -127,117 +118,73 @@ init_memset (size_t max_size)
   return total;
 }
 
-
-int main (void)
+static void inline __attribute ((always_inline))
+memset_random (const char *name, void *(*set)(void *, int, size_t))
 {
-  init_memset_distribution ();
-
-  memset (a, 1, sizeof (a));
-
-  printf("Random memset (bytes/ns):\n");
-  for (int f = 0; funtab[f].name != 0; f++)
-    {
-      size_t total_size = 0;
-      uint64_t tsum = 0;
-      printf ("%22s ", funtab[f].name);
-      rand32 (0x12345678);
-
-      for (int size = MIN_SIZE; size <= MAX_SIZE; size *= 2)
-	{
-	  size_t memset_size = init_memset (size) * ITERS;
-
-	  for (int c = 0; c < NUM_TESTS; c++)
-	    funtab[f].fun (a + test_arr[c].offset, 0, test_arr[c].len);
-
-	  uint64_t t = clock_get_ns ();
-	  for (int i = 0; i < ITERS; i++)
-	    for (int c = 0; c < NUM_TESTS; c++)
-	      funtab[f].fun (a + test_arr[c].offset, 0, test_arr[c].len);
-	  t = clock_get_ns () - t;
-	  total_size += memset_size;
-	  tsum += t;
-	  printf ("%dK: %.2f ", size / 1024, (double)memset_size / t);
-	}
-      printf( "avg %.2f\n", (double)total_size / tsum);
-    }
-
-  size_t total_size = 0;
+  uint64_t total_size = 0;
   uint64_t tsum = 0;
-  printf ("%22s ", "memset_call");
+  printf ("%22s ", name);
   rand32 (0x12345678);
 
   for (int size = MIN_SIZE; size <= MAX_SIZE; size *= 2)
     {
-      size_t memset_size = init_memset (size) * ITERS;
+      uint64_t memset_size = init_memset (size) * ITERS;
 
       for (int c = 0; c < NUM_TESTS; c++)
-	memset (a + test_arr[c].offset, 0, test_arr[c].len);
+	set (a + test_arr[c].offset, 0, test_arr[c].len);
 
       uint64_t t = clock_get_ns ();
       for (int i = 0; i < ITERS; i++)
 	for (int c = 0; c < NUM_TESTS; c++)
-	  memset (a + test_arr[c].offset, 0, test_arr[c].len);
+	  set (a + test_arr[c].offset, 0, test_arr[c].len);
       t = clock_get_ns () - t;
       total_size += memset_size;
       tsum += t;
-      printf ("%dK: %.2f ", size / 1024, (double)memset_size / t);
+      printf ("%dK: %5.2f ", size / 1024, (double)memset_size / t);
     }
-  printf( "avg %.2f\n", (double)total_size / tsum);
-
+  printf( "avg %5.2f\n", (double)total_size / tsum);
+}
 
-  printf ("\nMedium memset (bytes/ns):\n");
-  for (int f = 0; funtab[f].name != 0; f++)
-    {
-      printf ("%22s ", funtab[f].name);
-
-      for (int size = 8; size <= 512; size *= 2)
-	{
-	  uint64_t t = clock_get_ns ();
-	  for (int i = 0; i < ITERS2; i++)
-	    funtab[f].fun (a, 0, size);
-	  t = clock_get_ns () - t;
-	  printf ("%dB: %.2f ", size, (double)size * ITERS2 / t);
-	}
-      printf ("\n");
-    }
+static void inline __attribute ((always_inline))
+memset_medium (const char *name, void *(*set)(void *, int, size_t))
+{
+  printf ("%22s ", name);
 
-  printf ("%22s ", "memset_call");
   for (int size = 8; size <= 512; size *= 2)
     {
       uint64_t t = clock_get_ns ();
       for (int i = 0; i < ITERS2; i++)
-	memset (a, 0, size);
+	set (a, 0, size);
       t = clock_get_ns () - t;
-      printf ("%dB: %.2f ", size, (double)size * ITERS2 / t);
+      printf ("%dB: %5.2f ", size, (double)size * ITERS2 / t);
     }
+  printf ("\n");
+}
 
+static void inline __attribute ((always_inline))
+memset_large (const char *name, void *(*set)(void *, int, size_t))
+{
+  printf ("%22s ", name);
 
-  printf ("\nLarge memset (bytes/ns):\n");
-  for (int f = 0; funtab[f].name != 0; f++)
-    {
-      printf ("%22s ", funtab[f].name);
-
-      for (int size = 1024; size <= 65536; size *= 2)
-	{
-	  uint64_t t = clock_get_ns ();
-	  for (int i = 0; i < ITERS3; i++)
-	    funtab[f].fun (a, 0, size);
-	  t = clock_get_ns () - t;
-	  printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t);
-	}
-      printf ("\n");
-    }
-
-  printf ("%22s ", "memset_call");
   for (int size = 1024; size <= 65536; size *= 2)
     {
       uint64_t t = clock_get_ns ();
       for (int i = 0; i < ITERS3; i++)
-	memset (a, 0, size);
+	set (a, 0, size);
       t = clock_get_ns () - t;
-      printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t);
+      printf ("%dKB: %6.2f ", size / 1024, (double)size * ITERS3 / t);
     }
-  printf ("\n\n");
+  printf ("\n");
+}
+
+int main (void)
+{
+  init_memset_distribution ();
+
+  memset (a, 1, sizeof (a));
 
+  DOTEST ("Random memset (bytes/ns):\n", memset_random);
+  DOTEST ("Medium memset (bytes/ns):\n", memset_medium);
+  DOTEST ("Large memset (bytes/ns):\n", memset_large);
   return 0;
 }
diff --git a/string/bench/strlen.c b/string/bench/strlen.c
index f05d0d5b89e6f1..a8dd55cf5fc4f2 100644
--- a/string/bench/strlen.c
+++ b/string/bench/strlen.c
@@ -14,40 +14,23 @@
 #include "benchlib.h"
 
 #define ITERS 5000
-#define ITERS2 20000000
-#define ITERS3 2000000
-#define NUM_TESTS 16384
+#define ITERS2 40000000
+#define ITERS3 4000000
+#define NUM_TESTS 65536
 
 #define MAX_ALIGN 32
-#define MAX_STRLEN 256
+#define MAX_STRLEN 128
 
 static char a[(MAX_STRLEN + 1) * MAX_ALIGN] __attribute__((__aligned__(4096)));
 
-#define F(x, mte) {#x, x, mte},
-
-static const struct fun
-{
-  const char *name;
-  size_t (*fun) (const char *s);
-  int test_mte;
-} funtab[] = {
-  // clang-format off
-  F(strlen, 0)
-#if __aarch64__
-  F(__strlen_aarch64, 0)
-  F(__strlen_aarch64_mte, 1)
-# if __ARM_FEATURE_SVE
-  F(__strlen_aarch64_sve, 1)
-# endif
-#elif __arm__
-# if __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2
-  F(__strlen_armv6t2, 0)
-# endif
-#endif
-  {0, 0, 0}
-  // clang-format on
-};
-#undef F
+#define DOTEST(STR,TESTFN)			\
+  printf (STR);					\
+  RUN (TESTFN, strlen);				\
+  RUNA64 (TESTFN, __strlen_aarch64);		\
+  RUNA64 (TESTFN, __strlen_aarch64_mte);	\
+  RUNSVE (TESTFN, __strlen_aarch64_sve);	\
+  RUNT32 (TESTFN, __strlen_armv6t2);		\
+  printf ("\n");
 
 static uint16_t strlen_tests[NUM_TESTS];
 
@@ -124,98 +107,119 @@ init_strlen_tests (void)
 
       strlen_tests[n] =
 	index[(align + exp_len) & (MAX_ALIGN - 1)] + MAX_STRLEN - exp_len;
+      assert ((strlen_tests[n] & (align - 1)) == 0);
+      assert (strlen (a + strlen_tests[n]) == exp_len);
     }
 }
 
 static volatile size_t maskv = 0;
 
-int main (void)
+static void inline __attribute ((always_inline))
+strlen_random (const char *name, size_t (*fn)(const char *))
 {
-  rand32 (0x12345678);
-  init_strlen_distribution ();
-  init_strlen_tests ();
+  size_t res = 0, mask = maskv;
+  uint64_t strlen_size = 0;
+  printf ("%22s ", name);
+
+  for (int c = 0; c < NUM_TESTS; c++)
+    strlen_size += fn (a + strlen_tests[c]) + 1;
+  strlen_size *= ITERS;
+
+  /* Measure throughput of strlen.  */
+  uint64_t t = clock_get_ns ();
+  for (int i = 0; i < ITERS; i++)
+    for (int c = 0; c < NUM_TESTS; c++)
+      res += fn (a + strlen_tests[c]);
+  t = clock_get_ns () - t;
+  printf ("tp: %.3f ", (double)strlen_size / t);
+
+  /* Measure latency of strlen result with (res & mask).  */
+  t = clock_get_ns ();
+  for (int i = 0; i < ITERS; i++)
+    for (int c = 0; c < NUM_TESTS; c++)
+      res += fn (a + strlen_tests[c] + (res & mask));
+  t = clock_get_ns () - t;
+  printf ("lat: %.3f\n", (double)strlen_size / t);
+  maskv = res & mask;
+}
 
-  printf ("\nRandom strlen (bytes/ns):\n");
-  for (int f = 0; funtab[f].name != 0; f++)
-    {
-      size_t res = 0, strlen_size = 0, mask = maskv;
-      printf ("%22s ", funtab[f].name);
+static void inline __attribute ((always_inline))
+strlen_small_aligned (const char *name, size_t (*fn)(const char *))
+{
+  printf ("%22s ", name);
 
-      for (int c = 0; c < NUM_TESTS; c++)
-	strlen_size += funtab[f].fun (a + strlen_tests[c]);
-      strlen_size *= ITERS;
+  size_t res = 0, mask = maskv;
+  for (int size = 1; size <= 64; size *= 2)
+    {
+      memset (a, 'x', size);
+      a[size - 1] = 0;
 
-      /* Measure latency of strlen result with (res & mask).  */
       uint64_t t = clock_get_ns ();
-      for (int i = 0; i < ITERS; i++)
-	for (int c = 0; c < NUM_TESTS; c++)
-	  res = funtab[f].fun (a + strlen_tests[c] + (res & mask));
+      for (int i = 0; i < ITERS2; i++)
+	res += fn (a + (i & mask));
       t = clock_get_ns () - t;
-      printf ("%.2f\n", (double)strlen_size / t);
+      printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024,
+	      size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t);
     }
+  maskv &= res;
+  printf ("\n");
+}
 
-  printf ("\nSmall aligned strlen (bytes/ns):\n");
-  for (int f = 0; funtab[f].name != 0; f++)
-    {
-      printf ("%22s ", funtab[f].name);
-
-      for (int size = 1; size <= 64; size *= 2)
-	{
-	  memset (a, 'x', size);
-	  a[size - 1] = 0;
-
-	  uint64_t t = clock_get_ns ();
-	  for (int i = 0; i < ITERS2; i++)
-	    funtab[f].fun (a);
-	  t = clock_get_ns () - t;
-	  printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024,
-		  size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t);
-	}
-      printf ("\n");
-    }
+static void inline __attribute ((always_inline))
+strlen_small_unaligned (const char *name, size_t (*fn)(const char *))
+{
+  printf ("%22s ", name);
 
-  printf ("\nSmall unaligned strlen (bytes/ns):\n");
-  for (int f = 0; funtab[f].name != 0; f++)
+  size_t res = 0, mask = maskv;
+  int align = 9;
+  for (int size = 1; size <= 64; size *= 2)
     {
-      printf ("%22s ", funtab[f].name);
-
-      int align = 9;
-      for (int size = 1; size <= 64; size *= 2)
-	{
-	  memset (a + align, 'x', size);
-	  a[align + size - 1] = 0;
-
-	  uint64_t t = clock_get_ns ();
-	  for (int i = 0; i < ITERS2; i++)
-	    funtab[f].fun (a + align);
-	  t = clock_get_ns () - t;
-	  printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024,
-		  size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t);
-	}
-      printf ("\n");
+      memset (a + align, 'x', size);
+      a[align + size - 1] = 0;
+
+      uint64_t t = clock_get_ns ();
+      for (int i = 0; i < ITERS2; i++)
+	res += fn (a + align + (i & mask));
+      t = clock_get_ns () - t;
+      printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024,
+	      size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t);
     }
+  maskv &= res;
+  printf ("\n");
+}
 
-  printf ("\nMedium strlen (bytes/ns):\n");
-  for (int f = 0; funtab[f].name != 0; f++)
+static void inline __attribute ((always_inline))
+strlen_medium (const char *name, size_t (*fn)(const char *))
+{
+  printf ("%22s ", name);
+
+  size_t res = 0, mask = maskv;
+  for (int size = 128; size <= 4096; size *= 2)
     {
-      printf ("%22s ", funtab[f].name);
-
-      for (int size = 128; size <= 4096; size *= 2)
-	{
-	  memset (a, 'x', size);
-	  a[size - 1] = 0;
-
-	  uint64_t t = clock_get_ns ();
-	  for (int i = 0; i < ITERS3; i++)
-	    funtab[f].fun (a);
-	  t = clock_get_ns () - t;
-	  printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024,
-		  size < 1024 ? 'B' : 'K', (double)size * ITERS3 / t);
-	}
-      printf ("\n");
-    }
+      memset (a, 'x', size);
+      a[size - 1] = 0;
 
+      uint64_t t = clock_get_ns ();
+      for (int i = 0; i < ITERS3; i++)
+	res += fn (a + (i & mask));
+      t = clock_get_ns () - t;
+      printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024,
+	      size < 1024 ? 'B' : 'K', (double)size * ITERS3 / t);
+    }
+  maskv &= res;
   printf ("\n");
+}
+
+int main (void)
+{
+  rand32 (0x12345678);
+  init_strlen_distribution ();
+  init_strlen_tests ();
+
+  DOTEST ("Random strlen (bytes/ns):\n", strlen_random);
+  DOTEST ("Small aligned strlen (bytes/ns):\n", strlen_small_aligned);
+  DOTEST ("Small unaligned strlen (bytes/ns):\n", strlen_small_unaligned);
+  DOTEST ("Medium strlen (bytes/ns):\n", strlen_medium);
 
   return 0;
 }
diff --git a/string/include/benchlib.h b/string/include/benchlib.h
index f1bbea388cd217..486504e99ddf0d 100644
--- a/string/include/benchlib.h
+++ b/string/include/benchlib.h
@@ -30,4 +30,35 @@ rand32 (uint32_t seed)
   return res;
 }
 
+/* Macros to run a benchmark BENCH using string function FN.  */
+#define RUN(BENCH, FN) BENCH(#FN, FN)
 
+#if __aarch64__
+# define RUNA64(BENCH, FN) BENCH(#FN, FN)
+#else
+# define RUNA64(BENCH, FN)
+#endif
+
+#if __ARM_FEATURE_SVE
+# define RUNSVE(BENCH, FN) BENCH(#FN, FN)
+#else
+# define RUNSVE(BENCH, FN)
+#endif
+
+#if WANT_MOPS
+# define RUNMOPS(BENCH, FN) BENCH(#FN, FN)
+#else
+# define RUNMOPS(BENCH, FN)
+#endif
+
+#if __arm__
+# define RUNA32(BENCH, FN) BENCH(#FN, FN)
+#else
+# define RUNA32(BENCH, FN)
+#endif
+
+#if __arm__ && __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2
+# define RUNT32(BENCH, FN) BENCH(#FN, FN)
+#else
+# define RUNT32(BENCH, FN)
+#endif
diff --git a/string/include/stringlib.h b/string/include/stringlib.h
index 01da7ebfc18db9..bb9db930f132ed 100644
--- a/string/include/stringlib.h
+++ b/string/include/stringlib.h
@@ -33,13 +33,12 @@ char *__strchr_aarch64_mte (const char *, int);
 char * __strchrnul_aarch64_mte (const char *, int );
 size_t __strlen_aarch64_mte (const char *);
 char *__strrchr_aarch64_mte (const char *, int);
-#if __ARM_NEON
 void *__memcpy_aarch64_simd (void *__restrict, const void *__restrict, size_t);
 void *__memmove_aarch64_simd (void *, const void *, size_t);
-#endif
 # if __ARM_FEATURE_SVE
 void *__memcpy_aarch64_sve (void *__restrict, const void *__restrict, size_t);
 void *__memmove_aarch64_sve (void *__restrict, const void *__restrict, size_t);
+void *__memset_aarch64_sve (void *, int, size_t);
 void *__memchr_aarch64_sve (const void *, int, size_t);
 int __memcmp_aarch64_sve (const void *, const void *, size_t);
 char *__strchr_aarch64_sve (const char *, int);
diff --git a/string/test/memcpy.c b/string/test/memcpy.c
index dc95844bd45a8c..98255e06f31c55 100644
--- a/string/test/memcpy.c
+++ b/string/test/memcpy.c
@@ -25,9 +25,7 @@ static const struct fun
   F(memcpy, 0)
 #if __aarch64__
   F(__memcpy_aarch64, 1)
-# if __ARM_NEON
   F(__memcpy_aarch64_simd, 1)
-# endif
 # if __ARM_FEATURE_SVE
   F(__memcpy_aarch64_sve, 1)
 # endif
diff --git a/string/test/memmove.c b/string/test/memmove.c
index b85dd1e864effe..ff3f7652f76329 100644
--- a/string/test/memmove.c
+++ b/string/test/memmove.c
@@ -25,9 +25,7 @@ static const struct fun
   F(memmove, 0)
 #if __aarch64__
   F(__memmove_aarch64, 1)
-# if __ARM_NEON
   F(__memmove_aarch64_simd, 1)
-# endif
 # if __ARM_FEATURE_SVE
   F(__memmove_aarch64_sve, 1)
 # endif
diff --git a/string/test/memset.c b/string/test/memset.c
index 7d09c267ffecfc..a9639f9b28b0a5 100644
--- a/string/test/memset.c
+++ b/string/test/memset.c
@@ -25,6 +25,9 @@ static const struct fun
   F(memset, 0)
 #if __aarch64__
   F(__memset_aarch64, 1)
+# if __ARM_FEATURE_SVE
+  F(__memset_aarch64_sve, 1)
+# endif
 # if WANT_MOPS
   F(__memset_aarch64_mops, 1)
 # endif

From bc77aa7df7339b166c0d6394526fe59dea89f4b1 Mon Sep 17 00:00:00 2001
From: Doug Rabson <dfr@FreeBSD.org>
Date: Mon, 23 Dec 2024 10:19:27 +0000
Subject: [PATCH 100/143] release: install etc files from the source tree, not
 the host

Reviewed by: cperciva
MFC after:   3 days
Differential Revision:	https://reviews.freebsd.org/D48180
---
 release/Makefile.oci                |  2 ++
 release/tools/oci-image-static.conf | 21 ++++++++++++---------
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/release/Makefile.oci b/release/Makefile.oci
index 461c95f49636b6..da35156c5a9592 100644
--- a/release/Makefile.oci
+++ b/release/Makefile.oci
@@ -24,6 +24,8 @@ OCI_DEPS_minimal= container-image-dynamic.txz
 .for _IMG in ${OCI_IMAGES}
 OCI_TARGETS+= container-image-${_IMG}.txz
 container-image-${_IMG}.txz: ${OCI_DEPS_${_IMG}}
+	# Adjust PATH so that we run pwd_mkdb from the bootstrap tools
+	env PATH=${OBJTOP}/tmp/legacy/bin:${PATH:Q} \
 	sh ${.CURDIR}/scripts/make-oci-image.sh ${.CURDIR} ${REVISION} ${BRANCH} ${TARGET_ARCH} ${_IMG}
 	skopeo copy \
 		containers-storage:localhost/freebsd${REVISION:R}-${_IMG}:latest \
diff --git a/release/tools/oci-image-static.conf b/release/tools/oci-image-static.conf
index 27cfb3c6778cc4..552328e66f3cab 100644
--- a/release/tools/oci-image-static.conf
+++ b/release/tools/oci-image-static.conf
@@ -7,17 +7,20 @@
 OCI_BASE_IMAGE=
 
 oci_image_build() {
-	mtree -deU -p $m/ -f /etc/mtree/BSD.root.dist > /dev/null
-	mtree -deU -p $m/var -f /etc/mtree/BSD.var.dist > /dev/null
-	mtree -deU -p $m/usr -f /etc/mtree/BSD.usr.dist > /dev/null
-	mtree -deU -p $m/usr/include -f /etc/mtree/BSD.include.dist > /dev/null
-	mtree -deU -p $m/usr/lib -f /etc/mtree/BSD.debug.dist > /dev/null
+	local srcdir=${curdir}/..
+	mtree -deU -p $m/ -f ${srcdir}/etc/mtree/BSD.root.dist > /dev/null
+	mtree -deU -p $m/var -f ${srcdir}/etc/mtree/BSD.var.dist > /dev/null
+	mtree -deU -p $m/usr -f ${srcdir}/etc/mtree/BSD.usr.dist > /dev/null
+	mtree -deU -p $m/usr/include -f ${srcdir}/etc/mtree/BSD.include.dist > /dev/null
+	mtree -deU -p $m/usr/lib -f ${srcdir}/etc/mtree/BSD.debug.dist > /dev/null
 	install_packages ${abi} ${workdir} $m FreeBSD-caroot FreeBSD-zoneinfo
-	cp /etc/master.passwd $m/etc
+	cp ${srcdir}/etc/master.passwd $m/etc
 	pwd_mkdb -p -d $m/etc $m/etc/master.passwd || return $?
-	cp /etc/group $m/etc || return $?
-	cp /etc/termcap.small $m/etc/termcap.small || return $?
-	cp /etc/termcap.small $m/usr/share/misc/termcap || return $?
+	cp ${srcdir}/etc/group $m/etc || return $?
+	# termcap.small is generated so we get it from OBJDIR - make sets our
+	# working directory to OBJDIR/release
+	cp ../etc/termcap/termcap.small $m/etc/termcap.small || return $?
+	cp ../etc/termcap/termcap.small $m/usr/share/misc/termcap || return $?
 	env DESTDIR=$m /usr/sbin/certctl rehash
 	# Generate a suitable repo config for pkgbase
 	case ${branch} in

From 84de8c51d1a0fff1c65cd1ec44dd3c3a0e7904eb Mon Sep 17 00:00:00 2001
From: Rick Macklem <rmacklem@FreeBSD.org>
Date: Fri, 10 Jan 2025 06:49:45 -0800
Subject: [PATCH 101/143] nfsd: Add vfs.nfsd.testing_disable_grace sysctl

The grace period of 2 minutes or more when the nfsd is started
is needed for normal operation.  It allows client(s) to
recover open/lock state.  However, for testing situations
where there are no client(s) to recover state, it introduces
an unacceptable delay.

The new per-vnet jail sysctl can be set non-zero to disable
the grace period.  It should only be used for testing and
can be applied on a per-jail basis.  It must be set before
the nfsd is started up.

Requested by:	asomers
Tested by:	asomers
---
 sys/fs/nfsserver/nfs_nfsdstate.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c
index 6cd8c1c861ec77..d1639f48451c62 100644
--- a/sys/fs/nfsserver/nfs_nfsdstate.c
+++ b/sys/fs/nfsserver/nfs_nfsdstate.c
@@ -115,6 +115,11 @@ SYSCTL_INT(_vfs_nfsd, OID_AUTO, flexlinuxhack, CTLFLAG_RW,
     &nfsrv_flexlinuxhack, 0,
     "For Linux clients, hack around Flex File Layout bug");
 
+NFSD_VNET_DEFINE_STATIC(bool, nfsd_disable_grace) = false;
+SYSCTL_BOOL(_vfs_nfsd, OID_AUTO, testing_disable_grace,
+    CTLFLAG_NFSD_VNET | CTLFLAG_RW, &NFSD_VNET_NAME(nfsd_disable_grace),
+    0, "Disable grace for testing");
+
 /*
  * Hash lists for nfs V4.
  */
@@ -4381,11 +4386,13 @@ nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
 		 * ReclaimComplete.  If so, grace can end now.
 		 */
 		notreclaimed = 0;
-		LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head,
-		    nst_list) {
-			if ((sp->nst_flag & NFSNST_RECLAIMED) == 0) {
-				notreclaimed = 1;
-				break;
+		if (!NFSD_VNET(nfsd_disable_grace)) {
+			LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head,
+			    nst_list) {
+				if ((sp->nst_flag & NFSNST_RECLAIMED) == 0) {
+					notreclaimed = 1;
+					break;
+				}
 			}
 		}
 		if (notreclaimed == 0)

From 5e7d93a604400ca3c9db3be1df82ce963527740c Mon Sep 17 00:00:00 2001
From: Getz Mikalsen <getz@FreeBSD.org>
Date: Mon, 26 Aug 2024 20:13:31 +0200
Subject: [PATCH 102/143] lib/libc/aarch64/string: add strcmp SIMD
 implementation

This changeset includes a port of the SIMD implementation of strcmp
for amd64 to Aarch64.

Below is a description of its method as described in D41971.

The basic idea is to process the bulk of the string in aligned
blocks of 16 bytes such that one string runs ahead and the other
runs behind. The string that runs ahead is checked for NUL bytes,
the one that runs behind is compared with the corresponding chunk
of the string that runs ahead. This trades an extra load per
iteration for the very complicated block-reassembly needed in the
other implementations (bionic, glibc). On the flip side, we need
two code paths depending on the relative alignment of the two
buffers.

The initial part of the string is compared directly if it is known
not to cross a page boundary. Otherwise, a complex slow path to
avoid crossing into unmapped memory commences.

Performance is better in most cases than the existing
implementation from the Arm Optimized Routines repository.

See the DR for benchmark results.

Tested by:	fuz (exprun)
Reviewed by:	fuz, emaste
Sponsored by:	Google LLC (GSoC 2024)
PR:		281175
Differential Revision: https://reviews.freebsd.org/D45839
---
 lib/libc/aarch64/string/Makefile.inc |   4 +-
 lib/libc/aarch64/string/strcmp.S     | 350 +++++++++++++++++++++++++++
 2 files changed, 353 insertions(+), 1 deletion(-)
 create mode 100644 lib/libc/aarch64/string/strcmp.S

diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
index cabc79e4f35140..ba0947511872cf 100644
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -13,13 +13,15 @@ AARCH64_STRING_FUNCS= \
 	stpcpy \
 	strchr \
 	strchrnul \
-	strcmp \
 	strcpy \
 	strlen \
 	strncmp \
 	strnlen \
 	strrchr
 
+# SIMD-enhanced routines not derived from Arm's code
+MDSRCS+= \
+	strcmp.S
 #
 # Add the above functions. Generate an asm file that includes the needed
 # Arm Optimized Routines file defining the function name to the libc name.
diff --git a/lib/libc/aarch64/string/strcmp.S b/lib/libc/aarch64/string/strcmp.S
new file mode 100644
index 00000000000000..e8418dfc6763a0
--- /dev/null
+++ b/lib/libc/aarch64/string/strcmp.S
@@ -0,0 +1,350 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Getz Mikalsen <getz@FreeBSD.org>
+*/
+
+#include <machine/asm.h>
+#include <machine/param.h>
+
+	.weak	strcmp
+	.set	strcmp, __strcmp
+	.text
+
+ENTRY(__strcmp)
+	/* In: x0 = str1, x1 = str2.  Out: w0 = byte difference at first mismatch/NUL. */
+	bic	x8, x0, #0xf			// x0 aligned to the boundary
+	and	x9, x0, #0xf			// x9 is the offset
+	bic	x10, x1, #0xf			// x1 aligned to the boundary
+	and	x11, x1, #0xf			// x11 is the offset
+
+	mov	x13, #-1			// all ones, source for the head masks
+
+	/*
+	 * Check if either string is located at end of page to avoid crossing
+	 * into unmapped page. If so, we load 16 bytes from the nearest
+	 * alignment boundary and shift based on the offset.
+	 */
+
+	add	x3, x0, #16			// end of head
+	add	x4, x1, #16
+	eor	x3, x3, x0
+	eor	x4, x4, x1			// bits that changed
+	orr	x3, x3, x4			// in either str1 or str2
+	tbz	w3, #PAGE_SHIFT, .Lbegin	// page bit unchanged: plain loads
+
+	ldr	q0, [x8]			// load aligned head
+	ldr	q2, [x10]
+
+	lsl	x14, x9, #2			// offsets scaled to 4 mask bits/byte
+	lsl	x15, x11, #2
+	lsl	x3, x13, x14			// string head
+	lsl	x4, x13, x15
+
+	cmeq	v5.16b, v0.16b, #0		// NUL bytes in the aligned chunks
+	cmeq	v6.16b, v2.16b, #0
+
+	shrn	v5.8b, v5.8h, #4		// narrow to 4 mask bits per byte
+	shrn	v6.8b, v6.8h, #4
+	fmov	x5, d5
+	fmov	x6, d6
+
+	adrp	x2, shift_data
+	add	x2, x2, :lo12:shift_data	// x2 = &shift_data
+
+	/* heads may cross page boundary, avoid unmapped loads */
+	tst	x5, x3				// NUL within the head of str1?
+	b.eq	0f
+
+	ldr	q4, [x2, x9]			// load permutation table
+	tbl	v0.16b, {v0.16b}, v4.16b	// move head to lane 0, zero the rest
+
+	b		1f
+	.p2align 4
+0:
+	ldr	q0, [x0]			// load true head
+1:
+	tst	x6, x4				// NUL within the head of str2?
+	b.eq	0f
+
+	ldr	q4, [x2, x11]
+	tbl	v4.16b, {v2.16b}, v4.16b	// same permutation for str2
+
+	b 1f
+
+	.p2align 4
+.Lbegin:
+	ldr	q0, [x0]			// load true heads
+0:
+	ldr	q4, [x1]
+1:
+
+	cmeq	v2.16b, v0.16b, #0		// NUL byte present?
+	cmeq	v4.16b, v0.16b, v4.16b		// which bytes match?
+
+	orn	v2.16b, v2.16b, v4.16b		// mismatch or NUL byte?
+
+	shrn	v2.8b, v2.8h, #4
+	fmov	x5, d2
+
+	cbnz	x5, .Lhead_mismatch
+
+	ldr	q2, [x8, #16]			// load second chunk
+	ldr	q3, [x10, #16]
+	subs	x9, x9, x11			// is a&0xf >= b&0xf
+	b.lo	.Lswapped			// if not swap operands
+	sub	x12, x10, x9
+	ldr	q0, [x12, #16]!			// str2 bytes opposite str1's chunk
+	sub	x10, x10, x8			// x10 = aligned str2 - aligned str1
+	sub	x11, x10, x9			// x11 = x10 less the offset difference
+
+	cmeq	v1.16b, v3.16b, #0		// NUL in str2's aligned chunk?
+	cmeq	v0.16b, v0.16b, v2.16b		// do the chunks match?
+	add	x8, x8, #16
+	shrn	v1.8b, v1.8h, #4
+	fmov	x6, d1
+	shrn	v0.8b, v0.8h, #4
+	fmov	x5, d0
+	cbnz	x6, .Lnulfound
+	mvn	x5, x5				// any mismatches?
+	cbnz	x5, .Lmismatch
+	add	x8, x8, #16			// advance aligned pointers
+
+	/*
+	 * During the main loop, the layout of the two strings is something like:
+	 *
+	 *          v ------1------ v ------2------ v
+	 *      X0:    AAAAAAAAAAAAABBBBBBBBBBBBBBBB...
+	 *      X1: AAAAAAAAAAAAABBBBBBBBBBBBBBBBCCC...
+	 *
+	 * where v indicates the alignment boundaries and corresponding chunks
+	 * of the strings have the same letters.  Chunk A has been checked in
+	 * the previous iteration.  This iteration, we first check that string
+	 * X1 doesn't end within region 2, then we compare chunk B between the
+	 * two strings.  As X1 is known not to hold a NUL byte in regions 1
+	 * and 2 at this point, this also ensures that x0 has not ended yet.
+	 */
+	.p2align 4
+0:
+	ldr	q0, [x8, x11]			// str2 bytes opposite str1's chunk
+	ldr	q1, [x8, x10]			// str2 aligned chunk
+	ldr	q2, [x8]			// str1 aligned chunk
+
+	cmeq	v1.16b, v1.16b, #0		// end of string?
+	cmeq	v0.16b, v0.16b, v2.16b		// do the chunks match?
+
+	shrn	v1.8b, v1.8h, #4
+	fmov	x6, d1
+	shrn	v0.8b, v0.8h, #4
+	fmov	x5, d0
+	cbnz	x6, .Lnulfound
+	mvn	x5, x5				// any mismatches?
+	cbnz	x5, .Lmismatch
+
+	add	x8, x8, #16
+
+	ldr	q0, [x8, x11]			// second, unrolled copy of the step
+	ldr	q1, [x8, x10]
+	ldr	q2, [x8]
+
+	add	x8, x8, #16
+	cmeq	v1.16b, v1.16b, #0
+	cmeq	v0.16b, v0.16b, v2.16b
+
+	shrn	v1.8b, v1.8h, #4
+	fmov	x6, d1
+	shrn	v0.8b, v0.8h, #4
+	fmov	x5, d0
+	cbnz	x6, .Lnulfound2
+	mvn	x5, x5
+	cbz	x5, 0b
+
+	sub	x8, x8, #16			// roll back second increment
+.Lmismatch:
+	rbit	x2, x5
+	clz	x2, x2				// index of mismatch
+	lsr	x2, x2, #2			// 4 mask bits per byte
+	add	x11, x8, x11			// x11 -> str2 bytes that were compared
+
+	ldrb	w4, [x8, x2]
+	ldrb	w5, [x11, x2]
+	sub	w0, w4, w5			// byte difference
+	ret
+
+	.p2align 4
+.Lnulfound2:
+	sub	x8, x8, #16			// roll back second increment
+
+.Lnulfound:
+	mov	x7, x9
+	mov	x4, x6				// keep the unshifted NUL mask
+
+	ubfiz	x7, x7, #2, #4			// x7 = (x7 & 0xf) << 2
+	lsl	x6, x6, x7			// adjust NUL mask to indices
+	orn	x5, x6, x5			// NUL or mismatch in compared chunk?
+	cbnz	x5, .Lmismatch
+
+	/*
+	 * (x0) == (x1) and NUL is past the string.
+	 * Compare (x1) with the corresponding part
+	 * of the other string until the NUL byte.
+	 */
+	ldr	q0, [x8, x9]
+	ldr	q1, [x8, x10]
+
+	cmeq	v1.16b, v0.16b, v1.16b		// which bytes match?
+	shrn	v1.8b, v1.8h, #4
+	fmov	x5, d1
+
+	orn	x5, x4, x5			// NUL or mismatch?
+
+	rbit	x2, x5
+	clz	x2, x2
+	lsr	x5, x2, #2			// x5 = byte index of first NUL/mismatch
+
+	add	x10, x10, x8			// restore x10 pointer
+	add	x8, x8, x9			// point to corresponding chunk
+
+	ldrb	w4, [x8, x5]
+	ldrb	w5, [x10, x5]
+	sub	w0, w4, w5			// byte difference
+	ret
+
+	.p2align 4
+.Lhead_mismatch:
+	rbit	x2, x5
+	clz	x2, x2				// index of mismatch
+	lsr	x2, x2, #2
+	ldrb	w4, [x0, x2]			// reload the deciding bytes
+	ldrb	w5, [x1, x2]
+	sub	w0, w4, w5			// byte difference
+	ret
+
+	/*
+	 * If (a&0xf) < (b&0xf), we do the same thing but with swapped
+	 * operands.  I found that this performs slightly better than
+	 * using conditional moves to do the swap branchless.
+	 */
+	.p2align 4
+.Lswapped:
+	add	x12, x8, x9			// x9 is negative on this path
+	ldr	q0, [x12, #16]!			// str1 bytes opposite str2's chunk
+	sub	x8, x8, x10			// x8 = aligned str1 - aligned str2
+	add	x11, x8, x9			// x11 = x8 plus the offset difference
+	neg	x9, x9				// make the offset difference positive
+
+	cmeq	v1.16b, v2.16b, #0		// NUL in str1's aligned chunk?
+	cmeq	v0.16b, v0.16b, v3.16b		// do the chunks match?
+	add	x10, x10, #16
+	shrn	v1.8b, v1.8h, #4
+	fmov	x6, d1
+	shrn	v0.8b, v0.8h, #4
+	fmov	x5, d0
+	cbnz	x6, .Lnulfounds
+	mvn	x5, x5				// any mismatches?
+	cbnz	x5, .Lmismatchs
+	add	x10, x10, #16			// advance aligned pointers
+
+	/*
+	 * During the main loop, the layout of the two strings is something like:
+	 *
+	 *          v ------1------ v ------2------ v
+	 *      X1:    AAAAAAAAAAAAABBBBBBBBBBBBBBBB...
+	 *      X0: AAAAAAAAAAAAABBBBBBBBBBBBBBBBCCC...
+	 *
+	 * where v indicates the alignment boundaries and corresponding chunks
+	 * of the strings have the same letters.  Chunk A has been checked in
+	 * the previous iteration.  This iteration, we first check that string
+	 * X0 doesn't end within region 2, then we compare chunk B between the
+	 * two strings.  As X0 is known not to hold a NUL byte in regions 1
+	 * and 2 at this point, this also ensures that X1 has not ended yet.
+	 */
+	.p2align 4
+0:
+	ldr	q0, [x10, x11]			// str1 bytes opposite str2's chunk
+	ldr	q1, [x10, x8]			// str1 aligned chunk
+	ldr	q2, [x10]			// str2 aligned chunk
+
+	cmeq	v1.16b, v1.16b, #0		// end of string?
+	cmeq	v0.16b, v0.16b, v2.16b		// do the chunks match?
+
+	shrn	v1.8b, v1.8h, #4
+	fmov	x6, d1
+	shrn	v0.8b, v0.8h, #4
+	fmov	x5, d0
+	cbnz	x6, .Lnulfounds
+	mvn	x5, x5				// any mismatches?
+	cbnz	x5, .Lmismatchs
+
+	add	x10, x10, #16
+
+	ldr	q0, [x10, x11]			// second, unrolled copy of the step
+	ldr	q1, [x10, x8]
+	ldr	q2, [x10]
+
+	add	x10, x10, #16
+	cmeq	v1.16b, v1.16b, #0
+	cmeq	v0.16b, v0.16b, v2.16b
+
+	shrn	v1.8b, v1.8h, #4
+	fmov	x6, d1
+	shrn	v0.8b, v0.8h, #4
+	fmov	x5, d0
+	cbnz	x6, .Lnulfound2s
+	mvn	x5, x5
+	cbz	x5, 0b
+
+	sub	x10, x10, #16			// roll back second increment
+
+.Lmismatchs:
+	rbit	x2, x5
+	clz	x2, x2				// index of mismatch
+	lsr	x2, x2, #2
+	add	x11, x10, x11			// x11 -> str1 bytes that were compared
+
+	ldrb	w4, [x10, x2]			// w4 = str2 byte
+	ldrb	w5, [x11, x2]			// w5 = str1 byte
+	sub	w0, w5, w4			// operands swapped back: str1 - str2
+	ret
+
+	.p2align 4
+.Lnulfound2s:
+	sub	x10, x10, #16			// roll back second increment
+.Lnulfounds:
+	mov	x7, x9
+	mov	x4, x6				// keep the unshifted NUL mask
+
+	ubfiz	x7, x7, #2, #4			// x7 = (x7 & 0xf) << 2
+	lsl	x6, x6, x7			// adjust NUL mask to indices
+	orn	x5, x6, x5			// NUL or mismatch in compared chunk?
+	cbnz	x5, .Lmismatchs
+
+	ldr	q0, [x10, x9]
+	ldr	q1, [x10, x8]
+
+	cmeq	v1.16b, v0.16b, v1.16b		// which bytes match?
+	shrn	v1.8b, v1.8h, #4
+	fmov	x5, d1
+
+	orn	x5, x4, x5			// NUL or mismatch?
+
+	rbit	x2, x5
+	clz	x2, x2
+	lsr	x5, x2, #2			// x5 = byte index of first NUL/mismatch
+
+	add	x11, x10, x8			// x11 -> str1 aligned chunk
+	add	x10, x10, x9			// x10 -> corresponding str2 bytes
+
+	ldrb	w4, [x10, x5]			// w4 = str2 byte
+	ldrb	w5, [x11, x5]			// w5 = str1 byte
+	sub	w0, w5, w4			// str1 - str2
+	ret
+
+END(__strcmp)
+
+	.section .rodata
+	.p2align 4
+shift_data:	// tbl index table; a 16-byte vector is loaded at offset 0..15
+	.byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+	.fill 16, 1, -1	// 0xff is out of range for tbl, which then yields 0
+	.size shift_data, .-shift_data

From b91003acffe7b50dd6506be15116c6b42fc512c6 Mon Sep 17 00:00:00 2001
From: Getz Mikalsen <getz@FreeBSD.org>
Date: Mon, 26 Aug 2024 20:13:54 +0200
Subject: [PATCH 103/143] lib/libc/aarch64/string: add strspn optimized
 implementation

This is a port of the Scalar optimized variant of strspn for amd64
to aarch64.

It utilizes a LUT to speed up the function; a SIMD variant is still
under development.

See the DR for benchmark results.

Tested by:	fuz (exprun)
Reviewed by:	fuz, emaste
Sponsored by:	Google LLC (GSoC 2024)
PR:		281175
Differential Revision: https://reviews.freebsd.org/D46396
---
 lib/libc/aarch64/string/Makefile.inc |   4 +-
 lib/libc/aarch64/string/strspn.S     | 111 +++++++++++++++++++++++++++
 2 files changed, 114 insertions(+), 1 deletion(-)
 create mode 100644 lib/libc/aarch64/string/strspn.S

diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
index ba0947511872cf..09bfaef963eb5f 100644
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -21,7 +21,9 @@ AARCH64_STRING_FUNCS= \
 
 # SIMD-enhanced routines not derived from Arm's code
 MDSRCS+= \
-	strcmp.S
+	strcmp.S \
+	strspn.S
+
 #
 # Add the above functions. Generate an asm file that includes the needed
 # Arm Optimized Routines file defining the function name to the libc name.
diff --git a/lib/libc/aarch64/string/strspn.S b/lib/libc/aarch64/string/strspn.S
new file mode 100644
index 00000000000000..0ef42c2b737e90
--- /dev/null
+++ b/lib/libc/aarch64/string/strspn.S
@@ -0,0 +1,111 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Getz Mikalsen <getz@FreeBSD.org>
+*/
+
+#include <machine/asm.h>
+
+	.weak	strspn
+	.set	strspn, __strspn
+	.text
+
+ENTRY(__strspn)
+	/* In: x0 = string, x1 = set.  Out: x0 = length of the leading run of set bytes. */
+	/* check for special cases */
+	ldrb	w4, [x1]		// first character in set
+	cbz	w4, .Lzero		// empty set always returns 0
+
+	mov	x15, #1			// preload register with 1 for stores
+
+	// set is only one character
+	ldrb	w5, [x1, #1]		// second character in the set
+	cbz	w5, .Lsingle
+
+	stp	x29, x30, [sp, #-16]!	// save fp/lr
+	mov	x29, sp			// x29 keeps sp for the epilogue
+	sub	sp, sp, #256		// allocate 256 bytes on the stack
+
+	/* no special case matches -- prepare lookup table */
+	mov	w3, #28			// last 32-byte group, counted in 8-byte units
+0:	add	x9, sp, x3, lsl #3	// x9 = sp + 8 * x3
+	stp	xzr, xzr, [x9]		// clear 32 bytes of the table
+	stp	xzr, xzr, [x9, #16]
+	subs	w3, w3, #4		// step down by 32 bytes
+	b.cs	0b			// until the subtraction borrows
+
+	strb	w15, [sp, x4]		// register first character in set
+	add	x1, x1, #2		// x1 -> third character of the set
+
+	/* process remaining chars in set */
+	.p2align 4
+
+
+0:	ldrb	w4, [x1]		// next char in set
+	strb	w15, [sp, x5]		// register previous char
+	cbz	w4, 1f			// NUL encountered?
+
+	ldrb	w5, [x1, #1]		// the char after that
+	add	x1, x1, #2
+	strb	w15, [sp, x4]		// register the char loaded first
+	cbnz	w5, 0b			// the NUL itself is never registered
+
+1:	mov	x5, x0			// stash a copy of src
+
+	/* find mismatch */
+	.p2align 4
+0:	ldrb	w8, [x0]
+	ldrb	w9, [sp, x8]		// table entry for this byte
+	cbz	w9, 2f			// not in set: mismatch at +0
+
+	ldrb	w8, [x0, #1]
+	ldrb	w9, [sp, x8]
+	cbz	w9, 3f			// mismatch at +1
+
+	ldrb	w8, [x0, #2]
+	ldrb	w9, [sp, x8]
+	cbz	w9, 4f			// mismatch at +2
+
+	ldrb	w8, [x0, #3]
+	add	x0, x0, #4
+	ldrb	w9, [sp, x8]
+	cbnz	w9, 0b			// all four in set: next group
+
+	sub	x0, x0, #3		// mismatch at +3, x0 is already 4 ahead
+4:	sub	x5, x5, #1		// each step below adds 1 to x0 - x5
+3:	add	x0, x0, #1
+2:	sub	x0, x0, x5		// result = mismatch position - start
+	mov	sp, x29			// drop the lookup table
+	ldp	x29, x30, [sp], #16
+	ret
+
+.Lzero:
+	mov	x0, #0
+	ret
+
+.Lsingle:				// x5 is 0 on entry (the NUL just loaded)
+	ldrb	w8, [x0, x5]		// x5 is the running index into the string
+	cmp	w4, w8
+	b.ne	1f
+
+	add	x5, x5, #1
+	ldrb	w8, [x0, x5]
+	cmp	w4, w8
+	b.ne	1f
+
+	add	x5, x5, #1
+	ldrb	w8, [x0, x5]
+	cmp	w4, w8
+	b.ne	1f
+
+	add	x5, x5, #1
+	ldrb	w8, [x0, x5]
+	add	x5, x5, #1		// advance before the branch back
+	cmp	w4, w8
+	b.eq	.Lsingle
+
+	sub	x5, x5, #1		// undo the early advance
+1:	mov	x0, x5			// result = index of first other byte
+	ret
+
+END(__strspn)

From f2bd390a54f183f85dd7faab815740fb3bea9591 Mon Sep 17 00:00:00 2001
From: Getz Mikalsen <getz@FreeBSD.org>
Date: Mon, 26 Aug 2024 20:14:01 +0200
Subject: [PATCH 104/143] lib/libc/aarch64/string: add strcspn optimized
 implementation

This is a port of the Scalar optimized variant of strcspn for amd64
to aarch64.  It utilizes a LUT to speed up the function; a SIMD
variant is still under development.

Performance benchmarks are as usual generated by strperf.

See the DR for benchmark results.

Tested by:	fuz (exprun)
Reviewed by:	fuz, emaste
Sponsored by:	Google LLC (GSoC 2024)
PR:		281175
Differential Revision: https://reviews.freebsd.org/D46398
---
 lib/libc/aarch64/string/Makefile.inc |   3 +-
 lib/libc/aarch64/string/strcspn.S    | 109 +++++++++++++++++++++++++++
 2 files changed, 111 insertions(+), 1 deletion(-)
 create mode 100644 lib/libc/aarch64/string/strcspn.S

diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
index 09bfaef963eb5f..34483532a3dd3c 100644
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -22,7 +22,8 @@ AARCH64_STRING_FUNCS= \
 # SIMD-enhanced routines not derived from Arm's code
 MDSRCS+= \
 	strcmp.S \
-	strspn.S
+	strspn.S \
+	strcspn.S
 
 #
 # Add the above functions. Generate an asm file that includes the needed
diff --git a/lib/libc/aarch64/string/strcspn.S b/lib/libc/aarch64/string/strcspn.S
new file mode 100644
index 00000000000000..8f2d6d20f0f66b
--- /dev/null
+++ b/lib/libc/aarch64/string/strcspn.S
@@ -0,0 +1,109 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Getz Mikalsen <getz@FreeBSD.org>
+*/
+
+#include <machine/asm.h>
+
+	.weak	strcspn
+	.set	strcspn, __strcspn
+	.text
+
+ENTRY(__strcspn)
+	stp	x29, x30, [sp, #-16]!	// x0 = string, x1 = set; save fp/lr
+	mov	x29, sp			// x29 keeps sp for the epilogue
+	mov	x15, #1			// preload register with 1 for stores
+
+	/* check for special cases */
+	ldrb	w4, [x1]		// first character in the set
+	cbz	w4, .Lstrlen
+
+	movi	v0.16b, #0		// zero vector for clearing the table
+
+	ldrb	w5, [x1, #1]		// second character in the set
+	cbz	w5, .Lstrchr
+
+	sub	sp, sp, #256		// allocate 256 bytes on the stack
+
+	/* no special case matches -- prepare lookup table */
+	mov	w3, #20			// clears table bytes 0..191
+	.p2align 4
+0:	add	x9, sp, x3, lsl #3	// x9 = sp + 8 * x3
+	stp	xzr, xzr, [x9]		// clear 32 bytes of the table
+	stp	xzr, xzr, [x9, #16]
+	subs	w3, w3, #4		// step down by 32 bytes
+	b.cs	0b			// until the subtraction borrows
+
+	/* utilize SIMD stores to speed up zeroing the table */
+	stp	q0, q0, [sp, #6*32]	// table bytes 192..223
+	stp	q0, q0, [sp, #7*32]	// table bytes 224..255
+
+	add	x1, x1, #2		// x1 -> third character of the set
+	strb	w15, [sp, x4]		// register first chars in the set
+	strb	w15, [sp, x5]
+
+	mov	x4, x0			// stash a copy of src
+
+	/* process remaining chars in set */
+	.p2align 4
+0:	ldrb	w5, [x1]
+	strb	w15, [sp, x5]		// also registers the set's NUL (entry 0)
+	cbz	w5, 1f			// end of set?
+
+	ldrb	w5, [x1, #1]
+	strb	w15, [sp, x5]
+	cbz	w5, 1f
+
+	add	x1, x1, #2
+	b	0b
+
+	/* find match */
+	.p2align 4
+1:	ldrb	w8, [x0]
+	ldrb	w9, [sp, x8]		// table entry for this byte
+	cbnz	w9, 2f			// in set (or NUL): match at +0
+
+	ldrb	w8, [x0, #1]
+	ldrb	w9, [sp, x8]
+	cbnz	w9, 3f			// match at +1
+
+	ldrb	w8, [x0, #2]
+	ldrb	w9, [sp, x8]
+	cbnz	w9, 4f			// match at +2
+
+	ldrb	w8, [x0, #3]
+	ldrb	w9, [sp, x8]
+	add	x0, x0, #4
+	cbz	w9, 1b			// none of the four matched: next group
+
+	sub	x0, x0, #3		// fix up return value
+4:	sub	x4, x4, #1		// each step below adds 1 to x0 - x4
+3:	add	x0, x0, #1
+2:	sub	x0, x0, x4		// result = match position - start
+	mov	sp, x29
+	ldp	x29, x30, [sp], #16	// restore sp and lr
+	ret
+
+	/* set is empty, degrades to strlen */
+	.p2align 4
+.Lstrlen:
+	mov	sp, x29
+	ldp	x29, x30, [sp], #16	// restore sp and lr
+	b	strlen			// tail call, x0 still holds the string
+
+	/* just one character in set, degrades to strchrnul */
+	.p2align 4
+.Lstrchr:
+	stp	x0, x1, [sp, #-16]!	// stash src across the call
+	mov	x1, x4			// second argument: the only set character
+
+	bl	strchrnul
+
+	ldp	x16, x17, [sp], #16	// restore stashed src (x18 is left alone)
+	sub	x0, x0, x16		// returned pointer - src = span length
+
+	ldp	x29, x30, [sp], #16	// Restore sp and lr
+	ret
+
+END(__strcspn)

From 89b3872376cbb6e8ab53cb50fa8c4c6d14e2d405 Mon Sep 17 00:00:00 2001
From: Getz Mikalsen <getz@FreeBSD.org>
Date: Mon, 26 Aug 2024 20:14:08 +0200
Subject: [PATCH 105/143] lib/libc/aarch64/string: add optimized strpbrk &
 strsep implementations

These are direct copies from the amd64 string functions using the
optimized strcspn from D46398.

Tested by:	fuz (exprun)
Reviewed by:	fuz, emaste
Sponsored by:	Google LLC (GSoC 2024)
PR:		281175
Differential Revision: https://reviews.freebsd.org/D46399
---
 lib/libc/aarch64/string/Makefile.inc |  4 +-
 lib/libc/aarch64/string/strpbrk.c    | 43 +++++++++++++++++++++
 lib/libc/aarch64/string/strsep.c     | 57 ++++++++++++++++++++++++++++
 3 files changed, 103 insertions(+), 1 deletion(-)
 create mode 100644 lib/libc/aarch64/string/strpbrk.c
 create mode 100644 lib/libc/aarch64/string/strsep.c

diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
index 34483532a3dd3c..996a2fd45bc034 100644
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -23,7 +23,9 @@ AARCH64_STRING_FUNCS= \
 MDSRCS+= \
 	strcmp.S \
 	strspn.S \
-	strcspn.S
+	strcspn.S \
+	strpbrk.c \
+	strsep.c
 
 #
 # Add the above functions. Generate an asm file that includes the needed
diff --git a/lib/libc/aarch64/string/strpbrk.c b/lib/libc/aarch64/string/strpbrk.c
new file mode 100644
index 00000000000000..87f5877899918f
--- /dev/null
+++ b/lib/libc/aarch64/string/strpbrk.c
@@ -0,0 +1,43 @@
+/*-
+ * Copyright (c) 2023 The FreeBSD Foundation
+ *
+ * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE
+ */
+
+#include <sys/cdefs.h>
+
+#include <string.h>
+
+size_t __strcspn(const char *, const char *);
+
+char *
+strpbrk(const char *s, const char *charset)
+{
+	/* Skip the leading run of bytes that do not occur in charset. */
+	const char *hit = s + __strcspn(s, charset);
+
+	/* Stopping on the terminator means nothing in s matched. */
+	return (*hit != '\0' ? (char *)hit : NULL);
+}
diff --git a/lib/libc/aarch64/string/strsep.c b/lib/libc/aarch64/string/strsep.c
new file mode 100644
index 00000000000000..7afd47957aa9af
--- /dev/null
+++ b/lib/libc/aarch64/string/strsep.c
@@ -0,0 +1,57 @@
+/*-
+ * Copyright (c) 2023 The FreeBSD Foundation
+ *
+ * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE
+ */
+
+#include <sys/cdefs.h>
+#include <string.h>
+
+size_t __strcspn(const char *, const char *);
+
+/*
+ * We have a fast strcspn() on aarch64.  Use it over a direct
+ * implementation of strsep for better performance.
+ */
+char *
+strsep(char **stringp, const char *delim)
+{
+	char *token, *end;
+
+	/* Nothing to tokenize if the cursor is already NULL. */
+	token = *stringp;
+	if (token == NULL)
+		return (NULL);
+
+	/* end lands on the first delimiter, or on the terminating NUL. */
+	end = token + __strcspn(token, delim);
+	if (*end != '\0') {
+		*end = '\0';		/* terminate the token in place */
+		*stringp = end + 1;	/* resume just past the delimiter */
+	} else
+		*stringp = NULL;	/* no delimiter left: last token */
+
+	return (token);
+}

From 79287d783c72f95eb47c26dbfdfca279086e16a9 Mon Sep 17 00:00:00 2001
From: Getz Mikalsen <getz@FreeBSD.org>
Date: Mon, 26 Aug 2024 20:14:15 +0200
Subject: [PATCH 106/143] lib/libc/aarch64/string: strcat enable use of SIMD

Call into SIMD strlen and stpcpy for an optimized strcat. Port of
D42600 for amd64.

Tested by:	fuz (exprun)
Reviewed by:	fuz, emaste
Sponsored by:	Google LLC (GSoC 2024)
PR:		281175
Differential Revision: https://reviews.freebsd.org/D46417
---
 lib/libc/aarch64/string/Makefile.inc |  3 ++-
 lib/libc/aarch64/string/strcat.c     | 20 ++++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)
 create mode 100644 lib/libc/aarch64/string/strcat.c

diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
index 996a2fd45bc034..0b2974947389e1 100644
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -25,7 +25,8 @@ MDSRCS+= \
 	strspn.S \
 	strcspn.S \
 	strpbrk.c \
-	strsep.c
+	strsep.c \
+	strcat.c
 
 #
 # Add the above functions. Generate an asm file that includes the needed
diff --git a/lib/libc/aarch64/string/strcat.c b/lib/libc/aarch64/string/strcat.c
new file mode 100644
index 00000000000000..c70875be1c1a1e
--- /dev/null
+++ b/lib/libc/aarch64/string/strcat.c
@@ -0,0 +1,20 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Getz Mikalsen <getz@FreeBSD.org>
+*/
+
+#include <string.h>
+
+#undef strcat	/* _FORTIFY_SOURCE */
+
+char *
+strcat(char * __restrict s, const char * __restrict append)
+{
+	/* Locate the end of s with the SIMD optimized strlen ... */
+	char *tail = s + strlen(s);
+
+	/* ... and let the SIMD optimized stpcpy append from there. */
+	(void)stpcpy(tail, append);
+	return (s);
+}

From 756b7fc80837567d114a3c93e9bb987e219a1b23 Mon Sep 17 00:00:00 2001
From: Getz Mikalsen <getz@FreeBSD.org>
Date: Mon, 26 Aug 2024 20:14:31 +0200
Subject: [PATCH 107/143] lib/libc/aarch64/string: add strlcpy SIMD
 implementation

This changeset includes a port of the SIMD implementation of
strlcpy for amd64 to Aarch64.

It is based on memccpy (D46170) with some minor differences.

Performance is significantly better than the scalar implementation.

Benchmark results are as usual generated by the strperf utility
written by fuz.

See the DR for benchmark results.

Tested by:	fuz (exprun)
Reviewed by:	fuz, emaste
Sponsored by:	Google LLC (GSoC 2024)
PR:		281175
Differential Revision: https://reviews.freebsd.org/D46243
---
 lib/libc/aarch64/string/Makefile.inc |   3 +-
 lib/libc/aarch64/string/strlcpy.S    | 316 +++++++++++++++++++++++++++
 2 files changed, 318 insertions(+), 1 deletion(-)
 create mode 100644 lib/libc/aarch64/string/strlcpy.S

diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
index 0b2974947389e1..34a84bcfe1331b 100644
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -26,7 +26,8 @@ MDSRCS+= \
 	strcspn.S \
 	strpbrk.c \
 	strsep.c \
-	strcat.c
+	strcat.c \
+	strlcpy.S
 
 #
 # Add the above functions. Generate an asm file that includes the needed
diff --git a/lib/libc/aarch64/string/strlcpy.S b/lib/libc/aarch64/string/strlcpy.S
new file mode 100644
index 00000000000000..3859aaca447bfd
--- /dev/null
+++ b/lib/libc/aarch64/string/strlcpy.S
@@ -0,0 +1,316 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Getz Mikalsen <getz@FreeBSD.org>
+*/
+
+#include <machine/asm.h>
+
+	.weak strlcpy
+	.set strlcpy, __strlcpy
+	.text
+
+ENTRY(__strlcpy)
+	subs	x2, x2, #1		// x0 = dst, x1 = src; x2 = dstsize - 1
+	b.lo	.L0			// dstsize was 0: only measure src
+
+	mov	x9, x0			// stash copy of dst pointer
+	bic	x10, x1, #0xf		// src aligned
+	and	x11, x1, #0xf		// src offset
+
+	ldr	q1, [x10]
+	cmeq	v1.16b, v1.16b, #0	// NUL found in head?
+
+	mov	x8, #-1			// fill register with 0xfff..fff
+	lsl	x12, x11, #2
+	lsl	x8, x8, x12		// mask of bytes in the string
+
+	shrn	v1.8b, v1.8h, #4
+	fmov	x5, d1
+
+	ands	x5, x5, x8		// ignore bytes in front of src
+	b.ne	.Lhead_nul
+
+	ldr	q3, [x10, #16]		// load second string chunk
+	ldr	q2, [x1]		// load true head
+	mov	x8, #32
+	sub	x8, x8, x11		// string bytes in the first two chunks
+
+	cmeq	v1.16b, v3.16b, #0	// NUL found in second chunk?
+
+	subs	x2, x2, x8
+	b.ls	.Lhead_buf_end		// buffer ends within the first two chunks
+
+	/* process second chunk */
+	shrn	v1.8b, v1.8h, #4
+	fmov	x5, d1
+	cbnz	x5, .Lsecond_nul
+
+	/* string didn't end in second chunk and neither did buffer */
+	ldr	q1,	[x10, #32]	// load next string chunk
+	str	q2,	[x0]		// deposit head into buffer
+	sub	x0, x0, x11		// adjust x0
+	str	q3,	[x0, #16]	// deposit second chunk
+	add	x10, x10, #32		// advance src
+	add	x0, x0, #32		// advance dst
+	subs	x2, x2, #16		// enough left for another round?
+	b.ls	1f
+
+	/* main loop unrolled twice */
+	.p2align 4
+0:
+	cmeq	v2.16b, v1.16b, #0	// NUL found in current chunk?
+	shrn	v2.8b, v2.8h, #4
+	fmov	x5, d2
+
+	cbnz	x5, 3f
+
+	str	q1, [x0]
+	ldr	q1, [x10, #16]		// load next chunk
+
+	cmp	x2, #16			// more than a full chunk left?
+	b.ls	2f
+
+	add	x10, x10, #32		// advance pointers
+	add	x0, x0, #32
+
+	cmeq	v2.16b, v1.16b, #0	// NUL found in current chunk?
+	shrn	v2.8b, v2.8h, #4
+	fmov	x5, d2
+	cbnz	x5, 4f			// process chunk if match
+
+	str	q1, [x0, #-16]
+	ldr	q1, [x10]		// load next chunk
+
+	subs	x2, x2, #32
+	b.hi	0b
+
+1:
+	sub	x10, x10, #16		// undo second advancement
+	add	x2, x2, #16
+	sub	x0, x0, #16
+
+	/* 1--16 bytes left in the buffer but string has not ended yet */
+2:
+	cmeq	v2.16b, v1.16b, #0	// NUL found in current chunk?
+	shrn	v2.8b, v2.8h, #4
+	fmov	x4, d2
+
+	mov	x6, #0xf
+	mov	x7, x4			// keep the NUL mask for the length scan
+
+	lsl	x5, x2, #2		// shift 0xf to the limits position
+	lsl	x5, x6, x5
+	cmp	x2, #16			// dont induce match if limit >=16
+	csel	x5, x5, xzr, lo
+	orr	x8, x4, x5		// treat limit as if terminator present
+
+	rbit	x8, x8			// simulate x86 tzcnt
+	clz	x8, x8			// index of NUL or limit
+	lsr	x8, x8, #2
+
+	add	x0, x0, x8
+
+	ldr	q1, [x10, x8]		// load tail
+	str	q1, [x0]		// store tail
+	strb	wzr, [x0, #16]		// terminate dst right after the tail
+
+	/* continue to find the end of the string */
+	cbnz	x7, 1f
+
+	/* we opt for a simpler strlen than the one in libc as the
+	 * cmeq, shrn approach is faster for shorter strings.
+	 */
+	.p2align 4
+0:
+	ldr	q1, [x10, #32]
+	cmeq	v1.16b, v1.16b, #0	// bytewise compare against NUL
+	shrn	v1.8b, v1.8h, #4
+	fmov	x7, d1
+	cbnz	x7, 2f
+
+	ldr	q1, [x10, #48]
+	cmeq	v1.16b, v1.16b, #0	// bytewise compare against NUL
+	shrn	v1.8b, v1.8h, #4
+	fmov	x7, d1
+	add	x10, x10, #32
+	cbz	x7, 0b
+
+1:	sub	x10, x10, #16
+2:	rbit	x8, x7
+	clz	x8, x8			// index of NUL
+	lsr	x8, x8, #2
+
+	sub	x10, x10, x1
+	add	x0, x10, #32
+	add	x0, x0, x8		// return value: length of src
+
+	ret
+
+4:
+	sub	x10, x10, #16		// undo second advancement
+	sub	x0, x0, #16		// undo second advancement
+
+	/* string has ended but buffer has not */
+3:
+	rbit	x8, x5
+	clz	x8, x8			// index of NUL
+	lsr	x8, x8, #2
+
+	add	x0, x0, x8		// restore dst pointer
+	add	x10, x10, x8		// x10 -> the NUL in src
+
+	ldr	q1, [x10, #-15]		// last 16 bytes, ending with the NUL
+	str	q1, [x0, #-15]
+	add	x0, x0, #1		// no effect: x0 is overwritten just below
+	sub	x0, x10, x1		// return value: length of src
+
+	ret
+
+.Lhead_buf_end:
+	shrn	v1.8b, v1.8h, #4
+	fmov	x8, d1			// NUL mask of the second chunk
+
+	add	x2, x2, #32		// restore limit
+
+	mov	x7, x8			// keep the NUL mask for the length scan
+	mov	x6, #0xf
+
+	cmp	x2, #16			// should we induce a match or not
+	b.lo	0f
+
+	rbit	x8, x8
+	clz	x8, x8			// index of NUL
+	lsr	x8, x8, #2
+	add	x8, x8, #16		// counted from the first aligned chunk
+
+	cmp	x8, x2
+	csel	x8, x8, x2, lo		// copy min(buflen, srclen) bytes
+	b	1f
+0:
+
+	rbit	x8, x8			// these three results are unused:
+	clz	x8, x8			// x8 is overwritten by the mov below
+	lsr	x8, x8, #2
+
+	mov	x8, x2			// limit lies inside the NUL-free head
+1:
+
+	sub	x8, x8, x11		// make the count relative to src
+	strb	wzr, [x9, x8]		// terminate dst
+
+	/* continue to find the end of the string */
+	cbnz	x7, 1f
+
+	/* we opt for a simpler strlen than the one in libc as the
+	 * cmeq, shrn approach is faster for shorter strings.
+	 */
+	.p2align 4
+0:
+	ldr	q1, [x10, #32]
+	cmeq	v1.16b, v1.16b, #0	// bytewise compare against NUL
+	shrn	v1.8b, v1.8h, #4
+	fmov	x7, d1
+	cbnz	x7, 2f
+
+	ldr	q1, [x10, #48]
+	cmeq	v1.16b, v1.16b, #0	// bytewise compare against NUL
+	shrn	v1.8b, v1.8h, #4
+	fmov	x7, d1
+	add	x10, x10, #32
+	cbz	x7, 0b
+
+1:	sub	x10, x10, #16
+2:	rbit	x6, x7
+	clz	x6, x6			// index of NUL
+	lsr	x6, x6, #2
+
+	sub	x10, x10, x1
+	add	x0, x10, #32
+	add	x0, x0, x6		// return value: length of src
+
+	add	x4, x9, x8		// dst + cnt
+	add	x5, x1, x8		// src + cnt
+
+	b	.L1732
+
+.Lsecond_nul:
+	add	x2, x2, x8		// restore limit
+
+	rbit	x8, x5
+	clz	x8, x8			// index of NUL
+	lsr	x5, x8, #2
+
+	sub	x8, x11, #16
+	sub	x0, x5, x8		// string length
+
+	cmp	x0, x2			// did we match or hit limit first?
+	csel	x8, x2, x0, hi
+
+	add	x4, x9, x8		// dst + cnt
+	add	x5, x1, x8		// src + cnt
+
+	strb	wzr, [x4]		// terminate dst
+
+	/* copy 16-32 bytes */
+.L1732:
+	cmp	x8, #16
+	b.lo	.L0816
+	ldp	x16, x17, [x1]		// first 16 bytes of the source
+	ldp	x12, x1, [x5, #-16]	// last 16 bytes, may overlap the first
+	stp	x16, x17, [x9]
+	stp	x12, x1, [x4, #-16]
+	ret
+
+.Lhead_nul:
+	rbit	x8, x5
+	clz	x8, x8			// index of NUL
+	lsr	x8, x8, #2
+
+	sub	x0, x8, x11		// string length
+	cmp	x0, x2			// did we match or hit limit first?
+	csel	x8, x2, x0, hi
+
+	add	x4, x9, x8		// dst + cnt
+	add	x5, x1, x8		// src + cnt
+	strb	wzr, [x4]		// terminate dst
+
+	/* Copy 8-16 bytes */
+.L0816:
+	tbz	x8, #3, .L0407
+	ldr	x16, [x1]		// first 8 bytes
+	ldr	x17, [x5, #-8]		// last 8 bytes, may overlap the first
+	str	x16, [x9]
+	str	x17, [x4, #-8]
+	ret
+
+	/* Copy 4-7 bytes */
+	.p2align 4
+.L0407:
+	cmp	x8, #3
+	b.ls	.L0203
+	ldr	w16, [x1]		// first 4 bytes
+	ldr	w17, [x5, #-4]		// last 4 bytes, may overlap the first
+	str	w16, [x9]
+	str	w17, [x4, #-4]
+	ret
+
+.L0203:					// copy 2-3 bytes
+	tbz	x8, 1, .L0001
+	ldrh	w16, [x1]
+	ldrh	w17, [x5, #-2]
+	strh	w16, [x9]
+	strh	w17, [x4, #-2]
+	ret
+
+.L0001:					// copy 0-1 bytes
+	ldrb	w16, [x1]
+	strb	w16, [x9]
+	strb	wzr, [x4]		// re-terminate in case cnt was 0
+	ret
+
+.L0:
+	mov	x0, x1
+	b	strlen			// tail call: strlen(src) is the result
+	ret				// not reached
+END(__strlcpy)

From 25c485e147691f3929b0b5029bab58bf56d3606b Mon Sep 17 00:00:00 2001
From: Getz Mikalsen <getz@FreeBSD.org>
Date: Mon, 26 Aug 2024 20:14:37 +0200
Subject: [PATCH 108/143] lib/libc/aarch64/string: add strncmp SIMD
 implementation

This changeset includes a port of the SIMD implementation of
strncmp for amd64 to Aarch64.

It is based on D45839 with added handling for the limit.

An extended unit test for strncmp is currently being written to
make sure the bounds checks for page crossings work as expected.

Performance is significantly better than the existing
implementation from the Arm Optimized Routines repository.

Benchmark results are generated by the strperf utility by fuz.

See the DR for benchmark results.

Tested by:	fuz (exprun)
Reviewed by:	fuz, emaste
Sponsored by:	Google LLC (GSoC 2024)
PR:		281175
Differential Revision: https://reviews.freebsd.org/D45943
---
 lib/libc/aarch64/string/Makefile.inc |   4 +-
 lib/libc/aarch64/string/strncmp.S    | 569 +++++++++++++++++++++++++++
 2 files changed, 571 insertions(+), 2 deletions(-)
 create mode 100644 lib/libc/aarch64/string/strncmp.S

diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
index 34a84bcfe1331b..351f3424b6d0a6 100644
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -15,7 +15,6 @@ AARCH64_STRING_FUNCS= \
 	strchrnul \
 	strcpy \
 	strlen \
-	strncmp \
 	strnlen \
 	strrchr
 
@@ -27,7 +26,8 @@ MDSRCS+= \
 	strpbrk.c \
 	strsep.c \
 	strcat.c \
-	strlcpy.S
+	strlcpy.S \
+	strncmp.S
 
 #
 # Add the above functions. Generate an asm file that includes the needed
diff --git a/lib/libc/aarch64/string/strncmp.S b/lib/libc/aarch64/string/strncmp.S
new file mode 100644
index 00000000000000..a7f4156da9e8fe
--- /dev/null
+++ b/lib/libc/aarch64/string/strncmp.S
@@ -0,0 +1,569 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Getz Mikalsen <getz@FreeBSD.org>
+*/
+
+#include <machine/asm.h>
+#include <machine/param.h>
+
+	.weak	strncmp
+	.set	strncmp, __strncmp
+	.text
+
+ENTRY(__strncmp)
+
+	bic	x8, x0, #0xf			// x0 aligned to the boundary
+	and	x9, x0, #0xf			// x9 is the offset
+	bic	x10, x1, #0xf			// x1 aligned to the boundary
+	and	x11, x1, #0xf			// x11 is the offset
+
+	subs	x2, x2, #1			// x2 = n - 1, index of the last byte to compare
+	b.lo	.Lempty				// n == 0: nothing to compare
+
+	mov	x13, #-1			// save constants for later
+	mov	x16, #0xf
+
+	/*
+	 * Check if either string is located at end of page to avoid crossing
+	 * into unmapped page. If so, we load 16 bytes from the nearest
+	 * alignment boundary and shift based on the offset.
+	 */
+
+	add	x3, x0, #16			// end of head
+	add	x4, x1, #16
+	eor	x3, x3, x0
+	eor	x4, x4, x1			// bits that changed
+	orr	x3, x3, x4			// in either str1 or str2
+	cmp	x2,#16
+	b.lo	.Llt16				// n <= 16: short path
+	tbz	w3, #PAGE_SHIFT, .Lbegin	// neither head can cross a page
+
+	ldr	q0, [x8]			// load aligned head
+	ldr	q1, [x10]
+
+	lsl	x14, x9, #2			// 4 mask bits per byte
+	lsl	x15, x11, #2
+	lsl	x3, x13, x14			// string head
+	lsl	x4, x13, x15
+
+	cmeq	v5.16b, v0.16b, #0
+	cmeq	v6.16b, v1.16b, #0
+
+	shrn	v5.8b, v5.8h, #4		// narrow to a 64 bit mask
+	shrn	v6.8b, v6.8h, #4
+	fmov	x5, d5
+	fmov	x6, d6
+
+	adrp	x14, shift_data
+	add	x14, x14, :lo12:shift_data
+
+	/* heads may cross page boundary, avoid unmapped loads */
+	tst	x5, x3				// NUL in str1 chunk at or after its start?
+	b.eq	0f
+
+	ldr	q4, [x14, x9]			// load permutation table
+	tbl	v0.16b, {v0.16b}, v4.16b	// shift head down, zero fill the rest
+
+	b	1f
+	.p2align 4
+0:
+	ldr	q0, [x0]			// load true head
+1:
+	tst	x6, x4				// same check for str2
+	b.eq	0f
+
+	ldr	q4, [x14, x11]
+	tbl	v4.16b, {v1.16b}, v4.16b
+
+	b 1f
+
+	.p2align 4
+.Lbegin:
+	ldr	q0, [x0]			// load true heads
+0:
+	ldr	q4, [x1]
+1:
+	cmeq	v2.16b, v0.16b, #0		// NUL byte present?
+	cmeq	v4.16b, v0.16b, v4.16b		// which bytes match?
+
+	orn	v2.16b, v2.16b, v4.16b		// mismatch or NUL byte?
+
+	shrn	v2.8b, v2.8h, #4
+	fmov	x5, d2
+
+	cbnz	x5, .Lhead_mismatch
+	/* load head and second chunk */
+	ldr	q2, [x8, #16]			// load second chunk
+	ldr	q3, [x10, #16]
+
+	add	x2, x2, x11
+	sub	x2, x2, #16			// limit is now relative to x10 + 16
+
+	subs	x9, x9, x11			// is a&0xf >= b&0xf
+	b.lo	.Lswapped			// if not swap operands
+	b	.Lnormal
+
+	.p2align 4
+.Llt16:
+	/*
+	 * Check if either string is located at end of page to avoid crossing
+	 * into unmapped page. If so, we load 16 bytes from the nearest
+	 * alignment boundary and shift based on the offset.
+	 */
+	tbz	w3, #PAGE_SHIFT, 2f		// neither head can cross a page
+
+	ldr	q0, [x8]			// load aligned head
+	ldr	q1, [x10]
+
+	lsl	x14, x9, #2
+	lsl	x15, x11, #2
+	lsl	x3, x13, x14			// string head
+	lsl	x4, x13, x15
+
+	/* Introduce a null byte match if the limit is within the aligned chunk */
+	add	x14, x2, x9			// limit position counted from x8
+	add	x15, x2, x11			// limit position counted from x10
+	lsl	x14, x14, #2
+	lsl	x15, x15, #2
+	lsl	x14, x16, x14
+	lsl	x15, x16, x15
+
+	cmeq	v5.16b, v0.16b, #0
+	cmeq	v6.16b, v1.16b, #0
+
+	shrn	v5.8b, v5.8h, #4
+	shrn	v6.8b, v6.8h, #4
+	fmov	x5, d5
+	fmov	x6, d6
+
+	orr	x5, x5, x14			// insert match at limit
+	orr	x6, x6, x15
+
+	adrp	x14, shift_data
+	add	x14, x14, :lo12:shift_data
+
+	/* heads may cross page boundary, avoid unmapped loads */
+	tst	x5, x3
+	b.eq	0f
+
+	ldr	q4, [x14, x9]			// load permutation table
+	tbl	v0.16b, {v0.16b}, v4.16b
+
+	b	1f
+	.p2align 4
+0:
+	ldr	q0, [x0]			// load true head
+1:
+	tst	x6, x4
+	b.eq	0f
+
+	ldr	q4, [x14, x11]
+	tbl	v4.16b, {v1.16b}, v4.16b
+
+	b 1f
+
+	.p2align 4
+2:
+	ldr	q0, [x0]			// load true heads
+0:
+	ldr	q4, [x1]
+1:
+
+	cmeq	v2.16b, v0.16b, #0		// NUL byte present?
+	cmeq	v4.16b, v0.16b, v4.16b		// which bytes match?
+
+	bic	v2.16b, v4.16b, v2.16b		// match and not NUL byte
+
+	shrn	v2.8b, v2.8h, #4
+	fmov	x5, d2
+	lsl	x4, x2, #2
+	lsl	x4, x13, x4			// ones from the limit position upwards
+	orn	x5, x4, x5			// mismatch or NUL byte?
+
+.Lhead_mismatch:
+	rbit	x3, x5
+	clz	x3, x3				// index of mismatch
+	lsr	x3, x3, #2			// 4 mask bits per byte
+	ldrb	w4, [x0, x3]
+	ldrb	w5, [x1, x3]
+	sub	w0, w4, w5			// byte difference
+	ret
+
+	.p2align 4
+.Lnormal:
+	sub	x12, x10, x9
+	ldr	q0, [x12, #16]!			// str2 bytes matching the str1 chunk at x8 + 16
+	sub	x10, x10, x8			// distance between the aligned chunks
+	sub	x11, x10, x9			// distance from a str1 chunk to its str2 bytes
+
+	cmeq	v1.16b, v3.16b, #0		// NUL present?
+	cmeq	v0.16b, v0.16b, v2.16b		// Mismatch between chunks?
+	shrn	v1.8b, v1.8h, #4
+	shrn	v0.8b, v0.8h, #4
+	fmov	x6, d1
+	fmov	x5, d0
+
+	add	x8, x8, #32			// advance to next iteration
+
+	lsl	x4, x2, #2
+	lsl	x4, x13, x4
+	orr	x3, x6, x4			// introduce a null byte match
+	cmp	x2, #16				// does the buffer end within x2
+	csel	x6, x3, x6, lo
+	cbnz	x6, .Lnulfound2			// NUL or end of buffer found?
+	mvn	x5, x5
+	cbnz	x5, .Lmismatch2
+	sub	x2, x2, #16
+	cmp	x2, #32				// end of buffer?
+	b.lo	.Ltail
+	/*
+	 * During the main loop, the layout of the two strings is something like:
+	 *
+	 *          v ------1------ v ------2------ v
+	 *      X0:    AAAAAAAAAAAAABBBBBBBBBBBBBBBB...
+	 *      X1: AAAAAAAAAAAAABBBBBBBBBBBBBBBBCCC...
+	 *
+	 * where v indicates the alignment boundaries and corresponding chunks
+	 * of the strings have the same letters.  Chunk A has been checked in
+	 * the previous iteration.  This iteration, we first check that string
+	 * X1 doesn't end within region 2, then we compare chunk B between the
+	 * two strings.  As X1 is known not to hold a NUL byte in regions 1
+	 * and 2 at this point, this also ensures that x0 has not ended yet.
+	 */
+	.p2align 4
+0:
+	ldr	q0, [x8, x11]
+	ldr	q1, [x8, x10]
+	ldr	q2, [x8]
+
+	cmeq	v1.16b, v1.16b, #0		// end of string?
+	cmeq	v0.16b, v0.16b, v2.16b		// do the chunks match?
+
+	shrn	v1.8b, v1.8h, #4
+	shrn	v0.8b, v0.8h, #4
+	fmov	x6, d1
+	fmov	x5, d0
+	cbnz	x6, .Lnulfound
+	mvn	x5, x5				// any mismatches?
+	cbnz	x5, .Lmismatch
+
+	add	x8, x8, #16
+
+	/* main loop unrolled twice */
+	ldr	q0, [x8, x11]
+	ldr	q1, [x8, x10]
+	ldr	q2, [x8]
+
+	add	x8, x8, #16
+	cmeq	v1.16b, v1.16b, #0
+	cmeq	v0.16b, v0.16b, v2.16b
+
+	shrn	v1.8b, v1.8h, #4
+	shrn	v0.8b, v0.8h, #4
+	fmov	x6, d1
+	fmov	x5, d0
+	cbnz	x6, .Lnulfound2
+	mvn	x5, x5
+	cbnz	x5, .Lmismatch2
+	sub	x2, x2, #32
+	cmp	x2, #32				// end of buffer?
+	b.hs	0b				// if not, keep looping
+
+	/* end of buffer will occur in next 32 bytes */
+.Ltail:
+	ldr	q0, [x8, x11]
+	ldr	q1, [x8, x10]
+	ldr	q2, [x8]
+
+	cmeq	v1.16b, v1.16b, #0		// end of string?
+	cmeq	v0.16b, v0.16b, v2.16b		// do the chunks match?
+
+	shrn	v1.8b, v1.8h, #4
+	shrn	v0.8b, v0.8h, #4
+	fmov	x6, d1
+	fmov	x5, d0
+
+	/*
+	 * If x2 <= 16 then we introduce a NUL byte in the
+	 * result from CMEQ to avoid comparing further!
+	 */
+
+	lsl	x4, x2, #2
+	lsl	x4, x13, x4
+	orr	x3, x6, x4			// introduce a null byte match
+	cmp	x2, #16				// does the buffer end within x2
+	csel	x6, x3, x6, lo
+
+	cbnz	x6, .Lnulfound			// NUL or end of string found
+	mvn	x5, x5
+	cbnz	x5, .Lmismatch
+
+	add	x8, x8, #16
+
+	/* second chunk of the tail */
+	ldr	q0, [x8, x11]
+	ldr	q1, [x8, x10]
+	ldr	q2, [x8]
+
+	add	x8, x8, #16
+	cmeq	v1.16b, v1.16b, #0
+	cmeq	v0.16b, v0.16b, v2.16b
+
+	shrn	v1.8b, v1.8h, #4
+	shrn	v0.8b, v0.8h, #4
+	fmov	x6, d1
+	fmov	x5, d0
+
+	ubfiz	x4, x2, #2, #4	// (x2 - 16) << 2
+	lsl	x4, x13, x4			// take first half into account
+	orr	x6, x6, x4			// introduce a null byte match
+
+.Lnulfound2:
+	sub	x8, x8, #16			// roll back second increment
+
+.Lnulfound:
+	mov	x4, x6				// keep the unshifted NUL mask
+
+	ubfiz	x7, x9, #2, #4			// (x9 & 0xf) << 2
+	lsl	x6, x6, x7			// adjust NUL mask to indices
+
+	orn	x5, x6, x5
+	cbnz	x5, .Lmismatch
+
+	/*
+	 * (x0) == (x1) and NUL is past the string.
+	 * Compare (x1) with the corresponding part
+	 * of the other string until the NUL byte.
+	 */
+	ldr	q0, [x8, x9]
+	ldr	q1, [x8, x10]
+
+	cmeq	v1.16b, v0.16b, v1.16b
+	shrn	v1.8b, v1.8h, #4
+	fmov	x5, d1
+
+	orn	x5, x4, x5
+
+	rbit	x3, x5
+	clz	x3, x3
+	lsr	x5, x3, #2			// byte index of first mismatch, NUL or limit
+
+	add	x10, x10, x8			// restore x10 pointer
+	add	x8, x8, x9			// point to corresponding chunk
+
+	ldrb	w4, [x8, x5]
+	ldrb	w5, [x10, x5]
+	sub	w0, w4, w5			// byte difference
+	ret
+
+	.p2align 4
+.Lmismatch2:
+	sub	x8, x8, #16			// roll back second increment
+.Lmismatch:
+	rbit	x3, x5
+	clz	x3, x3				// index of mismatch
+	lsr	x3, x3, #2			// 4 mask bits per byte
+	add	x11, x8, x11			// address of the matching str2 bytes
+
+	ldrb	w4, [x8, x3]
+	ldrb	w5, [x11, x3]
+	sub	w0, w4, w5			// byte difference
+	ret
+
+	/*
+	 * If (a&0xf) < (b&0xf), we do the same thing but with swapped
+	 * operands.  I found that this performs slightly better than
+	 * using conditional moves to do the swap branchless.
+	 */
+	.p2align 4
+.Lswapped:
+	add	x12, x8, x9
+	ldr	q0, [x12, #16]!			// str1 bytes matching the str2 chunk at x10 + 16
+	sub	x8, x8, x10			// distance between the aligned chunks
+	add	x11, x8, x9			// distance from a str2 chunk to its str1 bytes
+	add	x2,x2,x9			// limit is now relative to x8 + 16
+	neg	x9, x9				// x9 = (b&0xf) - (a&0xf), positive here
+
+	cmeq	v1.16b, v2.16b, #0
+	cmeq	v0.16b, v0.16b, v3.16b
+	shrn	v1.8b, v1.8h, #4
+	shrn	v0.8b, v0.8h, #4
+	fmov	x6, d1
+	fmov	x5, d0
+
+	add	x10, x10, #32
+
+	lsl	x4, x2, #2
+	lsl	x4, x13, x4
+	orr	x3,x6,x4			// introduce a null byte match
+	cmp	x2,#16
+	csel	x6, x3, x6, lo
+	cbnz	x6, .Lnulfound2s
+	mvn	x5, x5
+	cbnz	x5, .Lmismatch2s
+
+	sub	x2, x2, #16
+	cmp	x2, #32
+	b.lo	.Ltails
+
+	/*
+	 * During the main loop, the layout of the two strings is something like:
+	 *
+	 *          v ------1------ v ------2------ v
+	 *      X1:    AAAAAAAAAAAAABBBBBBBBBBBBBBBB...
+	 *      X0: AAAAAAAAAAAAABBBBBBBBBBBBBBBBCCC...
+	 *
+	 * where v indicates the alignment boundaries and corresponding chunks
+	 * of the strings have the same letters.  Chunk A has been checked in
+	 * the previous iteration.  This iteration, we first check that string
+	 * X0 doesn't end within region 2, then we compare chunk B between the
+	 * two strings.  As X0 is known not to hold a NUL byte in regions 1
+	 * and 2 at this point, this also ensures that X1 has not ended yet.
+	 */
+	.p2align 4
+0:
+	ldr	q0, [x10, x11]
+	ldr	q1, [x10, x8]
+	ldr	q2, [x10]
+
+	cmeq	v1.16b, v1.16b, #0		// end of string?
+	cmeq	v0.16b, v0.16b, v2.16b		// do the chunks match?
+
+	shrn	v1.8b, v1.8h, #4
+	shrn	v0.8b, v0.8h, #4
+	fmov	x6, d1
+	fmov	x5, d0
+	cbnz	x6, .Lnulfounds
+	mvn	x5, x5				// any mismatches?
+	cbnz	x5, .Lmismatchs
+
+	add	x10, x10, #16
+
+	/* main loop unrolled twice */
+	ldr	q0, [x10, x11]
+	ldr	q1, [x10, x8]
+	ldr	q2, [x10]
+
+	add	x10, x10, #16
+	cmeq	v1.16b, v1.16b, #0
+	cmeq	v0.16b, v0.16b, v2.16b
+
+	shrn	v1.8b, v1.8h, #4
+	shrn	v0.8b, v0.8h, #4
+	fmov	x6, d1
+	fmov	x5, d0
+	cbnz	x6, .Lnulfound2s
+	mvn	x5, x5
+	cbnz	x5, .Lmismatch2s
+	sub	x2, x2, #32
+	cmp	x2, #32				// end of buffer?
+	b.hs	0b				// if not, keep looping
+
+.Ltails:
+	ldr	q0, [x10, x11]
+	ldr	q1, [x10, x8]
+	ldr	q2, [x10]
+
+	cmeq	v1.16b, v1.16b, #0
+	cmeq	v0.16b, v0.16b, v2.16b
+
+	shrn	v1.8b, v1.8h, #4
+	shrn	v0.8b, v0.8h, #4
+	fmov	x6, d1
+	fmov	x5, d0
+
+	/*
+	 * If x2 <= 16 then we introduce a NUL byte in the
+	 * result from CMEQ to avoid comparing further!
+	 */
+
+	lsl	x4, x2, #2
+	lsl	x4, x13, x4
+	orr	x3, x6, x4			// introduce a null byte match
+	cmp	x2, #16
+	csel	x6, x3, x6, lo
+
+	cbnz	x6, .Lnulfounds
+	mvn	x5, x5
+	cbnz	x5, .Lmismatchs
+
+	add	x10, x10, #16
+
+	ldr	q0, [x10, x11]
+	ldr	q1, [x10, x8]
+	ldr	q2, [x10]
+
+	add	x10, x10, #16
+	cmeq	v1.16b, v1.16b, #0
+	cmeq	v0.16b, v0.16b, v2.16b
+
+	shrn	v1.8b, v1.8h, #4
+	shrn	v0.8b, v0.8h, #4
+	fmov	x6, d1
+	fmov	x5, d0
+
+	ubfiz	x4, x2, #2, #4			// (x2 & 0xf) << 2
+	lsl	x4, x13, x4
+	orr	x6, x6, x4			// introduce a null byte match
+
+.Lnulfound2s:
+	sub	x10, x10, #16			// roll back second increment
+.Lnulfounds:
+	mov	x4, x6				// keep the unshifted NUL mask
+
+	ubfiz	x7, x9, #2, #4			// (x9 & 0xf) << 2
+	lsl	x6, x6, x7			// adjust NUL mask to indices
+
+	orn	x5, x6, x5
+
+	cbnz	x5, .Lmismatchs
+
+	ldr	q0, [x10, x9]
+	ldr	q1, [x10, x8]
+
+	cmeq	v1.16b, v0.16b, v1.16b
+	shrn	v1.8b, v1.8h, #4
+	fmov	x5, d1
+
+	orn	x5, x4, x5
+
+	rbit	x3, x5
+	clz	x3, x3
+	lsr	x5, x3, #2			// byte index of first mismatch, NUL or limit
+
+	add	x11, x10, x8			// address of the str1 chunk
+	add	x10, x10, x9			// address of the matching str2 bytes
+
+	ldrb	w4, [x10, x5]
+	ldrb	w5, [x11, x5]
+	sub	w0, w5, w4			// str1 byte minus str2 byte
+	ret
+
+	.p2align 4
+.Lmismatch2s:
+	sub	x10, x10, #16			// roll back second increment
+.Lmismatchs:
+	rbit	x3, x5
+	clz	x3, x3				// index of mismatch
+	lsr	x3, x3, #2			// 4 mask bits per byte
+	add	x11, x10, x11			// address of the matching str1 bytes
+
+	ldrb	w4, [x10, x3]
+	ldrb	w5, [x11, x3]
+	sub	w0, w5, w4			// str1 byte minus str2 byte
+	ret
+
+	.p2align 4
+.Lempty:
+	eor	x0, x0, x0			// n == 0: return 0
+	ret
+
+END(__strncmp)
+
+	.section .rodata
+	.p2align 4
+shift_data:
+	.byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15	// identity permutation
+	.fill 16, 1, -1				// 16 bytes of 0xff: out of range indices, tbl yields 0
+	.size shift_data, .-shift_data

From bad17991c06d684e9053938d00a07b962e2fd31c Mon Sep 17 00:00:00 2001
From: Getz Mikalsen <getz@FreeBSD.org>
Date: Mon, 26 Aug 2024 20:15:13 +0200
Subject: [PATCH 109/143] lib/libc/aarch64/string: add memccpy SIMD
 implementation

This changeset includes a port of the SIMD implementation of
memccpy for amd64 to Aarch64.

Performance is significantly better than the scalar implementation
except for short strings.

Benchmark results are as usual generated by the strperf utility
written by fuz.

See the DR for benchmark results.

Tested by:	fuz (exprun)
Reviewed by:	fuz, emaste
Sponsored by:	Google LLC (GSoC 2024)
PR:		281175
Differential Revision: https://reviews.freebsd.org/D46170
---
 lib/libc/aarch64/string/Makefile.inc |   3 +-
 lib/libc/aarch64/string/memccpy.S    | 271 +++++++++++++++++++++++++++
 2 files changed, 273 insertions(+), 1 deletion(-)
 create mode 100644 lib/libc/aarch64/string/memccpy.S

diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
index 351f3424b6d0a6..78145a17ab8585 100644
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -27,7 +27,8 @@ MDSRCS+= \
 	strsep.c \
 	strcat.c \
 	strlcpy.S \
-	strncmp.S
+	strncmp.S \
+	memccpy.S
 
 #
 # Add the above functions. Generate an asm file that includes the needed
diff --git a/lib/libc/aarch64/string/memccpy.S b/lib/libc/aarch64/string/memccpy.S
new file mode 100644
index 00000000000000..7d9fdb14b84b9d
--- /dev/null
+++ b/lib/libc/aarch64/string/memccpy.S
@@ -0,0 +1,271 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Getz Mikalsen <getz@FreeBSD.org>
+*/
+
+#include <machine/asm.h>
+
+	.weak	memccpy
+	.set	memccpy, __memccpy
+	.text
+
+ENTRY(__memccpy)
+	subs	x3, x3, #1		// x3 = n - 1
+	b.lo	.L0			// n == 0: return NULL
+
+	dup	v0.16b,	w2		// broadcast c to all 16 lanes
+
+	mov	x9, x0			// stash copy of dst pointer
+	bic	x10, x1, #0xf		// src aligned
+	and	x11, x1, #0xf		// src offset
+
+	ldr	q1, [x10]
+	cmeq	v1.16b, v1.16b, v0.16b	// bytewise compare against c
+
+	mov	x8, #-1			// prepare a 0xfff..fff register
+	mov	x6, #0xf
+
+	lsl	x12, x11, #2
+	lsl	x8, x8, x12		// mask of bytes in the string
+
+	shrn	v1.8b, v1.8h, #4
+	fmov	x5, d1
+
+	sub	x12, x11, #32
+	adds	x12, x12, x3		// distance from alignment boundary - 32
+	b.cc	.Lrunt			// branch if buffer ends within 32 bytes of x10
+
+	ands	x8, x8, x5		// match within the string part of the chunk?
+	b.eq	0f
+
+	/* match in first chunk */
+	rbit	x8, x8
+	clz	x8, x8			// index of match
+	lsr	x8, x8, #2
+
+	sub	x8, x8, x11		// ... from beginning of the string
+
+	add	x0, x0, x8
+	add	x4, x9, x8		// dst + cnt
+	add	x5, x1, x8		// src + cnt
+	add	x0, x0, #1		// return pointer just past the match
+
+	b	.L0816
+
+0:
+	ldr	q3,	[x10, #16]	// load second string chunk
+	ldr	q2,	[x1]		// load true head
+	cmeq	v1.16b, v3.16b, v0.16b	// char found in second chunk?
+
+	/* process second chunk */
+	shrn	v1.8b, v1.8h, #4
+	fmov	x5, d1
+
+	cbz	x5, 0f
+
+	/* match in second chunk */
+	rbit	x8, x5
+	clz	x8, x8			// index of match
+	lsr	x8, x8, #2
+
+	sub	x11, x11, #16
+	sub	x8, x8, x11		// adjust for alignment offset
+	add	x0, x0, x8		// return value
+	add	x0, x0, #1
+
+	add	x4, x9, x8
+	add	x5, x1, x8
+	b	.L1732
+
+0:
+	/* string didn't end in second chunk and neither did buffer */
+	ldr	q1,	[x10, #32]	// load next string chunk
+	str	q2,	[x0]		// deposit head into buffer
+	sub	x0, x0, x11		// adjust x0
+	mov	x3, x12			// limit is now relative to x10 + 32
+	str	q3,	[x0, #16]	// deposit second chunk
+
+	add	x10, x10, #32		// advance src
+	add	x0, x0, #32		// advance dst
+	subs	x3, x3, #16		// enough left for another round?
+	b.lo	1f
+
+	/* main loop unrolled twice */
+	.p2align 4
+0:
+	cmeq	v2.16b, v1.16b, v0.16b	// char found in this chunk?
+	shrn	v2.8b, v2.8h, #4
+	fmov	x5, d2
+
+	cbnz	x5, 3f
+
+	str	q1, [x0]
+	ldr	q1, [x10, #16]		// load next chunk
+
+	cmp	x3, #16			// more than a full chunk left?
+	b.lo	2f
+
+	add	x10, x10, #32		// advance pointers
+	add	x0, x0, #32
+
+	cmeq	v2.16b, v1.16b, v0.16b	// char found in this chunk?
+	shrn	v2.8b, v2.8h, #4
+	fmov	x5, d2
+	cbnz	x5, 4f			// process chunk if match
+
+	str	q1, [x0, #-16]
+	ldr	q1, [x10]		// load next chunk
+
+	subs	x3, x3, #32
+	b.hs	0b
+
+1:
+	sub	x10, x10, #16		// undo second advancement
+	add	x3, x3, #16
+	sub	x0, x0, #16
+
+	/* 1--16 bytes left in the buffer but string has not ended yet */
+2:
+	cmeq	v2.16b, v1.16b, v0.16b	// char found in this chunk?
+	shrn	v2.8b, v2.8h, #4
+	fmov	x4, d2
+
+	lsl	x5, x3, #2		// shift 0xf to the limits position
+	lsl	x5, x6, x5
+	orr	x8, x4, x5		// insert match in mask at limit
+
+	rbit	x8, x8			// simulate x86 tzcnt
+	clz	x7, x8			// index of match or limit
+	lsr	x8, x7, #2
+
+	lsl	x5, x6, x7		// simulate x86 bt with shifted 0xf
+
+	add	x8, x8, #1
+	add	x0, x0, x8
+
+	ldr	q1, [x10, x8]		// load tail
+	str	q1, [x0]		// store tail
+
+	add	x0, x0, #16
+
+	tst	x4, x5			// terminator encountered inside buffer?
+	csel	x0, x0, xzr, ne		// if yes, return pointer, else NULL
+	ret
+
+4:
+	sub	x10, x10, #16		// undo second advancement
+	sub	x0, x0, #16		// undo second advancement
+
+3:
+	rbit	x8, x5
+	clz	x8, x8			// index of match
+	lsr	x3, x8, #2
+
+	add	x0, x0, x3		// restore dst pointer
+	add	x10, x10, x3
+	ldr	q1, [x10, #-15]		// copy the 16 bytes ending at the match
+	str	q1, [x0, #-15]
+	add	x0, x0, #1		// return pointer just past the match
+	ret
+
+.Lrunt:
+	add	x13, x11, x3		// index of the last buffer byte counted from x10
+
+	mov	x7, x5			// keep a copy of original match mask
+
+	lsl	x4, x12, #2		// shift 0xf to the limits position
+	lsl	x4, x6, x4
+
+	cmp	x13, #16		// dont induce match if limit >=16
+	csel	x4, x4, xzr, lo
+	orr	x5, x5, x4		// insert match in mask at limit
+
+	ands	x8, x8, x5		// if match always fall through
+	b.ne	0f
+
+	ldr	q4,	[x10, #16]	// load second string chunk
+	cmeq	v1.16b, v4.16b, v0.16b	// char found in second chunk?
+
+	/* process second chunk */
+	shrn	v1.8b, v1.8h, #4
+	fmov	x8, d1
+	mov	x7, x8			// keep a copy of the second chunk match mask
+
+	lsl	x4, x12, #2
+	lsl	x4, x6, x4
+	orr	x8, x8, x4		// induce match in upper bytes of mask
+
+	rbit	x8, x8
+	clz	x4, x8			// index of match or limit
+	lsr	x8, x4, #2
+	add	x8, x8, #16		// no match in first chunk
+	b	1f
+
+0:
+	rbit	x8, x8
+	clz	x4, x8			// index of match or limit
+	lsr	x8, x4, #2
+1:
+	add	x0, x0, x8		// return value if terminator found
+	sub	x0, x0, x11
+	add	x0, x0, #1
+
+	/* check if we encountered a match or the limit first */
+	lsl	x5, x6, x4
+	ands	x7, x7, x5		// was the terminator present?
+	csel	x0, xzr, x0, eq		// NULL if only the limit was hit
+
+	sub	x8, x8, x11
+	add	x4, x9, x8		// dst + cnt
+	add	x5, x1, x8		// src + cnt
+
+	/* copy 17-32 bytes */
+.L1732:
+	cmp	x8, #16
+	b.lo	.L0816
+	add	x5, x5, #1		// ldp offsets must be multiples of 8
+	add	x4, x4, #1
+	ldp	x16, x17, [x1]		// first 16 bytes
+	ldp	x12, x13, [x5, #-16]	// last 16 bytes, may overlap the first
+	stp	x16, x17, [x9]
+	stp	x12, x13, [x4, #-16]
+	ret
+
+	/* Copy 9-16 bytes (x8 is the byte count - 1) */
+.L0816:
+	tbz	x8, #3, .L0407
+	ldr	x16, [x1]		// first 8 bytes
+	ldr	x17, [x5, #-7]		// last 8 bytes, may overlap the first
+	str	x16, [x9]
+	str	x17, [x4, #-7]
+	ret
+
+	/* Copy 4-8 bytes (x8 is the byte count - 1) */
+	.p2align 4
+.L0407:
+	cmp	x8, #3
+	b.lo	.L0103
+	ldr	w16, [x1]		// first 4 bytes
+	ldr	w18, [x5, #-3]		// NOTE(review): x18 is the AAPCS64 platform register, confirm it may be clobbered
+	str	w16, [x9]
+	str	w18, [x4, #-3]		// last 4 bytes, may overlap the first
+	ret
+
+	/* Copy 1-3 bytes */
+	.p2align 4
+.L0103:
+	lsr	x14, x8, #1		// index of the middle byte
+	ldrb	w16, [x1]		// first byte
+	ldrb	w15, [x5]		// last byte
+	ldrb	w18, [x1, x14]		// NOTE(review): x18 is the AAPCS64 platform register, confirm it may be clobbered
+	strb	w16, [x9]
+	strb	w18, [x9, x14]
+	strb	w15, [x4]
+	ret
+
+.L0:
+	eor	x0, x0, x0		// n == 0: return NULL
+	ret
+
+END(__memccpy)

From 3dc5429158cf221374cdbd0bbb728962bff4fb76 Mon Sep 17 00:00:00 2001
From: Getz Mikalsen <getz@FreeBSD.org>
Date: Mon, 26 Aug 2024 20:15:34 +0200
Subject: [PATCH 110/143] lib/libc/aarch64/string: add strncat SIMD
 implementation

This patch requires D46170 as it depends on memccpy being labeled
__memccpy.

It's a direct copy from the amd64 string functions.

Tested by:	fuz (exprun)
Reviewed by:	fuz, emaste
Sponsored by:	Google LLC (GSoC 2024)
PR:		281175
Differential Revision: https://reviews.freebsd.org/D46292
---
 lib/libc/aarch64/string/Makefile.inc |  3 ++-
 lib/libc/aarch64/string/strncat.c    | 29 ++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)
 create mode 100644 lib/libc/aarch64/string/strncat.c

diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
index 78145a17ab8585..876ef4257b4c06 100644
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -28,7 +28,8 @@ MDSRCS+= \
 	strcat.c \
 	strlcpy.S \
 	strncmp.S \
-	memccpy.S
+	memccpy.S \
+	strncat.c
 
 #
 # Add the above functions. Generate an asm file that includes the needed
diff --git a/lib/libc/aarch64/string/strncat.c b/lib/libc/aarch64/string/strncat.c
new file mode 100644
index 00000000000000..33b278ac5e04cf
--- /dev/null
+++ b/lib/libc/aarch64/string/strncat.c
@@ -0,0 +1,29 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023 Robert Clausecker
+ */
+
+#include <sys/cdefs.h>
+
+#include <string.h>
+
+void *__memccpy(void *restrict, const void *restrict, int, size_t);
+
+char *
+strncat(char *dest, const char *src, size_t n)
+{
+	size_t len;
+	char *endptr;
+
+	len = strlen(dest);
+	endptr = __memccpy(dest + len, src, '\0', n);	/* copy at most n bytes, stop after a NUL */
+
+	/* no NUL copied: aim past the terminator slot, avoids an extra branch */
+	if (endptr == NULL)
+		endptr = dest + len + n + 1;
+
+	endptr[-1] = '\0';	/* always terminate the result */
+
+	return (dest);
+}

From bea89d038ac54048bb7dcb149cabd99067e5a3a9 Mon Sep 17 00:00:00 2001
From: Getz Mikalsen <getz@FreeBSD.org>
Date: Mon, 26 Aug 2024 23:10:16 +0200
Subject: [PATCH 111/143] lib/libc/aarch64/string: add strlcat SIMD
 implementation

This patch requires D46243 as it depends on strlcpy being labeled
__strlcpy.

It's a direct copy from the amd64 string functions using memchr and
strlcpy to implement strlcat.

Tested by:	fuz (exprun)
Reviewed by:	fuz, emaste
Sponsored by:	Google LLC (GSoC 2024)
PR:		281175
Differential Revision: https://reviews.freebsd.org/D46272
---
 lib/libc/aarch64/string/Makefile.inc |  3 ++-
 lib/libc/aarch64/string/memchr.S     |  4 ++++
 lib/libc/aarch64/string/strlcat.c    | 25 +++++++++++++++++++++++++
 3 files changed, 31 insertions(+), 1 deletion(-)
 create mode 100644 lib/libc/aarch64/string/memchr.S
 create mode 100644 lib/libc/aarch64/string/strlcat.c

diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
index 876ef4257b4c06..f8c67319fe12ac 100644
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -29,7 +29,8 @@ MDSRCS+= \
 	strlcpy.S \
 	strncmp.S \
 	memccpy.S \
-	strncat.c
+	strncat.c \
+	strlcat.c
 
 #
 # Add the above functions. Generate an asm file that includes the needed
diff --git a/lib/libc/aarch64/string/memchr.S b/lib/libc/aarch64/string/memchr.S
new file mode 100644
index 00000000000000..6d4330d9115e9c
--- /dev/null
+++ b/lib/libc/aarch64/string/memchr.S
@@ -0,0 +1,4 @@
+	.weak memchr
+	.set memchr, __memchr_aarch64
+
+#include "aarch64/memchr.S"
diff --git a/lib/libc/aarch64/string/strlcat.c b/lib/libc/aarch64/string/strlcat.c
new file mode 100644
index 00000000000000..c3c996163ade00
--- /dev/null
+++ b/lib/libc/aarch64/string/strlcat.c
@@ -0,0 +1,25 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023 Robert Clausecker
+ */
+
+#include <sys/cdefs.h>
+
+#include <string.h>
+
+void *__memchr_aarch64(const void *, int, size_t);
+size_t __strlcpy(char *restrict, const char *restrict, size_t);
+
+size_t
+strlcat(char *restrict dst, const char *restrict src, size_t dstsize)
+{
+	char *loc = __memchr_aarch64(dst, '\0', dstsize);	/* end of dst, if within dstsize */
+
+	if (loc != NULL) {
+		size_t dstlen = (size_t)(loc - dst);
+
+		return (dstlen + __strlcpy(loc, src, dstsize - dstlen));	/* append into the space left */
+	} else
+		return (dstsize + strlen(src));	/* no NUL within dstsize: nothing is written */
+}

From 5ebd4d0dd2f45040aa5e5b028a4b93163aea6899 Mon Sep 17 00:00:00 2001
From: Getz Mikalsen <getz@FreeBSD.org>
Date: Mon, 26 Aug 2024 20:13:44 +0200
Subject: [PATCH 112/143] lib/libc/aarch64/string: add memcpy SIMD
 implementation

I noticed that we have a SIMD optimized memcpy in the
arm-optimized-routines in /contrib.

This patch ensures we use the SIMD variant as opposed to the Scalar
optimized variant.

Benchmarks are generated by fuz' strperf utility.

See the DR for benchmark results.

Tested by:	fuz (exprun)
Reviewed by:	fuz, emaste
Sponsored by:	Google LLC (GSoC 2024)
PR:		281175
Differential Revision: https://reviews.freebsd.org/D46251
---
 lib/libc/aarch64/string/memcpy.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/libc/aarch64/string/memcpy.S b/lib/libc/aarch64/string/memcpy.S
index f403dd2e42a8ad..53e860750eb225 100644
--- a/lib/libc/aarch64/string/memcpy.S
+++ b/lib/libc/aarch64/string/memcpy.S
@@ -1,3 +1,3 @@
-#define	__memcpy_aarch64	memcpy
-#define	__memmove_aarch64	memmove
-#include "aarch64/memcpy.S"
+#define	__memcpy_aarch64_simd	memcpy
+#define	__memmove_aarch64_simd	memmove
+#include "aarch64/memcpy-advsimd.S"

From 3863fec1ce2dc6033f094a085118605ea89db9e2 Mon Sep 17 00:00:00 2001
From: Getz Mikalsen <getz@FreeBSD.org>
Date: Mon, 26 Aug 2024 21:54:32 +0200
Subject: [PATCH 113/143] lib/libc/aarch64/string: add strlen SIMD
 implementation

Adds a SIMD enhanced strlen for Aarch64. It takes inspiration from
the amd64 implementation but I struggled getting the performance I
had hoped for on cores like the Graviton3 when compared to the
existing implementation from Arm Optimized Routines.

See the DR for benchmark results.

Tested by:	fuz (exprun)
Reviewed by:	fuz, emaste
Sponsored by:	Google LLC (GSoC 2024)
PR:		281175
Differential Revision: https://reviews.freebsd.org/D45623
---
 lib/libc/aarch64/string/Makefile.inc |  4 +--
 lib/libc/aarch64/string/strlen.S     | 46 ++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 2 deletions(-)
 create mode 100644 lib/libc/aarch64/string/strlen.S

diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
index f8c67319fe12ac..7325b54d9716fc 100644
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -14,7 +14,6 @@ AARCH64_STRING_FUNCS= \
 	strchr \
 	strchrnul \
 	strcpy \
-	strlen \
 	strnlen \
 	strrchr
 
@@ -30,7 +29,8 @@ MDSRCS+= \
 	strncmp.S \
 	memccpy.S \
 	strncat.c \
-	strlcat.c
+	strlcat.c \
+	strlen.S
 
 #
 # Add the above functions. Generate an asm file that includes the needed
diff --git a/lib/libc/aarch64/string/strlen.S b/lib/libc/aarch64/string/strlen.S
new file mode 100644
index 00000000000000..7bfac7f4b1e191
--- /dev/null
+++ b/lib/libc/aarch64/string/strlen.S
@@ -0,0 +1,46 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Getz Mikalsen <getz@FreeBSD.org>
+*/
+
+#include <machine/asm.h>
+
+	.weak	strlen
+	.set	strlen, __strlen
+	.text
+
+ENTRY(__strlen)
+	bic	x10, x0, #0xf		// aligned src
+	and	x9, x0, #0xf		// offset of src within its 16 byte chunk
+	ldr	q0, [x10]		// aligned load stays within one page
+	cmeq	v0.16b, v0.16b, #0	// 0xff in every lane that holds a NUL
+	shrn	v0.8b, v0.8h, #4	// reduce to 4 mask bits per byte
+	fmov	x1, d0
+	cbz	x9, .Laligned
+	lsl	x2, x0, #2		// 4 * offset, lsr only uses the low 6 bits
+	lsr	x1, x1, x2		// shift by offset index
+	cbz	x1, .Lloop		// no NUL in the head
+	rbit	x1, x1
+	clz	x0, x1
+	lsr	x0, x0, #2		// length is the index of the first NUL
+	ret
+
+.Laligned:
+	cbnz	x1, .Ldone		// NUL within the first chunk
+
+.Lloop:
+	ldr	q0, [x10, #16]!
+	cmeq	v0.16b, v0.16b, #0
+	shrn	v0.8b, v0.8h, #4	// reduce to fit mask in GPR
+	fmov	x1, d0
+	/* test in a GPR: fcmp treats denormal masks as zero if FPCR.FZ is set */
+	cbz	x1, .Lloop
+.Ldone:
+	sub	x0, x10, x0		// bytes before the current chunk
+	rbit	x1, x1			// reverse bits as NEON has no ctz
+	clz	x3, x1
+	lsr	x3, x3, #2		// index of the first NUL in the chunk
+	add	x0, x0, x3
+	ret
+END(__strlen)

From 79e01e7e643c9337d8d6046b6db7df674475a099 Mon Sep 17 00:00:00 2001
From: Getz Mikalsen <getz@FreeBSD.org>
Date: Wed, 28 Aug 2024 15:13:45 +0200
Subject: [PATCH 114/143] lib/libc/aarch64/string: add bcopy & bzero wrapper

This patch enables the use of SIMD enhanced functions to implement
bcopy and bzero.

Tested by:	fuz (exprun)
Reviewed by:	fuz, emaste
Sponsored by:	Google LLC (GSoC 2024)
PR:		281175
Differential Revision: https://reviews.freebsd.org/D46459
---
 lib/libc/aarch64/string/Makefile.inc |  4 +++-
 lib/libc/aarch64/string/bcopy.c      | 14 ++++++++++++++
 lib/libc/aarch64/string/bzero.c      | 14 ++++++++++++++
 3 files changed, 31 insertions(+), 1 deletion(-)
 create mode 100644 lib/libc/aarch64/string/bcopy.c
 create mode 100644 lib/libc/aarch64/string/bzero.c

diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
index 7325b54d9716fc..752cc6d9900b2c 100644
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -30,7 +30,9 @@ MDSRCS+= \
 	memccpy.S \
 	strncat.c \
 	strlcat.c \
-	strlen.S
+	strlen.S \
+	bcopy.c \
+	bzero.c
 
 #
 # Add the above functions. Generate an asm file that includes the needed
diff --git a/lib/libc/aarch64/string/bcopy.c b/lib/libc/aarch64/string/bcopy.c
new file mode 100644
index 00000000000000..0dee529fb9dff8
--- /dev/null
+++ b/lib/libc/aarch64/string/bcopy.c
@@ -0,0 +1,14 @@
+/*-
+ * Public domain.
+ */
+
+#include <string.h>
+
+#undef bcopy	/* _FORTIFY_SOURCE */
+
+void
+bcopy(const void *src, void *dst, size_t len)
+{
+
+	memmove(dst, src, len);	/* bcopy takes src first, memmove takes dst first */
+}
diff --git a/lib/libc/aarch64/string/bzero.c b/lib/libc/aarch64/string/bzero.c
new file mode 100644
index 00000000000000..d82f3061865b9d
--- /dev/null
+++ b/lib/libc/aarch64/string/bzero.c
@@ -0,0 +1,14 @@
+/*-
+ * Public domain.
+ */
+
+#include <string.h>
+
+#undef bzero	/* _FORTIFY_SOURCE */
+
+void
+bzero(void *b, size_t len)
+{
+
+	memset(b, 0, len);	/* zero len bytes starting at b */
+}

From ce6af7a49ec7949c70f144f1b461b587ca7efd32 Mon Sep 17 00:00:00 2001
From: Getz Mikalsen <getz@FreeBSD.org>
Date: Wed, 28 Aug 2024 15:13:55 +0200
Subject: [PATCH 115/143] share/man/man7/simd.7: document SIMD-enhanced aarch64
 functions

This documents all the newly ported SIMD-enhanced string functions
for the aarch64 platform.

Reviewed by:	fuz, emaste
Sponsored by:	Google LLC (GSoC 2024)
Relnotes:	yes
PR:		281175
Differential Revision: https://reviews.freebsd.org/D46452
---
 share/man/man7/simd.7 | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/share/man/man7/simd.7 b/share/man/man7/simd.7
index 877bc77adf4be5..f60aa8ee794d18 100644
--- a/share/man/man7/simd.7
+++ b/share/man/man7/simd.7
@@ -24,7 +24,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE
 .
-.Dd June 7, 2024
+.Dd August 26, 2024
 .Dt SIMD 7
 .Os
 .Sh NAME
@@ -51,40 +51,40 @@ can be used to override this mechanism.
 Enhanced functions are present for the following architectures:
 .Bl -column FUNCTION_________ aarch64_ arm_ amd64_ i386_ ppc64_ -offset indent
 .It Em FUNCTION          Ta Em AARCH64 Ta Em ARM Ta Em AMD64  Ta Em I386 Ta Em PPC64
-.It    bcmp              Ta            Ta        Ta    S1     Ta    S
-.It    bcopy             Ta            Ta    S   Ta    S      Ta    S    Ta    SV
-.It    bzero             Ta            Ta    S   Ta    S      Ta    S
+.It    bcmp              Ta    A       Ta        Ta    S1     Ta    S
+.It    bcopy             Ta    A       Ta    S   Ta    S      Ta    S    Ta    SV
+.It    bzero             Ta    A       Ta    S   Ta    S      Ta    S
 .It    div               Ta            Ta        Ta    S      Ta    S
 .It    index             Ta    A       Ta        Ta    S1
 .It    ldiv              Ta            Ta        Ta    S      Ta    S
 .It    lldiv             Ta            Ta        Ta    S
 .It    memchr            Ta    A       Ta        Ta    S1
 .It    memcmp            Ta    A       Ta    S   Ta    S1     Ta    S
-.It    memccpy           Ta            Ta        Ta    S1
-.It    memcpy            Ta    S       Ta    S   Ta    S      Ta    S    Ta    SV
-.It    memmove           Ta    S       Ta    S   Ta    S      Ta    S    Ta    SV
+.It    memccpy           Ta    A       Ta        Ta    S1
+.It    memcpy            Ta    A       Ta    S   Ta    S      Ta    S    Ta    SV
+.It    memmove           Ta    A       Ta    S   Ta    S      Ta    S    Ta    SV
 .It    memrchr           Ta    A       Ta        Ta    S1
 .It    memset            Ta    A       Ta    S   Ta    S      Ta    S
 .It    rindex            Ta    A       Ta        Ta    S1     Ta    S
 .It    stpcpy            Ta    A       Ta        Ta    S1
 .It    stpncpy           Ta            Ta        Ta    S1
-.It    strcat            Ta            Ta        Ta    S1     Ta    S
+.It    strcat            Ta    A       Ta        Ta    S1     Ta    S
 .It    strchr            Ta    A       Ta        Ta    S1     Ta    S
 .It    strchrnul         Ta    A       Ta        Ta    S1
-.It    strcmp            Ta    S       Ta    S   Ta    S1     Ta    S
+.It    strcmp            Ta    A       Ta    S   Ta    S1     Ta    S
 .It    strcpy            Ta    A       Ta        Ta    S1     Ta    S    Ta    S2
-.It    strcspn           Ta            Ta        Ta    S2
-.It    strlcat           Ta            Ta        Ta    S1
-.It    strlcpy           Ta            Ta        Ta    S1
+.It    strcspn           Ta    S       Ta        Ta    S2
+.It    strlcat           Ta    A       Ta        Ta    S1
+.It    strlcpy           Ta    A       Ta        Ta    S1
 .It    strlen            Ta    A       Ta    S   Ta    S1
-.It    strncat           Ta            Ta        Ta    S1
-.It    strncmp           Ta    S       Ta    S   Ta    S1     Ta    S
+.It    strncat           Ta    A       Ta        Ta    S1
+.It    strncmp           Ta    A       Ta    S   Ta    S1     Ta    S
 .It    strncpy           Ta            Ta        Ta    S1     Ta         Ta    S2
 .It    strnlen           Ta    A       Ta        Ta    S1
 .It    strrchr           Ta    A       Ta        Ta    S1     Ta    S
-.It    strpbrk           Ta            Ta        Ta    S2
-.It    strsep            Ta            Ta        Ta    S2
-.It    strspn            Ta            Ta        Ta    S2
+.It    strpbrk           Ta    S       Ta        Ta    S2
+.It    strsep            Ta    S       Ta        Ta    S2
+.It    strspn            Ta    S       Ta        Ta    S2
 .It    swab              Ta            Ta        Ta           Ta    S
 .It    timingsafe_bcmp   Ta            Ta        Ta    S1
 .It    timingsafe_memcmp Ta            Ta        Ta    S

From f2c98669fc1b3fd2dbc7a7e3eedd098970a10dec Mon Sep 17 00:00:00 2001
From: Robert Clausecker <fuz@FreeBSD.org>
Date: Mon, 9 Dec 2024 10:49:49 +0100
Subject: [PATCH 116/143] lib/libc/aarch64/string: add ASIMD-enhanced
 timingsafe_bcmp implementation

A straightforward port of the amd64 implementation.

Approved by:	security (cperciva)
Reviewed by:	getz, cperciva
Event:		EuroBSDcon 2024
Differential Revision:	https://reviews.freebsd.org/D46757
---
 lib/libc/aarch64/string/Makefile.inc      |   1 +
 lib/libc/aarch64/string/timingsafe_bcmp.S | 113 ++++++++++++++++++++++
 2 files changed, 114 insertions(+)
 create mode 100644 lib/libc/aarch64/string/timingsafe_bcmp.S

diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
index 752cc6d9900b2c..8019ab4adafc34 100644
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -31,6 +31,7 @@ MDSRCS+= \
 	strncat.c \
 	strlcat.c \
 	strlen.S \
+	timingsafe_bcmp.S \
 	bcopy.c \
 	bzero.c
 
diff --git a/lib/libc/aarch64/string/timingsafe_bcmp.S b/lib/libc/aarch64/string/timingsafe_bcmp.S
new file mode 100644
index 00000000000000..baa5c6f0940cb0
--- /dev/null
+++ b/lib/libc/aarch64/string/timingsafe_bcmp.S
@@ -0,0 +1,113 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Robert Clausecker
+ */
+
+#include <machine/asm.h>
+
+ENTRY(timingsafe_bcmp)	// x0, x1: buffers, x2: length; w0 == 0 iff equal
+	cmp	x2, #32			// at least 33 bytes to process?
+	bhi	.Lgt32
+
+	cmp	x2, #16			// at least 17 bytes to process?
+	bhi	.L1732
+
+	cmp	x2, #8			// at least 9 bytes to process?
+	bhi	.L0916
+
+	cmp	x2, #4			// at least 5 bytes to process?
+	bhi	.L0508
+
+	cmp	x2, #2			// at least 3 bytes to process?
+	bhi	.L0304
+
+	cbnz	x2, .L0102		// buffer empty?
+
+	mov	w0, #0			// empty buffer always matches
+	ret
+
+.L0102:	ldrb	w3, [x0]		// load first bytes
+	ldrb	w4, [x1]
+	sub	x2, x2, #1		// offset of the last byte
+	ldrb	w5, [x0, x2]		// load last bytes
+	ldrb	w6, [x1, x2]
+	eor	w3, w3, w4		// nonzero iff the first bytes differ
+	eor	w5, w5, w6		// nonzero iff the last bytes differ
+	orr	w0, w3, w5		// nonzero iff any byte differs
+	ret
+
+.L0304:	ldrh	w3, [x0]		// load first halfwords
+	ldrh	w4, [x1]
+	sub	x2, x2, #2		// offset of the last halfword
+	ldrh	w5, [x0, x2]		// load last halfwords
+	ldrh	w6, [x1, x2]
+	eor	w3, w3, w4
+	eor	w5, w5, w6
+	orr	w0, w3, w5		// nonzero iff any byte differs
+	ret
+
+.L0508:	ldr	w3, [x0]		// load first words
+	ldr	w4, [x1]
+	sub	x2, x2, #4		// offset of the last word
+	ldr	w5, [x0, x2]		// load last words
+	ldr	w6, [x1, x2]
+	eor	w3, w3, w4
+	eor	w5, w5, w6
+	orr	w0, w3, w5		// nonzero iff any byte differs
+	ret
+
+.L0916:	ldr	x3, [x0]		// load first doublewords
+	ldr	x4, [x1]
+	sub	x2, x2, #8		// offset of the last doubleword
+	ldr	x5, [x0, x2]		// load last doublewords
+	ldr	x6, [x1, x2]
+	eor	x3, x3, x4
+	eor	x5, x5, x6
+	orr	x0, x3, x5		// nonzero iff any byte differs
+	orr	x0, x0, x0, lsr #32	// ensure low 32 bits are nonzero iff mismatch
+	ret
+
+.L1732:	ldr	q0, [x0]		// load first 16 bytes
+	ldr	q1, [x1]
+	sub	x2, x2, #16		// offset of the last 16 bytes
+	ldr	q2, [x0, x2]		// load last 16 bytes
+	ldr	q3, [x1, x2]
+	eor	v0.16b, v0.16b, v1.16b
+	eor	v2.16b, v2.16b, v3.16b
+	orr	v0.16b, v0.16b, v2.16b	// nonzero iff any byte differs
+	umaxv	s0, v0.4s		// get a nonzero word if any
+	mov	w0, v0.s[0]		// return that word in w0
+	ret
+
+	/* more than 32 bytes: process buffer in a loop */
+.Lgt32:	ldp	q0, q1, [x0], #32	// load first 32 bytes, advance
+	ldp	q2, q3, [x1], #32
+	eor	v0.16b, v0.16b, v2.16b
+	eor	v1.16b, v1.16b, v3.16b
+	orr	v4.16b, v0.16b, v1.16b	// v4 accumulates all differences
+	subs	x2, x2, #64		// enough left for another iteration?
+	bls	.Ltail			// no: finish with the last 32 bytes
+
+0:	ldp	q0, q1, [x0], #32	// load next 32 bytes, advance
+	ldp	q2, q3, [x1], #32
+	eor	v0.16b, v0.16b, v2.16b
+	eor	v1.16b, v1.16b, v3.16b
+	orr	v0.16b, v0.16b, v1.16b
+	orr	v4.16b, v4.16b, v0.16b	// fold into the accumulator
+	subs	x2, x2, #32		// more than 32 bytes left?
+	bhi	0b
+
+	/* process last 32 bytes */
+.Ltail:	add	x0, x0, x2		// point to the last 32 bytes in the buffer
+	add	x1, x1, x2
+	ldp	q0, q1, [x0]		// may overlap bytes already compared
+	ldp	q2, q3, [x1]
+	eor	v0.16b, v0.16b, v2.16b
+	eor	v1.16b, v1.16b, v3.16b
+	orr	v0.16b, v0.16b, v1.16b
+	orr	v4.16b, v4.16b, v0.16b
+	umaxv	s0, v4.4s		// get a nonzero word if any
+	mov	w0, v0.s[0]		// return that word in w0
+	ret
+END(timingsafe_bcmp)

From 3f224333af163d5fcd7547a20993dcf18f19076c Mon Sep 17 00:00:00 2001
From: Robert Clausecker <fuz@FreeBSD.org>
Date: Mon, 9 Dec 2024 10:50:00 +0100
Subject: [PATCH 117/143] lib/libc/aarch64/string: add timingsafe_memcmp()
 assembly implementation

A port of the amd64 implementation with some slight changes due to
differences in instructions provided by aarch64.

No ASIMD for the same reason as the amd64 code: it's just not particularly
suitable for this application.

Event:		EuroBSDcon 2024
Approved by:	security (cperciva)
Reviewed by:	getz, cperciva
Differential Revision:	https://reviews.freebsd.org/D46758
---
 lib/libc/aarch64/string/Makefile.inc        |   1 +
 lib/libc/aarch64/string/timingsafe_memcmp.S | 117 ++++++++++++++++++++
 2 files changed, 118 insertions(+)
 create mode 100644 lib/libc/aarch64/string/timingsafe_memcmp.S

diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
index 8019ab4adafc34..9574aad9593323 100644
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -32,6 +32,7 @@ MDSRCS+= \
 	strlcat.c \
 	strlen.S \
 	timingsafe_bcmp.S \
+	timingsafe_memcmp.S \
 	bcopy.c \
 	bzero.c
 
diff --git a/lib/libc/aarch64/string/timingsafe_memcmp.S b/lib/libc/aarch64/string/timingsafe_memcmp.S
new file mode 100644
index 00000000000000..28fdd911a3875b
--- /dev/null
+++ b/lib/libc/aarch64/string/timingsafe_memcmp.S
@@ -0,0 +1,117 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Robert Clausecker
+ */
+
+#include <machine/asm.h>
+
+ENTRY(timingsafe_memcmp)	// x0, x1: buffers, x2: length; w0: <0, 0, >0
+	cmp	x2, #16			// at least 17 bytes to process?
+	bhi	.Lgt16
+
+	cmp	x2, #8			// at least 9 bytes to process?
+	bhi	.L0916
+
+	cmp	x2, #4			// at least 5 bytes to process?
+	bhi	.L0508
+
+	cmp	x2, #2			// at least 3 bytes to process?
+	bhi	.L0304
+
+	cbnz	x2, .L0102		// buffer empty?
+
+	mov	w0, #0			// empty buffer always matches
+	ret
+
+.L0102:	ldrb	w3, [x0]		// load first bytes
+	ldrb	w4, [x1]
+	sub	x2, x2, #1		// offset of the last byte
+	ldrb	w5, [x0, x2]		// load last bytes
+	ldrb	w6, [x1, x2]
+	bfi	w5, w3, #8, #8		// join bytes in big endian
+	bfi	w6, w4, #8, #8
+	sub	w0, w5, w6		// 16-bit operands: the sign is the order
+	ret
+
+
+.L0304:	ldrh	w3, [x0]		// load first halfwords
+	ldrh	w4, [x1]
+	sub	x2, x2, #2		// offset of the last halfword
+	ldrh	w5, [x0, x2]		// load last halfwords
+	ldrh	w6, [x1, x2]
+	bfi	w3, w5, #16, #16	// join halfwords in little endian
+	bfi	w4, w6, #16, #16
+	rev	w3, w3			// byte-reverse into big endian
+	rev	w4, w4
+	cmp	w3, w4
+	csetm	w0, lo			// w0 = w3 >= w4 ? 0 : -1
+	csinc	w0, w0, wzr, ls		// w0 = w3 > w4 ? 1 : w0
+	ret
+
+.L0508:	ldr	w3, [x0]		// load first words
+	ldr	w4, [x1]
+	sub	x2, x2, #4		// offset of the last word
+	ldr	w5, [x0, x2]		// load last words
+	ldr	w6, [x1, x2]
+	bfi	x3, x5, #32, #32	// join words in little endian
+	bfi	x4, x6, #32, #32
+	rev	x3, x3			// byte-reverse into big endian
+	rev	x4, x4
+	cmp	x3, x4
+	csetm	w0, lo			// w0 = x3 >= x4 ? 0 : -1
+	csinc	w0, w0, wzr, ls		// w0 = x3 > x4 ? 1 : w0
+	ret
+
+.L0916:	ldr	x3, [x0]		// load first doublewords
+	ldr	x4, [x1]
+	sub	x2, x2, #8		// offset of the last doubleword
+	ldr	x5, [x0, x2]		// load last doublewords
+	ldr	x6, [x1, x2]
+	cmp	x3, x4			// mismatch in first pair?
+	csel	x3, x3, x5, ne		// use second pair if first pair equal
+	csel	x4, x4, x6, ne
+	rev	x3, x3			// byte-reverse into big endian
+	rev	x4, x4
+	cmp	x3, x4
+	csetm	w0, lo			// w0 = x3 >= x4 ? 0 : -1
+	csinc	w0, w0, wzr, ls		// w0 = x3 > x4 ? 1 : w0
+	ret
+
+	/* more than 16 bytes: process buffer in a loop */
+.Lgt16:	ldp	x3, x4, [x0], #16	// load first 16 bytes, advance
+	ldp	x5, x6, [x1], #16
+	cmp	x3, x5			// mismatch in first pair?
+	csel	x3, x3, x4, ne		// use second pair if first pair equal
+	csel	x5, x5, x6, ne
+	subs	x2, x2, #32		// more than 16 bytes left?
+	bls	.Ltail			// no: finish with the last 16 bytes
+
+0:	ldp	x4, x7, [x0], #16	// load next 16 bytes, advance
+	ldp	x6, x8, [x1], #16
+	cmp	x4, x6			// mismatch in first pair?
+	csel	x4, x4, x7, ne		// if not, try second pair
+	csel	x6, x6, x8, ne
+	cmp	x3, x5			// was there a mismatch previously?
+	csel	x3, x3, x4, ne		// apply new pair if there was not
+	csel	x5, x5, x6, ne
+	subs	x2, x2, #16		// more than 16 bytes left?
+	bhi	0b
+
+.Ltail:	add	x0, x0, x2		// point to the last 16 bytes in the buffer
+	add	x1, x1, x2
+	ldp	x4, x7, [x0]		// may overlap bytes already compared
+	ldp	x6, x8, [x1]
+	cmp	x4, x6			// mismatch in first pair?
+	csel	x4, x4, x7, ne		// if not, try second pair
+	csel	x6, x6, x8, ne
+	cmp	x3, x5			// was there a mismatch previously?
+	csel	x3, x3, x4, ne		// apply new pair if there was not
+	csel	x5, x5, x6, ne
+	rev	x3, x3			// byte-reverse into big endian
+	rev	x5, x5
+	cmp	x3, x5
+	csetm	w0, lo			// w0 = x3 >= x5 ? 0 : -1
+	csinc	w0, w0, wzr, ls		// w0 = x3 > x5 ? 1 : w0
+	ret
+END(timingsafe_memcmp)

From c15b847b183bf836148caa1a1dc10d5d86507d09 Mon Sep 17 00:00:00 2001
From: Robert Clausecker <fuz@FreeBSD.org>
Date: Mon, 18 Nov 2024 14:44:47 +0100
Subject: [PATCH 118/143] share/man/man7/simd.7: document SIMD-enhanced
 timingsafe_{b,mem}cmp

See also:	D46758, D46757
Event:		EuroBSDcon 2024
Relnotes:	yes
---
 share/man/man7/simd.7 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/share/man/man7/simd.7 b/share/man/man7/simd.7
index f60aa8ee794d18..d5092348d9b396 100644
--- a/share/man/man7/simd.7
+++ b/share/man/man7/simd.7
@@ -24,7 +24,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE
 .
-.Dd August 26, 2024
+.Dd November 18, 2024
 .Dt SIMD 7
 .Os
 .Sh NAME
@@ -86,8 +86,8 @@ Enhanced functions are present for the following architectures:
 .It    strsep            Ta    S       Ta        Ta    S2
 .It    strspn            Ta    S       Ta        Ta    S2
 .It    swab              Ta            Ta        Ta           Ta    S
-.It    timingsafe_bcmp   Ta            Ta        Ta    S1
-.It    timingsafe_memcmp Ta            Ta        Ta    S
+.It    timingsafe_bcmp   Ta    A       Ta        Ta    S1
+.It    timingsafe_memcmp Ta    S       Ta        Ta    S
 .It    wcschr            Ta            Ta        Ta           Ta    S
 .It    wcscmp            Ta            Ta        Ta           Ta    S
 .It    wcslen            Ta            Ta        Ta           Ta    S

From 6b82130e6c9add4a8892ca897df5a0ec04663ea2 Mon Sep 17 00:00:00 2001
From: Mark Johnston <markj@FreeBSD.org>
Date: Fri, 10 Jan 2025 15:37:07 +0000
Subject: [PATCH 119/143] clock: Add a long ticks variable, ticksl

For compatibility with Linux, it's useful to have a tick counter of
width sizeof(long), but our tick counter is an int.  Currently the
linuxkpi tries to paper over this difference, but this cannot really be
done reliably, so it's desirable to have a wider tick counter.  This
change introduces ticksl, keeping the existing ticks variable.

Follow a suggestion from kib to avoid having to maintain two separate
counters and to avoid converting existing code to use ticksl: change
hardclock() to update ticksl instead of ticks, and then use assembler
directives to make ticks and ticksl overlap such that loading ticks
gives the bottom 32 bits.  This makes it possible to use ticksl in the
linuxkpi without having to convert any native code, and without making
hardclock() more complicated or expensive.  Then, the linuxkpi can be
modified to use ticksl instead of ticks.

Reviewed by:	olce, kib, emaste
MFC after:	1 month
Differential Revision:	https://reviews.freebsd.org/D48383
---
 sys/conf/files        |  1 +
 sys/kern/kern_clock.c | 26 +++++++++++++------------
 sys/kern/kern_tc.c    |  4 ++--
 sys/kern/subr_param.c |  2 +-
 sys/kern/subr_ticks.s | 44 +++++++++++++++++++++++++++++++++++++++++++
 sys/sys/kernel.h      |  9 +++++++++
 sys/sys/timetc.h      |  2 +-
 7 files changed, 72 insertions(+), 16 deletions(-)
 create mode 100644 sys/kern/subr_ticks.s

diff --git a/sys/conf/files b/sys/conf/files
index d358737c561320..a630d9dd72bc57 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3932,6 +3932,7 @@ kern/subr_stack.c		optional ddb | stack | ktr
 kern/subr_stats.c		optional stats
 kern/subr_taskqueue.c		standard
 kern/subr_terminal.c		optional vt
+kern/subr_ticks.s		standard
 kern/subr_trap.c		standard
 kern/subr_turnstile.c		standard
 kern/subr_uio.c			standard
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index 6fa2272ed54a93..b11c0d235139e4 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -323,7 +323,7 @@ read_cpu_time(long *cp_time)
 
 #include <sys/watchdog.h>
 
-static int watchdog_ticks;
+static long watchdog_ticks;
 static int watchdog_enabled;
 static void watchdog_fire(void);
 static void watchdog_config(void *, u_int, int *);
@@ -369,10 +369,9 @@ watchdog_attach(void)
 int	stathz;
 int	profhz;
 int	profprocs;
-volatile int	ticks;
 int	psratio;
 
-DPCPU_DEFINE_STATIC(int, pcputicks);	/* Per-CPU version of ticks. */
+DPCPU_DEFINE_STATIC(long, pcputicks);	/* Per-CPU version of ticks. */
 #ifdef DEVICE_POLLING
 static int devpoll_run = 0;
 #endif
@@ -480,14 +479,14 @@ hardclock(int cnt, int usermode)
 	struct pstats *pstats;
 	struct thread *td = curthread;
 	struct proc *p = td->td_proc;
-	int *t = DPCPU_PTR(pcputicks);
-	int global, i, newticks;
+	long global, newticks, *t;
 
 	/*
 	 * Update per-CPU and possibly global ticks values.
 	 */
+	t = DPCPU_PTR(pcputicks);
 	*t += cnt;
-	global = ticks;
+	global = atomic_load_long(&ticksl);
 	do {
 		newticks = *t - global;
 		if (newticks <= 0) {
@@ -496,7 +495,7 @@ hardclock(int cnt, int usermode)
 			newticks = 0;
 			break;
 		}
-	} while (!atomic_fcmpset_int(&ticks, &global, *t));
+	} while (!atomic_fcmpset_long(&ticksl, &global, *t));
 
 	/*
 	 * Run current process's virtual and profile time, as needed.
@@ -525,8 +524,10 @@ hardclock(int cnt, int usermode)
 		}
 #endif /* DEVICE_POLLING */
 		if (watchdog_enabled > 0) {
-			i = atomic_fetchadd_int(&watchdog_ticks, -newticks);
-			if (i > 0 && i <= newticks)
+			long left;
+
+			left = atomic_fetchadd_long(&watchdog_ticks, -newticks);
+			if (left > 0 && left <= newticks)
 				watchdog_fire();
 		}
 		intr_event_handle(clk_intr_event, NULL);
@@ -540,11 +541,12 @@ hardclock(int cnt, int usermode)
 void
 hardclock_sync(int cpu)
 {
-	int *t;
+	long *t;
+
 	KASSERT(!CPU_ABSENT(cpu), ("Absent CPU %d", cpu));
-	t = DPCPU_ID_PTR(cpu, pcputicks);
 
-	*t = ticks;
+	t = DPCPU_ID_PTR(cpu, pcputicks);
+	*t = ticksl;
 }
 
 /*
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c
index 26f09cb602603d..a797a101bf6f3a 100644
--- a/sys/kern/kern_tc.c
+++ b/sys/kern/kern_tc.c
@@ -1916,9 +1916,9 @@ SYSCTL_INT(_kern_timecounter, OID_AUTO, tick, CTLFLAG_RD, &tc_tick, 0,
     "Approximate number of hardclock ticks in a millisecond");
 
 void
-tc_ticktock(int cnt)
+tc_ticktock(long cnt)
 {
-	static int count;
+	static long count;
 
 	if (mtx_trylock_spin(&tc_setclock_mtx)) {
 		count += cnt;
diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c
index 19169ba63061a3..f4359efec46687 100644
--- a/sys/kern/subr_param.c
+++ b/sys/kern/subr_param.c
@@ -197,7 +197,7 @@ init_param1(void)
 	 * Arrange for ticks to wrap 10 minutes after boot to help catch
 	 * sign problems sooner.
 	 */
-	ticks = INT_MAX - (hz * 10 * 60);
+	ticksl = INT_MAX - (hz * 10 * 60);
 
 	vn_lock_pair_pause_max = hz / 100;
 	if (vn_lock_pair_pause_max == 0)
diff --git a/sys/kern/subr_ticks.s b/sys/kern/subr_ticks.s
new file mode 100644
index 00000000000000..6565ba42413783
--- /dev/null
+++ b/sys/kern/subr_ticks.s
@@ -0,0 +1,44 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Mark Johnston <markj@FreeBSD.org>
+ */
+
+/*
+ * Define the "ticks" and "ticksl" variables.  The former is overlaid onto the
+ * low bits of the latter.
+ */
+
+#if defined(__aarch64__)
+#include <sys/elf_common.h>
+#include <machine/asm.h>
+
+GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL)
+#endif
+
+#ifdef _ILP32
+#define	SIZEOF_TICKSL	4
+#define	TICKSL_INIT	.long 0
+#else
+#define	SIZEOF_TICKSL	8
+#define	TICKSL_INIT	.quad 0
+#endif
+
+#if defined(_ILP32) || __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define	TICKS_OFFSET	0
+#else
+#define	TICKS_OFFSET	4
+#endif
+
+	.data
+
+	.global ticksl
+	.type ticksl, %object
+	.balign SIZEOF_TICKSL		/* byte count on every target */
+ticksl:	TICKSL_INIT
+	.size ticksl, SIZEOF_TICKSL
+
+	.global ticks
+	.type ticks, %object
+ticks	=ticksl + TICKS_OFFSET		/* alias of the low 32 bits */
+	.size ticks, 4
diff --git a/sys/sys/kernel.h b/sys/sys/kernel.h
index 3144b5a50c9857..3800990921076f 100644
--- a/sys/sys/kernel.h
+++ b/sys/sys/kernel.h
@@ -65,7 +65,16 @@ extern int psratio;			/* ratio: prof / stat */
 extern int stathz;			/* statistics clock's frequency */
 extern int profhz;			/* profiling clock's frequency */
 extern int profprocs;			/* number of process's profiling */
+
+/*
+ * The ticks and ticksl symbols overlap, giving a 64-bit tick counter on 64-bit
+ * platforms while still maintaining compatibility with the legacy 32-bit
+ * counter.  Either value can be used, but rollover must be handled; at 1000Hz,
+ * ticks (and ticksl on 32-bit platforms) roll over roughly every 25 days.  On
+ * 64-bit platforms, ticksl will not roll over in the foreseeable future.
+ */
 extern volatile int ticks;
+extern volatile long ticksl;
 
 #endif /* _KERNEL */
 
diff --git a/sys/sys/timetc.h b/sys/sys/timetc.h
index 1d9b18620e96c5..52277086842576 100644
--- a/sys/sys/timetc.h
+++ b/sys/sys/timetc.h
@@ -87,7 +87,7 @@ extern int tc_min_ticktock_freq; /*
 u_int64_t tc_getfrequency(void);
 void	tc_init(struct timecounter *tc);
 void	tc_setclock(struct timespec *ts);
-void	tc_ticktock(int cnt);
+void	tc_ticktock(long cnt);
 void	cpu_tick_calibration(void);
 
 #ifdef SYSCTL_DECL

From 9eb30ef4b7a0ca1ef7bcc871b6391d98b00c259f Mon Sep 17 00:00:00 2001
From: Mitchell Horne <mhorne@FreeBSD.org>
Date: Fri, 10 Jan 2025 13:57:36 -0400
Subject: [PATCH 120/143] riscv: enable Allwinner D1 USB drivers

Add the generic USB drivers and FDT glue to the build.

Make small tweaks to the aw_usbphy and aw_musb drivers for the Allwinner
D1.

Reviewed by:	manu
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D48126
---
 sys/arm/allwinner/aw_usbphy.c               | 9 +++++++++
 sys/conf/files.riscv                        | 6 ++++++
 sys/dev/usb/controller/musb_otg_allwinner.c | 5 +++--
 sys/riscv/allwinner/files.allwinner         | 3 +++
 sys/riscv/conf/std.allwinner                | 3 +++
 5 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/sys/arm/allwinner/aw_usbphy.c b/sys/arm/allwinner/aw_usbphy.c
index b0ef7d9da0a98c..97c3d220777320 100644
--- a/sys/arm/allwinner/aw_usbphy.c
+++ b/sys/arm/allwinner/aw_usbphy.c
@@ -56,6 +56,7 @@ enum awusbphy_type {
 	AWUSBPHY_TYPE_A64,
 	AWUSBPHY_TYPE_A83T,
 	AWUSBPHY_TYPE_H6,
+	AWUSBPHY_TYPE_D1,
 };
 
 struct aw_usbphy_conf {
@@ -121,6 +122,13 @@ static const struct aw_usbphy_conf h6_usbphy_conf = {
 	.phy0_route = true,
 };
 
+static const struct aw_usbphy_conf d1_usbphy_conf = {
+	.num_phys = 2,
+	.phy_type = AWUSBPHY_TYPE_D1,
+	.pmu_unk1 = true,
+	.phy0_route = true,
+};
+
 static struct ofw_compat_data compat_data[] = {
 	{ "allwinner,sun4i-a10-usb-phy",	(uintptr_t)&a10_usbphy_conf },
 	{ "allwinner,sun5i-a13-usb-phy",	(uintptr_t)&a13_usbphy_conf },
@@ -130,6 +138,7 @@ static struct ofw_compat_data compat_data[] = {
 	{ "allwinner,sun50i-a64-usb-phy",	(uintptr_t)&a64_usbphy_conf },
 	{ "allwinner,sun8i-a83t-usb-phy",	(uintptr_t)&a83t_usbphy_conf },
 	{ "allwinner,sun50i-h6-usb-phy",	(uintptr_t)&h6_usbphy_conf },
+	{ "allwinner,sun20i-d1-usb-phy",	(uintptr_t)&d1_usbphy_conf },
 	{ NULL,					0 }
 };
 
diff --git a/sys/conf/files.riscv b/sys/conf/files.riscv
index 534fe5013c568c..514c955181c38e 100644
--- a/sys/conf/files.riscv
+++ b/sys/conf/files.riscv
@@ -12,6 +12,12 @@ dev/pci/pci_host_generic.c	optional	pci
 dev/pci/pci_host_generic_fdt.c	optional	pci fdt
 dev/uart/uart_cpu_fdt.c		optional	uart fdt
 dev/uart/uart_dev_lowrisc.c	optional	uart_lowrisc
+dev/usb/controller/generic_ehci.c		optional ehci
+dev/usb/controller/generic_ehci_fdt.c		optional ehci fdt
+dev/usb/controller/generic_ohci.c		optional ohci fdt
+dev/usb/controller/generic_usb_if.m		optional ohci fdt
+dev/usb/controller/generic_xhci.c		optional xhci
+dev/usb/controller/generic_xhci_fdt.c		optional xhci fdt
 dev/vmm/vmm_dev.c		optional	vmm
 dev/vmm/vmm_stat.c		optional	vmm
 dev/xilinx/axi_quad_spi.c	optional	xilinx_spi
diff --git a/sys/dev/usb/controller/musb_otg_allwinner.c b/sys/dev/usb/controller/musb_otg_allwinner.c
index 574e8e712713ca..781b4d7e33fae9 100644
--- a/sys/dev/usb/controller/musb_otg_allwinner.c
+++ b/sys/dev/usb/controller/musb_otg_allwinner.c
@@ -77,7 +77,7 @@
 #if defined(__arm__)
 #define	bs_parent_space(bs)	((bs)->bs_parent)
 typedef bus_space_tag_t	awusb_bs_tag;
-#elif defined(__aarch64__)
+#elif defined(__aarch64__) || defined(__riscv)
 #define	bs_parent_space(bs)	(bs)
 typedef void *		awusb_bs_tag;
 #endif
@@ -89,6 +89,7 @@ static struct ofw_compat_data compat_data[] = {
 	{ "allwinner,sun6i-a31-musb",	AWUSB_OKAY },
 	{ "allwinner,sun8i-a33-musb",	AWUSB_OKAY | AWUSB_NO_CONFDATA },
 	{ "allwinner,sun8i-h3-musb",	AWUSB_OKAY | AWUSB_NO_CONFDATA },
+	{ "allwinner,sun20i-d1-musb",	AWUSB_OKAY | AWUSB_NO_CONFDATA },
 	{ NULL,				0 }
 };
 
@@ -474,7 +475,7 @@ awusbdrd_attach(device_t dev)
 
 #if defined(__arm__)
 	sc->bs.bs_parent = rman_get_bustag(sc->res[0]);
-#elif defined(__aarch64__)
+#elif defined(__aarch64__) || defined(__riscv)
 	sc->bs.bs_cookie = rman_get_bustag(sc->res[0]);
 #endif
 
diff --git a/sys/riscv/allwinner/files.allwinner b/sys/riscv/allwinner/files.allwinner
index f55d883abf57b9..a87d79dfda2a10 100644
--- a/sys/riscv/allwinner/files.allwinner
+++ b/sys/riscv/allwinner/files.allwinner
@@ -3,6 +3,7 @@ arm/allwinner/aw_gpio.c			optional gpio aw_gpio fdt
 arm/allwinner/aw_rtc.c			optional aw_rtc fdt
 arm/allwinner/aw_syscon.c		optional syscon
 arm/allwinner/aw_sid.c			optional aw_sid nvmem
+arm/allwinner/aw_usbphy.c		optional ehci aw_usbphy fdt
 arm/allwinner/aw_wdog.c			optional aw_wdog
 arm/allwinner/if_awg.c			optional awg syscon
 
@@ -18,4 +19,6 @@ dev/clk/allwinner/aw_clk_np.c		optional aw_ccu fdt
 dev/clk/allwinner/aw_clk_prediv_mux.c	optional aw_ccu fdt
 dev/clk/allwinner/ccu_d1.c		optional soc_allwinner_d1 aw_ccu fdt
 
+dev/usb/controller/musb_otg_allwinner.c	optional musb fdt
+
 riscv/allwinner/d1_padconf.c            optional soc_allwinner_d1 aw_gpio fdt
diff --git a/sys/riscv/conf/std.allwinner b/sys/riscv/conf/std.allwinner
index a888e4d5d370c0..5e7a6c0e0a52d3 100644
--- a/sys/riscv/conf/std.allwinner
+++ b/sys/riscv/conf/std.allwinner
@@ -9,7 +9,10 @@ device		aw_ccu		# Allwinner clock controller
 device		aw_gpio		# Allwinner GPIO controller
 device		aw_rtc		# Allwinner Real-time Clock
 device		aw_sid		# Allwinner Secure ID EFUSE
+device		aw_usbphy	# Allwinner USB PHY
 device		aw_wdog		# Allwinner Watchdog
 device		awg		# Allwinner EMAC Gigabit Ethernet
 
+device		musb		# Mentor Graphics USB OTG controller
+
 files		"../allwinner/files.allwinner"

From aa766e2a03f0eb2fb6272828865c83a807b81cf1 Mon Sep 17 00:00:00 2001
From: Mitchell Horne <mhorne@FreeBSD.org>
Date: Fri, 10 Jan 2025 14:46:01 -0400
Subject: [PATCH 121/143] ofw_cpu: fix __riscv preprocessor check

The canonical name is __riscv, not __riscv__. Newer compilers no longer
emit the latter.

This re-enables finding the nominal frequency from the CPU's clock.

I checked, and there are no remaining mistakes like this in the tree.

Reviewed by:	jrtc27, imp, jhb
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D48122
---
 sys/dev/ofw/ofw_cpu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sys/dev/ofw/ofw_cpu.c b/sys/dev/ofw/ofw_cpu.c
index ad0fd670421adc..cbca8caee1869b 100644
--- a/sys/dev/ofw/ofw_cpu.c
+++ b/sys/dev/ofw/ofw_cpu.c
@@ -42,7 +42,7 @@
 #include <dev/ofw/ofw_bus_subr.h>
 #include <dev/ofw/ofw_cpu.h>
 
-#if defined(__arm__) || defined(__arm64__) || defined(__riscv__)
+#if defined(__arm__) || defined(__arm64__) || defined(__riscv)
 #include <dev/clk/clk.h>
 #endif
 
@@ -206,7 +206,7 @@ ofw_cpu_attach(device_t dev)
 	phandle_t node;
 	pcell_t cell;
 	int rv;
-#if defined(__arm__) || defined(__arm64__) || defined(__riscv__)
+#if defined(__arm__) || defined(__arm64__) || defined(__riscv)
 	clk_t cpuclk;
 	uint64_t freq;
 #endif
@@ -276,7 +276,7 @@ ofw_cpu_attach(device_t dev)
 	sc->sc_cpu_pcpu = pcpu_find(device_get_unit(dev));
 
 	if (OF_getencprop(node, "clock-frequency", &cell, sizeof(cell)) < 0) {
-#if defined(__arm__) || defined(__arm64__) || defined(__riscv__)
+#if defined(__arm__) || defined(__arm64__) || defined(__riscv)
 		rv = clk_get_by_ofw_index(dev, 0, 0, &cpuclk);
 		if (rv == 0) {
 			rv = clk_get_freq(cpuclk, &freq);

From 4ea0c3f04f42119dff92317c0e4cef52350ed9db Mon Sep 17 00:00:00 2001
From: Mitchell Horne <mhorne@FreeBSD.org>
Date: Fri, 10 Jan 2025 14:46:43 -0400
Subject: [PATCH 122/143] ofw_cpu: collapse some #ifdef code

Mainly, to avoid repeating the list of architectures, #define HAS_CLK.

Further, split the clk code into a helper function, which is a stub in
the !HAS_CLK case. This aids in overall legibility.

While here, add one separating whitespace, again for legibility.

Reviewed by:	jhb
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D48149
---
 sys/dev/ofw/ofw_cpu.c | 43 +++++++++++++++++++++++++++----------------
 1 file changed, 27 insertions(+), 16 deletions(-)

diff --git a/sys/dev/ofw/ofw_cpu.c b/sys/dev/ofw/ofw_cpu.c
index cbca8caee1869b..339716a946ff45 100644
--- a/sys/dev/ofw/ofw_cpu.c
+++ b/sys/dev/ofw/ofw_cpu.c
@@ -44,6 +44,7 @@
 
 #if defined(__arm__) || defined(__arm64__) || defined(__riscv)
 #include <dev/clk/clk.h>
+#define	HAS_CLK
 #endif
 
 static int	ofw_cpulist_probe(device_t);
@@ -198,6 +199,30 @@ ofw_cpu_probe(device_t dev)
 	return (0);
 }
 
+static int
+get_freq_from_clk(device_t dev, struct ofw_cpu_softc *sc)
+{
+#ifdef HAS_CLK
+	clk_t cpuclk;
+	uint64_t freq;
+	int rv;
+
+	rv = clk_get_by_ofw_index(dev, 0, 0, &cpuclk);
+	if (rv == 0) {
+		rv = clk_get_freq(cpuclk, &freq);
+		if (rv == 0)	/* freq is only meaningful on success */
+			sc->sc_nominal_mhz = freq / 1000000;
+		else if (bootverbose)
+			device_printf(dev,
+			    "Cannot get freq of property clocks\n");
+	}
+
+	return (rv);		/* 0, or the error from the failing clk call */
+#else
+	return (ENODEV);	/* built without the clk framework */
+#endif
+}
+
 static int
 ofw_cpu_attach(device_t dev)
 {
@@ -206,10 +231,6 @@ ofw_cpu_attach(device_t dev)
 	phandle_t node;
 	pcell_t cell;
 	int rv;
-#if defined(__arm__) || defined(__arm64__) || defined(__riscv)
-	clk_t cpuclk;
-	uint64_t freq;
-#endif
 
 	sc = device_get_softc(dev);
 	psc = device_get_softc(device_get_parent(dev));
@@ -276,18 +297,7 @@ ofw_cpu_attach(device_t dev)
 	sc->sc_cpu_pcpu = pcpu_find(device_get_unit(dev));
 
 	if (OF_getencprop(node, "clock-frequency", &cell, sizeof(cell)) < 0) {
-#if defined(__arm__) || defined(__arm64__) || defined(__riscv)
-		rv = clk_get_by_ofw_index(dev, 0, 0, &cpuclk);
-		if (rv == 0) {
-			rv = clk_get_freq(cpuclk, &freq);
-			if (rv != 0 && bootverbose)
-				device_printf(dev,
-				    "Cannot get freq of property clocks\n");
-			else
-				sc->sc_nominal_mhz = freq / 1000000;
-		} else
-#endif
-		{
+		if (get_freq_from_clk(dev, sc) != 0) {
 			if (bootverbose)
 				device_printf(dev,
 				    "missing 'clock-frequency' property\n");
@@ -298,6 +308,7 @@ ofw_cpu_attach(device_t dev)
 	if (sc->sc_nominal_mhz != 0 && bootverbose)
 		device_printf(dev, "Nominal frequency %dMhz\n",
 		    sc->sc_nominal_mhz);
+
 	bus_identify_children(dev);
 	bus_attach_children(dev);
 	return (0);

From ac9de183f37006fc2089757779d6d5065a530d5b Mon Sep 17 00:00:00 2001
From: Mitchell Horne <mhorne@FreeBSD.org>
Date: Fri, 10 Jan 2025 14:46:56 -0400
Subject: [PATCH 123/143] ofw_cpu: check for "disabled" status during probe

Some RISC-V CPUs contain a "monitor core" with limited functionality (no
MMU). These cores appear in some device trees, but we don't run the
kernel on them; in early CPU start-up code we skip them, and they have
no impact on mp_ncpu. It seems the new trend is to mark these monitor
cores with a 'status' property of 'disabled'.

However, we still instantiate an ofw_cpu pseudo device for the disabled
core. This is generally harmless, but there is an impact when attempting
to attach the cpufreq_dt driver. It counts more OFW CPU devices (unit
number) than logical CPUs (mp_ncpus), and therefore fails to attach for
the last logical CPU.

The solution is to check the status property in ofw_cpu_probe(), and
fail if the core is marked "disabled". This is subject to the same
exception already in ofw_cpu_early_foreach(); that is, if a disabled CPU
has an 'enable-method' property, it can be used by the kernel.

Reviewed by:	andrew, jrtc27
MFC after:	1 month
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D48123
---
 sys/dev/ofw/ofw_cpu.c | 32 +++++++++++++++++++++++---------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/sys/dev/ofw/ofw_cpu.c b/sys/dev/ofw/ofw_cpu.c
index 339716a946ff45..888af0440746e0 100644
--- a/sys/dev/ofw/ofw_cpu.c
+++ b/sys/dev/ofw/ofw_cpu.c
@@ -182,6 +182,24 @@ static driver_t ofw_cpu_driver = {
 
 DRIVER_MODULE(ofw_cpu, cpulist, ofw_cpu_driver, 0, 0);
 
+static bool
+ofw_cpu_is_runnable(phandle_t node)
+{
+	/*
+	 * Per the DeviceTree Specification, a cpu node (under /cpus) that
+	 * has 'status = disabled' indicates that "the CPU is in a quiescent
+	 * state."
+	 *
+	 * A quiescent CPU that specifies an "enable-method", such as
+	 * "spin-table", can still be used by the kernel.
+	 *
+	 * Lacking this, any CPU marked "disabled" or other non-okay status
+	 * should be excluded from the kernel's view.
+	 */
+	return (ofw_bus_node_status_okay(node) ||
+	    OF_hasprop(node, "enable-method"));
+}
+
 static int
 ofw_cpu_probe(device_t dev)
 {
@@ -190,6 +208,9 @@ ofw_cpu_probe(device_t dev)
 	if (type == NULL || strcmp(type, "cpu") != 0)
 		return (ENXIO);
 
+	if (!ofw_cpu_is_runnable(ofw_bus_get_node(dev)))
+		return (ENXIO);
+
 	device_set_desc(dev, "Open Firmware CPU");
 	if (!bootverbose && device_get_unit(dev) != 0) {
 		device_quiet(dev);
@@ -352,7 +373,6 @@ ofw_cpu_early_foreach(ofw_cpu_foreach_cb callback, bool only_runnable)
 {
 	phandle_t node, child;
 	pcell_t addr_cells, reg[2];
-	char status[16];
 	char device_type[16];
 	u_int id, next_id;
 	int count, rv;
@@ -389,14 +409,8 @@ ofw_cpu_early_foreach(ofw_cpu_foreach_cb callback, bool only_runnable)
 		 * those that have been enabled, or do provide a method
 		 * to enable them.
 		 */
-		if (only_runnable) {
-			status[0] = '\0';
-			OF_getprop(child, "status", status, sizeof(status));
-			if (status[0] != '\0' && strcmp(status, "okay") != 0 &&
-				strcmp(status, "ok") != 0 &&
-				!OF_hasprop(child, "enable-method"))
-					continue;
-		}
+		if (only_runnable && !ofw_cpu_is_runnable(child))
+			continue;
 
 		/*
 		 * Check we have a register to identify the cpu

From def7999c2ccddc9a303a65c0bea22976e79d8613 Mon Sep 17 00:00:00 2001
From: Mitchell Horne <mhorne@FreeBSD.org>
Date: Tue, 8 Oct 2024 15:49:11 -0300
Subject: [PATCH 124/143] riscv: enable cpufreq_dt driver

Implement the small amount of MD code required; copied from arm/arm64.

One tweak is made to cpufreq_dt itself: if the opp-shared property is
missing, but there is only one CPU, then we can still attach. This is
relevant for the single-core Allwinner D1.

Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D48124
---
 sys/conf/files.riscv         |  1 +
 sys/dev/cpufreq/cpufreq_dt.c |  2 +-
 sys/riscv/conf/GENERIC       |  3 +++
 sys/riscv/include/pcpu.h     |  3 ++-
 sys/riscv/riscv/machdep.c    | 12 +++++++++++-
 5 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/sys/conf/files.riscv b/sys/conf/files.riscv
index 514c955181c38e..36eea03f29a1be 100644
--- a/sys/conf/files.riscv
+++ b/sys/conf/files.riscv
@@ -4,6 +4,7 @@ cddl/dev/dtrace/riscv/dtrace_subr.c			optional dtrace compile-with "${DTRACE_C}"
 cddl/dev/dtrace/riscv/instr_size.c			optional dtrace compile-with "${DTRACE_C}"
 cddl/dev/fbt/riscv/fbt_isa.c				optional dtrace_fbt | dtraceall compile-with "${FBT_C}"
 crypto/des/des_enc.c		optional	netsmb
+dev/cpufreq/cpufreq_dt.c	optional	cpufreq fdt
 dev/ofw/ofw_cpu.c		optional	fdt
 dev/ofw/ofw_pcib.c		optional 	pci fdt
 dev/pci/pci_dw.c		optional	pci fdt
diff --git a/sys/dev/cpufreq/cpufreq_dt.c b/sys/dev/cpufreq/cpufreq_dt.c
index 929eebfe7dc548..e35a8ec73ef487 100644
--- a/sys/dev/cpufreq/cpufreq_dt.c
+++ b/sys/dev/cpufreq/cpufreq_dt.c
@@ -401,7 +401,7 @@ cpufreq_dt_oppv2_parse(struct cpufreq_dt_softc *sc, phandle_t node)
 	if (opp_table == opp_xref)
 		return (ENXIO);
 
-	if (!OF_hasprop(opp_table, "opp-shared")) {
+	if (!OF_hasprop(opp_table, "opp-shared") && mp_ncpus > 1) {
 		device_printf(sc->dev, "Only opp-shared is supported\n");
 		return (ENXIO);
 	}
diff --git a/sys/riscv/conf/GENERIC b/sys/riscv/conf/GENERIC
index 23d8a4e47eee22..34426f16796343 100644
--- a/sys/riscv/conf/GENERIC
+++ b/sys/riscv/conf/GENERIC
@@ -90,6 +90,9 @@ device		syscon
 device		syscon_power
 device		riscv_syscon
 
+# CPU frequency control
+device		cpufreq
+
 # Bus drivers
 device		pci
 
diff --git a/sys/riscv/include/pcpu.h b/sys/riscv/include/pcpu.h
index d00226defc2f17..f11060496963be 100644
--- a/sys/riscv/include/pcpu.h
+++ b/sys/riscv/include/pcpu.h
@@ -46,7 +46,8 @@
 	struct pmap *pc_curpmap;	/* Currently active pmap */	\
 	uint32_t pc_pending_ipis;	/* IPIs pending to this CPU */	\
 	uint32_t pc_hart;		/* Hart ID */			\
-	char __pad[56]			/* Pad to factor of PAGE_SIZE */
+	uint64_t pc_clock;						\
+	char __pad[48]			/* Pad to factor of PAGE_SIZE */
 
 #ifdef _KERNEL
 
diff --git a/sys/riscv/riscv/machdep.c b/sys/riscv/riscv/machdep.c
index c5da4832dd367b..c0d4b7cc27262d 100644
--- a/sys/riscv/riscv/machdep.c
+++ b/sys/riscv/riscv/machdep.c
@@ -219,8 +219,18 @@ cpu_flush_dcache(void *ptr, size_t len)
 int
 cpu_est_clockrate(int cpu_id, uint64_t *rate)
 {
+	struct pcpu *pc;
 
-	panic("cpu_est_clockrate");
+	pc = pcpu_find(cpu_id);
+	if (pc == NULL || rate == NULL)
+		return (EINVAL);
+
+	if (pc->pc_clock == 0)
+		return (EOPNOTSUPP);
+
+	*rate = pc->pc_clock;
+
+	return (0);
 }
 
 void

From 9234a50752cd47887849d4665af0f9f4abdefb5d Mon Sep 17 00:00:00 2001
From: Seyed Pouria Mousavizadeh Tehrani <info@spmzt.net>
Date: Fri, 10 Jan 2025 12:46:18 +0330
Subject: [PATCH 125/143] ng_ksocket: add IPv6 support for address parsing and
 unparsing

Differential Revision:	https://reviews.freebsd.org/D48204
---
 share/man/man4/ng_ksocket.4 | 13 +++--
 sys/netgraph/ng_ksocket.c   | 94 ++++++++++++++++++++++++++++++++++---
 2 files changed, 98 insertions(+), 9 deletions(-)

diff --git a/share/man/man4/ng_ksocket.4 b/share/man/man4/ng_ksocket.4
index 1f32d39dc7d000..bb653c3688ad4d 100644
--- a/share/man/man4/ng_ksocket.4
+++ b/share/man/man4/ng_ksocket.4
@@ -32,7 +32,7 @@
 .\"
 .\" Author: Archie Cobbs <archie@FreeBSD.org>
 .\"
-.Dd January 9, 2012
+.Dd January 9, 2025
 .Dt NG_KSOCKET 4
 .Os
 .Sh NAME
@@ -183,7 +183,8 @@ in the argument field, the normal
 equivalent of the C structure
 is an acceptable form.
 For the
-.Dv PF_INET
+.Dv PF_INET ,
+.Dv PF_INET6
 and
 .Dv PF_LOCAL
 address families, a more convenient form is also used, which is
@@ -191,7 +192,11 @@ the protocol family name, followed by a slash, followed by the actual
 address.
 For
 .Dv PF_INET ,
-the address is an IP address followed by an optional colon and port number.
+the address is an IPv4 address followed by an optional colon and port number.
+For
+.Dv PF_INET6 ,
+the address is an IPv6 address enclosed in square brackets followed
+by an optional colon and port number.
 For
 .Dv PF_LOCAL ,
 the address is the pathname as a doubly quoted string.
@@ -202,6 +207,8 @@ Examples:
 local/"/tmp/foo.socket"
 .It Dv PF_INET
 inet/192.168.1.1:1234
+.It Dv PF_INET6
+inet6/[2001::1]:1234
 .It Other
 .Dv "\&{ family=16 len=16 data=[0x70 0x00 0x01 0x23] \&}"
 .El
diff --git a/sys/netgraph/ng_ksocket.c b/sys/netgraph/ng_ksocket.c
index 3e4427f9e387be..43a2747224f309 100644
--- a/sys/netgraph/ng_ksocket.c
+++ b/sys/netgraph/ng_ksocket.c
@@ -58,6 +58,9 @@
 #include <sys/uio.h>
 #include <sys/un.h>
 
+#include <net/if.h>
+#include <net/if_var.h>
+
 #include <netgraph/ng_message.h>
 #include <netgraph/netgraph.h>
 #include <netgraph/ng_parse.h>
@@ -66,6 +69,8 @@
 #include <netinet/in.h>
 #include <netinet/ip.h>
 
+#include <netinet6/scope6_var.h>
+
 #ifdef NG_SEPARATE_MALLOC
 static MALLOC_DEFINE(M_NETGRAPH_KSOCKET, "netgraph_ksock",
     "netgraph ksock node");
@@ -147,6 +152,19 @@ static const struct ng_ksocket_alias ng_ksocket_protos[] = {
 	{ "swipe",	IPPROTO_SWIPE,		PF_INET		},
 	{ "encap",	IPPROTO_ENCAP,		PF_INET		},
 	{ "pim",	IPPROTO_PIM,		PF_INET		},
+	{ "ip6",	IPPROTO_IPV6,		PF_INET6	},
+	{ "raw6",	IPPROTO_RAW,		PF_INET6	},
+	{ "icmp6",	IPPROTO_ICMPV6,		PF_INET6	},
+	{ "igmp6",	IPPROTO_IGMP,		PF_INET6	},
+	{ "tcp6",	IPPROTO_TCP,		PF_INET6	},
+	{ "udp6",	IPPROTO_UDP,		PF_INET6	},
+	{ "gre6",	IPPROTO_GRE,		PF_INET6	},
+	{ "esp6",	IPPROTO_ESP,		PF_INET6	},
+	{ "ah6",	IPPROTO_AH,		PF_INET6	},
+	{ "swipe6",	IPPROTO_SWIPE,		PF_INET6	},
+	{ "encap6",	IPPROTO_ENCAP,		PF_INET6	},
+	{ "divert6",	IPPROTO_DIVERT,		PF_INET6	},
+	{ "pim6",	IPPROTO_PIM,		PF_INET6	},
 	{ NULL,		-1					},
 };
 
@@ -296,9 +314,58 @@ ng_ksocket_sockaddr_parse(const struct ng_parse_type *type,
 		break;
 	    }
 
-#if 0
-	case PF_INET6:	/* XXX implement this someday */
-#endif
+	case PF_INET6:
+	    {
+		struct sockaddr_in6 *const sin6 = (struct sockaddr_in6 *)sa;
+		char *eptr;
+		char addr[INET6_ADDRSTRLEN];
+		char ifname[16];
+		u_long port;
+		bool hasifname = true;
+
+		/* RFC 3986 Section 3.2.2, Validate IP literal within square brackets. */
+		if (s[*off] == '[' && (strstr(&s[*off], "]")))
+			(*off)++;
+		else
+			return (EINVAL);
+		if ((eptr = strstr(&s[*off], "%")) == NULL) {
+			hasifname = false;
+			eptr = strstr(&s[*off], "]");
+		}
+		snprintf(addr, eptr - (s + *off) + 1, "%s", &s[*off]);
+		*off += (eptr - (s + *off));
+		if (!inet_pton(AF_INET6, addr, &sin6->sin6_addr))
+			return (EINVAL);
+
+		if (hasifname) {
+			uint16_t scope;
+
+			eptr = strstr(&s[*off], "]");
+			(*off)++;
+			snprintf(ifname, eptr - (s + *off) + 1, "%s", &s[*off]);
+			*off += (eptr - (s + *off));
+
+			if (sin6->sin6_addr.s6_addr16[0] != IPV6_ADDR_INT16_ULL)
+				return (EINVAL);
+			scope = in6_getscope(&sin6->sin6_addr);
+			sin6->sin6_scope_id =
+			    in6_getscopezone(ifunit(ifname), scope);
+		}
+
+		(*off)++;
+		if (s[*off] == ':') {
+			(*off)++;
+			port = strtoul(s + *off, &eptr, 10);
+			if (port > 0xffff || eptr == s + *off)
+				return (EINVAL);
+			*off += (eptr - (s + *off));
+			sin6->sin6_port = htons(port);
+		} else
+			sin6->sin6_port = 0;
+
+		sin6->sin6_len = sizeof(*sin6);
+		break;
+	    }
 
 	default:
 		return (EINVAL);
@@ -358,9 +425,24 @@ ng_ksocket_sockaddr_unparse(const struct ng_parse_type *type,
 		return(0);
 	    }
 
-#if 0
-	case PF_INET6:	/* XXX implement this someday */
-#endif
+	case PF_INET6:
+	    {
+		const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sa;
+		char addr[INET6_ADDRSTRLEN];
+
+		inet_ntop(AF_INET6, &sin6->sin6_addr, addr, INET6_ADDRSTRLEN);
+		slen += snprintf(cbuf, cbuflen, "inet6/[%s]", addr);
+
+		if (sin6->sin6_port != 0) {
+			slen += snprintf(cbuf + strlen(cbuf),
+			    cbuflen - strlen(cbuf), ":%d",
+			    (u_int)ntohs(sin6->sin6_port));
+		}
+		if (slen >= cbuflen)
+			return (ERANGE);
+		*off += sizeof(*sin6);
+		return(0);
+	    }
 
 	default:
 		return (*ng_ksocket_generic_sockaddr_type.supertype->unparse)

From f021e3573519ff192fc708cda9ca4bba264c96f7 Mon Sep 17 00:00:00 2001
From: Shteryana Shopova <syrinx@FreeBSD.org>
Date: Fri, 10 Jan 2025 15:30:21 -0500
Subject: [PATCH 126/143] bsnmpwalk: Fix crash on invalid data

PR:		258570
Reported by:	Robert Morris <rtm@lcs.mit.edu>
Reviewed by:	emaste, markj
Differential Revision: https://reviews.freebsd.org/D48422
---
 usr.sbin/bsnmpd/tools/bsnmptools/bsnmpget.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/usr.sbin/bsnmpd/tools/bsnmptools/bsnmpget.c b/usr.sbin/bsnmpd/tools/bsnmptools/bsnmpget.c
index 81108387d3a4ad..9d5a693c7c687d 100644
--- a/usr.sbin/bsnmpd/tools/bsnmptools/bsnmpget.c
+++ b/usr.sbin/bsnmpd/tools/bsnmptools/bsnmpget.c
@@ -502,7 +502,7 @@ snmptool_walk(struct snmp_toolinfo *snmptoolctx)
 
 			outputs += rc;
 
-			if ((u_int)rc < resp.nbindings) {
+			if ((u_int)rc < resp.nbindings || resp.nbindings == 0) {
 				snmp_pdu_free(&resp);
 				break;
 			}

From b2b974f7ef4cddff251d0de69d8da13232b25e4d Mon Sep 17 00:00:00 2001
From: Mark Johnston <markj@FreeBSD.org>
Date: Fri, 10 Jan 2025 23:59:47 +0000
Subject: [PATCH 127/143] clock: Simplify subr_ticks and rename

- We can use builtin constants for the size of int and long to simplify
  definitions.
- The file should have a .S suffix since we want to run it through the
  preprocessor, though apparently this happens anyway with .s...
- Move ticks and ticksl from .data to .bss.

Reported by:	jrtc27
Reviewed by:	jrtc27, kib, emaste
Fixes:		6b82130e6c9a ("clock: Add a long ticks variable, ticksl")
Differential Revision:	https://reviews.freebsd.org/D48420
---
 sys/conf/files                          |  2 +-
 sys/kern/{subr_ticks.s => subr_ticks.S} | 22 +++++++---------------
 2 files changed, 8 insertions(+), 16 deletions(-)
 rename sys/kern/{subr_ticks.s => subr_ticks.S} (62%)

diff --git a/sys/conf/files b/sys/conf/files
index a630d9dd72bc57..fc9108b5e10f3b 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3932,7 +3932,7 @@ kern/subr_stack.c		optional ddb | stack | ktr
 kern/subr_stats.c		optional stats
 kern/subr_taskqueue.c		standard
 kern/subr_terminal.c		optional vt
-kern/subr_ticks.s		standard
+kern/subr_ticks.S		standard
 kern/subr_trap.c		standard
 kern/subr_turnstile.c		standard
 kern/subr_uio.c			standard
diff --git a/sys/kern/subr_ticks.s b/sys/kern/subr_ticks.S
similarity index 62%
rename from sys/kern/subr_ticks.s
rename to sys/kern/subr_ticks.S
index 6565ba42413783..52435b194c4f07 100644
--- a/sys/kern/subr_ticks.s
+++ b/sys/kern/subr_ticks.S
@@ -16,29 +16,21 @@
 GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL)
 #endif
 
-#ifdef _ILP32
-#define	SIZEOF_TICKSL	4
-#define	TICKSL_INIT	.long 0
-#else
-#define	SIZEOF_TICKSL	8
-#define	TICKSL_INIT	.quad 0
-#endif
-
-#if defined(_ILP32) || __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 #define	TICKS_OFFSET	0
 #else
-#define	TICKS_OFFSET	4
+#define	TICKS_OFFSET	(__SIZEOF_LONG__ - __SIZEOF_INT__)
 #endif
 
-	.data
+	.bss
 
 	.global ticksl
 	.type ticksl, %object
-	.align SIZEOF_TICKSL
-ticksl:	TICKSL_INIT
-	.size ticksl, SIZEOF_TICKSL
+	.align __SIZEOF_LONG__
+ticksl:	.zero __SIZEOF_LONG__
+	.size ticksl, __SIZEOF_LONG__
 
 	.global ticks
 	.type ticks, %object
 ticks	=ticksl + TICKS_OFFSET
-	.size ticks, 4
+	.size ticks, __SIZEOF_INT__

From 550137eab24ab6bdf05bfbb986927004b9f2f34e Mon Sep 17 00:00:00 2001
From: John Baldwin <jhb@FreeBSD.org>
Date: Fri, 10 Jan 2025 22:49:59 -0500
Subject: [PATCH 128/143] universe: Permit requesting a specific version of GCC

If USE_GCC_TOOLCHAINS is set to a value matching the pattern 'gcc*',
use that as the GCC version.  For example, USE_GCC_TOOLCHAINS=gcc16
would use amd64-gcc16 for amd64, etc.  If the variable is set to a
value that doesn't match that pattern, use the default version.

Reviewed by:	imp, emaste
Differential Revision:	https://reviews.freebsd.org/D48418
---
 Makefile               | 16 +++++++++-------
 share/man/man7/build.7 |  7 ++++++-
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/Makefile b/Makefile
index 5c113d5b83cced..1970cc7368296d 100644
--- a/Makefile
+++ b/Makefile
@@ -531,13 +531,15 @@ TARGET_ARCHES_${target}= ${MACHINE_ARCH_LIST_${target}}
 .endfor
 
 .if defined(USE_GCC_TOOLCHAINS)
-TOOLCHAINS_amd64=	amd64-gcc12
-TOOLCHAINS_arm=		armv7-gcc12
-TOOLCHAINS_arm64=	aarch64-gcc12
-TOOLCHAINS_i386=	i386-gcc12
-TOOLCHAINS_powerpc=	powerpc-gcc12 powerpc64-gcc12
-TOOLCHAIN_powerpc64=	powerpc64-gcc12
-TOOLCHAINS_riscv=	riscv64-gcc12
+_DEFAULT_GCC_VERSION=	gcc12
+_GCC_VERSION=		${"${USE_GCC_TOOLCHAINS:Mgcc*}" != "":?${USE_GCC_TOOLCHAINS}:${_DEFAULT_GCC_VERSION}}
+TOOLCHAINS_amd64=	amd64-${_GCC_VERSION}
+TOOLCHAINS_arm=		armv7-${_GCC_VERSION}
+TOOLCHAINS_arm64=	aarch64-${_GCC_VERSION}
+TOOLCHAINS_i386=	i386-${_GCC_VERSION}
+TOOLCHAINS_powerpc=	powerpc-${_GCC_VERSION} powerpc64-${_GCC_VERSION}
+TOOLCHAIN_powerpc64=	powerpc64-${_GCC_VERSION}
+TOOLCHAINS_riscv=	riscv64-${_GCC_VERSION}
 .endif
 
 # If a target is using an external toolchain, set MAKE_PARAMS to enable use
diff --git a/share/man/man7/build.7 b/share/man/man7/build.7
index 32c33aa3d7305d..5ca44c51cf56cf 100644
--- a/share/man/man7/build.7
+++ b/share/man/man7/build.7
@@ -25,7 +25,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd November 13, 2024
+.Dd January 10, 2025
 .Dt BUILD 7
 .Os
 .Sh NAME
@@ -879,6 +879,11 @@ This variable implies
 Use external GCC toolchains to build the requested targets.
 If the required toolchain package for a supported architecture is not installed,
 the build for that architecture is skipped.
+.Pp
+A specific version of GCC can be used by setting the value of this variable
+to the desired version
+.Pq for example, Dq gcc14 ;
+otherwise a default version of GCC is used.
 .It Va TARGETS
 Only build the listed targets instead of each supported architecture.
 .It Va EXTRA_TARGETS

From 84a62d974e744fbe00ee542fc5f95e99c528bd0a Mon Sep 17 00:00:00 2001
From: John Baldwin <jhb@FreeBSD.org>
Date: Fri, 10 Jan 2025 22:58:09 -0500
Subject: [PATCH 129/143] mlx.4: Remove stale diagnostic message

This message would never have been emitted before since
bus_generic_attach never fails.

Reviewed by:	ziaee, emaste
Fixes:	18250ec6c089 Replace calls to bus_generic_attach with bus_attach_children
Differential Revision:	https://reviews.freebsd.org/D48402
---
 share/man/man4/mlx.4 | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/share/man/man4/mlx.4 b/share/man/man4/mlx.4
index f7a0d64e9e7392..7cb3e9a98c1de3 100644
--- a/share/man/man4/mlx.4
+++ b/share/man/man4/mlx.4
@@ -23,7 +23,7 @@
 .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd February 15, 2017
+.Dd January 10, 2025
 .Dt MLX 4
 .Os
 .Sh NAME
@@ -146,7 +146,6 @@ controller.
 The current status of all system drives could not be fetched; attachment
 of system drives will be aborted.
 .It mlx%d: device_add_child failed
-.It mlx%d: bus_generic_attach returned %d
 .Pp
 Creation of the system drive instances failed; attachment of one or more
 system drives may have been aborted.

From 4378bd382ea0f5707099273b1fa2393979a22628 Mon Sep 17 00:00:00 2001
From: John Baldwin <jhb@FreeBSD.org>
Date: Fri, 10 Jan 2025 23:01:21 -0500
Subject: [PATCH 130/143] stand/kshim: Replace bus_generic_attach with
 bus_attach_children

Fixes:	18250ec6c089 Replace calls to bus_generic_attach with bus_attach_children
Differential Revision:	https://reviews.freebsd.org/D48404
---
 stand/kshim/bsd_kernel.c | 6 ++----
 stand/kshim/bsd_kernel.h | 2 +-
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/stand/kshim/bsd_kernel.c b/stand/kshim/bsd_kernel.c
index ac552d86603fc9..fb1b4239fd0398 100644
--- a/stand/kshim/bsd_kernel.c
+++ b/stand/kshim/bsd_kernel.c
@@ -205,16 +205,14 @@ bus_release_resource(device_t dev, int type, int rid, struct resource *r)
 	return (EINVAL);
 }
 
-int
-bus_generic_attach(device_t dev)
+void
+bus_attach_children(device_t dev)
 {
 	device_t child;
 
 	TAILQ_FOREACH(child, &dev->dev_children, dev_link) {
 		device_probe_and_attach(child);
 	}
-
-	return (0);
 }
 
 bus_space_tag_t
diff --git a/stand/kshim/bsd_kernel.h b/stand/kshim/bsd_kernel.h
index 0608d32f21ec38..0b5d659951a284 100644
--- a/stand/kshim/bsd_kernel.h
+++ b/stand/kshim/bsd_kernel.h
@@ -646,7 +646,7 @@ int bus_release_resource(device_t, int, int, struct resource *);
 void bus_release_resources(device_t, const struct resource_spec *,
     struct resource **);
 struct resource *bus_alloc_resource_any(device_t, int, int *, unsigned int);
-int bus_generic_attach(device_t);
+void bus_attach_children(device_t);
 bus_space_tag_t rman_get_bustag(struct resource *);
 bus_space_handle_t rman_get_bushandle(struct resource *);
 u_long rman_get_size(struct resource *);

From ee15875c01593b287e55147c482b914e3ab01152 Mon Sep 17 00:00:00 2001
From: John Baldwin <jhb@FreeBSD.org>
Date: Fri, 10 Jan 2025 23:01:48 -0500
Subject: [PATCH 131/143] stand/kshim: Update for devclass being removed from
 DRIVER_MODULE

The kshim code abused the devclass argument to DRIVER_MODULE in some
odd ways.  Instead, refactor the devclass handling to more closely
mirror what new-bus does in the kernel by having a linked list of
devclasses looked up by name and associate devices with a devclass.

Devices are now only associated with a module while probing and
attaching.

Reviewed by:	imp, markj
Differential Revision:	https://reviews.freebsd.org/D48409
---
 stand/kshim/bsd_kernel.c         | 140 ++++++++++++++-----------------
 stand/kshim/bsd_kernel.h         |  11 ++-
 stand/usb/storage/umass_common.c |   4 +-
 3 files changed, 71 insertions(+), 84 deletions(-)

diff --git a/stand/kshim/bsd_kernel.c b/stand/kshim/bsd_kernel.c
index fb1b4239fd0398..78b6f9e0b4d419 100644
--- a/stand/kshim/bsd_kernel.c
+++ b/stand/kshim/bsd_kernel.c
@@ -554,6 +554,8 @@ static const char unknown_string[] = { "unknown" };
 
 static TAILQ_HEAD(, module_data) module_head =
     TAILQ_HEAD_INITIALIZER(module_head);
+static TAILQ_HEAD(, devclass) devclasses =
+    TAILQ_HEAD_INITIALIZER(devclasses);
 
 static uint8_t
 devclass_equal(const char *a, const char *b)
@@ -686,58 +688,50 @@ device_get_nameunit(device_t dev)
 	return (unknown_string);
 }
 
-static uint8_t
-devclass_create(devclass_t *dc_pp)
+static devclass_t
+devclass_create(const char *classname)
 {
-	if (dc_pp == NULL) {
-		return (1);
-	}
-	if (dc_pp[0] == NULL) {
-		dc_pp[0] = malloc(sizeof(**(dc_pp)),
-		    M_DEVBUF, M_WAITOK | M_ZERO);
+	devclass_t dc;
 
-		if (dc_pp[0] == NULL) {
-			return (1);
-		}
+	dc = malloc(sizeof(*dc), M_DEVBUF, M_WAITOK | M_ZERO);
+	if (dc == NULL) {
+		return (NULL);
 	}
-	return (0);
+	dc->name = classname;
+	TAILQ_INSERT_TAIL(&devclasses, dc, link);
+	return (dc);
 }
 
-static const struct module_data *
+static devclass_t
 devclass_find_create(const char *classname)
 {
-	const struct module_data *mod;
+	devclass_t dc;
 
-	TAILQ_FOREACH(mod, &module_head, entry) {
-		if (devclass_equal(mod->mod_name, classname)) {
-			if (devclass_create(mod->devclass_pp)) {
-				continue;
-			}
-			return (mod);
-		}
-	}
-	return (NULL);
+	dc = devclass_find(classname);
+	if (dc == NULL)
+		dc = devclass_create(classname);
+	return (dc);
 }
 
 static uint8_t
-devclass_add_device(const struct module_data *mod, device_t dev)
+devclass_add_device(devclass_t dc, device_t dev)
 {
 	device_t *pp_dev;
 	device_t *end;
 	uint8_t unit;
 
-	pp_dev = mod->devclass_pp[0]->dev_list;
+	pp_dev = dc->dev_list;
 	end = pp_dev + DEVCLASS_MAXUNIT;
 	unit = 0;
 
 	while (pp_dev != end) {
 		if (*pp_dev == NULL) {
 			*pp_dev = dev;
+			dev->dev_class = dc;
 			dev->dev_unit = unit;
-			dev->dev_module = mod;
 			snprintf(dev->dev_nameunit,
 			    sizeof(dev->dev_nameunit),
-			    "%s%d", device_get_name(dev), unit);
+			    "%s%d", dc->name, unit);
 			return (0);
 		}
 		pp_dev++;
@@ -748,26 +742,26 @@ devclass_add_device(const struct module_data *mod, device_t dev)
 }
 
 static void
-devclass_delete_device(const struct module_data *mod, device_t dev)
+devclass_delete_device(devclass_t dc, device_t dev)
 {
-	if (mod == NULL) {
+	if (dc == NULL) {
 		return;
 	}
-	mod->devclass_pp[0]->dev_list[dev->dev_unit] = NULL;
-	dev->dev_module = NULL;
+	dc->dev_list[dev->dev_unit] = NULL;
+	dev->dev_class = NULL;
 }
 
 static device_t
 make_device(device_t parent, const char *name)
 {
 	device_t dev = NULL;
-	const struct module_data *mod = NULL;
+	devclass_t dc = NULL;
 
 	if (name) {
 
-		mod = devclass_find_create(name);
+		dc = devclass_find_create(name);
 
-		if (!mod) {
+		if (!dc) {
 
 			DPRINTF("%s:%d:%s: can't find device "
 			    "class %s\n", __FILE__, __LINE__,
@@ -787,7 +781,7 @@ make_device(device_t parent, const char *name)
 
 	if (name) {
 		dev->dev_fixed_class = 1;
-		if (devclass_add_device(mod, dev)) {
+		if (devclass_add_device(dc, dev)) {
 			goto error;
 		}
 	}
@@ -843,7 +837,8 @@ device_delete_child(device_t dev, device_t child)
 		}
 	}
 
-	devclass_delete_device(child->dev_module, child);
+	if (child->dev_class != NULL)
+		devclass_delete_device(child->dev_class, child);
 
 	if (dev != NULL) {
 		/* remove child from parent */
@@ -911,7 +906,7 @@ device_get_method(device_t dev, const char *what)
 const char *
 device_get_name(device_t dev)
 {
-	if (dev == NULL)
+	if (dev == NULL || dev->dev_module == NULL)
 		return (unknown_string);
 
 	return (dev->dev_module->driver->name);
@@ -942,16 +937,34 @@ device_probe_and_attach(device_t dev)
 {
 	const struct module_data *mod;
 	const char *bus_name_parent;
-
-	bus_name_parent = device_get_name(device_get_parent(dev));
+	devclass_t dc;
 
 	if (dev->dev_attached)
 		return (0);		/* fail-safe */
 
-	if (dev->dev_fixed_class) {
+	/*
+         * Find a module for our device, if any
+         */
+	bus_name_parent = device_get_name(device_get_parent(dev));
+
+	TAILQ_FOREACH(mod, &module_head, entry) {
+		if (!devclass_equal(mod->bus_name, bus_name_parent))
+			continue;
+
+		dc = devclass_find(mod->mod_name);
 
-		mod = dev->dev_module;
+		/* Does this device need assigning to the new devclass? */
+		if (dev->dev_class != dc) {
+			if (dev->dev_fixed_class)
+				continue;
+			if (dev->dev_class != NULL)
+				devclass_delete_device(dev->dev_class, dev);
+			if (devclass_add_device(dc, dev)) {
+				continue;
+			}
+		}
 
+		dev->dev_module = mod;
 		if (DEVICE_PROBE(dev) <= 0) {
 
 			if (device_allocate_softc(dev) == 0) {
@@ -963,40 +976,11 @@ device_probe_and_attach(device_t dev)
 				}
 			}
 		}
-		device_detach(dev);
+		/* else try next driver */
 
-		goto error;
-	}
-	/*
-         * Else find a module for our device, if any
-         */
-
-	TAILQ_FOREACH(mod, &module_head, entry) {
-		if (devclass_equal(mod->bus_name, bus_name_parent)) {
-			if (devclass_create(mod->devclass_pp)) {
-				continue;
-			}
-			if (devclass_add_device(mod, dev)) {
-				continue;
-			}
-			if (DEVICE_PROBE(dev) <= 0) {
-
-				if (device_allocate_softc(dev) == 0) {
-
-					if (DEVICE_ATTACH(dev) == 0) {
-						/* success */
-						dev->dev_attached = 1;
-						return (0);
-					}
-				}
-			}
-			/* else try next driver */
-
-			device_detach(dev);
-		}
+		device_detach(dev);
 	}
 
-error:
 	return (ENODEV);
 }
 
@@ -1015,9 +999,10 @@ device_detach(device_t dev)
 		dev->dev_attached = 0;
 	}
 	device_set_softc(dev, NULL);
+	dev->dev_module = NULL;
 
 	if (dev->dev_fixed_class == 0)
-		devclass_delete_device(mod, dev);
+		devclass_delete_device(dev->dev_class, dev);
 
 	return (0);
 }
@@ -1093,11 +1078,11 @@ devclass_get_device(devclass_t dc, int unit)
 devclass_t
 devclass_find(const char *classname)
 {
-	const struct module_data *mod;
+	devclass_t dc;
 
-	TAILQ_FOREACH(mod, &module_head, entry) {
-		if (devclass_equal(mod->driver->name, classname))
-			return (mod->devclass_pp[0]);
+	TAILQ_FOREACH(dc, &devclasses, link) {
+		if (devclass_equal(dc->name, classname))
+			return (dc);
 	}
 	return (NULL);
 }
@@ -1108,6 +1093,7 @@ module_register(void *data)
 	struct module_data *mdata = data;
 
 	TAILQ_INSERT_TAIL(&module_head, mdata, entry);
+	(void)devclass_find_create(mdata->mod_name);
 }
 
 /*------------------------------------------------------------------------*
diff --git a/stand/kshim/bsd_kernel.h b/stand/kshim/bsd_kernel.h
index 0b5d659951a284..8600bd1f31dc3b 100644
--- a/stand/kshim/bsd_kernel.h
+++ b/stand/kshim/bsd_kernel.h
@@ -87,11 +87,11 @@ struct sysctl_req {
 #define	MOD_UNLOAD 2
 #define	DEVMETHOD(what,func) { #what, (void *)&func }
 #define	DEVMETHOD_END {0,0}
-#define	EARLY_DRIVER_MODULE(a, b, c, d, e, f, g)	DRIVER_MODULE(a, b, c, d, e, f)
-#define	DRIVER_MODULE(name, busname, driver, devclass, evh, arg)	\
+#define	EARLY_DRIVER_MODULE(a, b, c, d, e, f)	DRIVER_MODULE(a, b, c, d, e)
+#define	DRIVER_MODULE(name, busname, driver, evh, arg)	\
   static struct module_data bsd_##name##_##busname##_driver_mod = {	\
 	evh, arg, #busname, #name, #busname "/" #name,			\
-	&driver, &devclass, { 0, 0 } };					\
+	&driver, { 0, 0 } };					\
 SYSINIT(bsd_##name##_##busname##_driver_mod, SI_SUB_DRIVERS,		\
   SI_ORDER_MIDDLE, module_register,					\
   &bsd_##name##_##busname##_driver_mod)
@@ -135,6 +135,7 @@ SYSINIT_ENTRY(uniq##_entry, "sysuninit", (subs),	\
 #define	cold 0
 #define	BUS_PROBE_GENERIC 0
 #define	BUS_PROBE_DEFAULT (-20)
+#define	DEVICE_UNIT_ANY -1
 #define	CALLOUT_RETURNUNLOCKED 0x1
 #undef ffs
 #define	ffs(x) __builtin_ffs(x)
@@ -406,6 +407,7 @@ struct device {
 	TAILQ_HEAD(device_list, device) dev_children;
 	TAILQ_ENTRY(device) dev_link;
 
+	devclass_t dev_class;
 	struct device *dev_parent;
 	const struct module_data *dev_module;
 	void   *dev_sc;
@@ -429,6 +431,8 @@ struct device {
 };
 
 struct devclass {
+	TAILQ_ENTRY(devclass) link;
+	const char *name;
 	device_t dev_list[DEVCLASS_MAXUNIT];
 };
 
@@ -445,7 +449,6 @@ struct module_data {
 	const char *mod_name;
 	const char *long_name;
 	const struct driver *driver;
-	struct devclass **devclass_pp;
 	TAILQ_ENTRY(module_data) entry;
 };
 
diff --git a/stand/usb/storage/umass_common.c b/stand/usb/storage/umass_common.c
index 639d70a7f7e603..b62412b136373a 100644
--- a/stand/usb/storage/umass_common.c
+++ b/stand/usb/storage/umass_common.c
@@ -38,8 +38,6 @@ static device_probe_t umass_probe;
 static device_attach_t umass_attach;
 static device_detach_t umass_detach;
 
-static devclass_t umass_devclass;
-
 static device_method_t umass_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe, umass_probe),
@@ -54,7 +52,7 @@ static driver_t umass_driver = {
 	.methods = umass_methods,
 };
 
-DRIVER_MODULE(umass, uhub, umass_driver, umass_devclass, NULL, 0);
+DRIVER_MODULE(umass, uhub, umass_driver, NULL, 0);
 
 static int
 umass_probe(device_t dev)

From 8e4535ee5870b76f97f6d33a05e9979dcb75c185 Mon Sep 17 00:00:00 2001
From: John Baldwin <jhb@FreeBSD.org>
Date: Fri, 10 Jan 2025 23:02:27 -0500
Subject: [PATCH 132/143] stand/kshim: Implement bus_detach_children

While here, update bus_generic_detach to delete devices as in the
kernel.

Reviewed by:	imp, markj
Differential Revision:	https://reviews.freebsd.org/D48410
---
 stand/kshim/bsd_kernel.c | 13 ++++++++++++-
 stand/kshim/bsd_kernel.h |  1 +
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/stand/kshim/bsd_kernel.c b/stand/kshim/bsd_kernel.c
index 78b6f9e0b4d419..371aaa6e96bf9d 100644
--- a/stand/kshim/bsd_kernel.c
+++ b/stand/kshim/bsd_kernel.c
@@ -664,7 +664,7 @@ device_get_unit(device_t dev)
 }
 
 int
-bus_generic_detach(device_t dev)
+bus_detach_children(device_t dev)
 {
 	device_t child;
 	int error;
@@ -679,6 +679,17 @@ bus_generic_detach(device_t dev)
 	return (0);
 }
 
+int
+bus_generic_detach(device_t dev)
+{
+	int error;
+
+	error = bus_detach_children(dev);
+	if (error == 0)
+		error = device_delete_children(dev);
+	return (error);
+}
+
 const char *
 device_get_nameunit(device_t dev)
 {
diff --git a/stand/kshim/bsd_kernel.h b/stand/kshim/bsd_kernel.h
index 8600bd1f31dc3b..25a779d5ea0c5f 100644
--- a/stand/kshim/bsd_kernel.h
+++ b/stand/kshim/bsd_kernel.h
@@ -650,6 +650,7 @@ void bus_release_resources(device_t, const struct resource_spec *,
     struct resource **);
 struct resource *bus_alloc_resource_any(device_t, int, int *, unsigned int);
 void bus_attach_children(device_t);
+int bus_detach_children(device_t);
 bus_space_tag_t rman_get_bustag(struct resource *);
 bus_space_handle_t rman_get_bushandle(struct resource *);
 u_long rman_get_size(struct resource *);

From f6f5aa8a2df5910b0769ab7cd0e6199c9b2ab624 Mon Sep 17 00:00:00 2001
From: John Baldwin <jhb@FreeBSD.org>
Date: Fri, 10 Jan 2025 23:02:46 -0500
Subject: [PATCH 133/143] stand/usb: Quiet warnings so this builds again

Reviewed by:	markj, emaste
Differential Revision:	https://reviews.freebsd.org/D48411
---
 stand/usb/Makefile      | 2 +-
 stand/usb/test/Makefile | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/stand/usb/Makefile b/stand/usb/Makefile
index 0a1c9e0022ee10..a9c20fc340ad9f 100644
--- a/stand/usb/Makefile
+++ b/stand/usb/Makefile
@@ -35,7 +35,7 @@ INTERNALLIB=
 
 CFLAGS+=	-DBOOTPROG=\"usbloader\"
 CFLAGS+=	-ffunction-sections -fdata-sections
-CFLAGS+=	-Wformat -Wall
+CFLAGS+=	-Wformat -Wall -Wno-unused
 CFLAGS+=	-g
 CFLAGS+=	-fno-pic
 
diff --git a/stand/usb/test/Makefile b/stand/usb/test/Makefile
index 74e501a39ae7da..d92ef6dddecba4 100644
--- a/stand/usb/test/Makefile
+++ b/stand/usb/test/Makefile
@@ -35,6 +35,7 @@ PROG=	usbloader
 SRCS= 
 
 CFLAGS+= -Wall
+CFLAGS+= -Wno-error=missing-prototypes -Wno-error=unused-parameter
 CFLAGS+= -g
 
 .if ${MACHINE_CPUARCH} == "amd64"

From 40d7ba08773751ff7d0df1a3f112b32d1d04e5ec Mon Sep 17 00:00:00 2001
From: John Baldwin <jhb@FreeBSD.org>
Date: Fri, 10 Jan 2025 23:03:02 -0500
Subject: [PATCH 134/143] stand/kshim: Replace devclass_equal with calls to
 strcmp

Reviewed by:	imp, markj, emaste
Differential Revision:	https://reviews.freebsd.org/D48412
---
 stand/kshim/bsd_kernel.c | 27 +++------------------------
 1 file changed, 3 insertions(+), 24 deletions(-)

diff --git a/stand/kshim/bsd_kernel.c b/stand/kshim/bsd_kernel.c
index 371aaa6e96bf9d..91ca46e18d7490 100644
--- a/stand/kshim/bsd_kernel.c
+++ b/stand/kshim/bsd_kernel.c
@@ -557,27 +557,6 @@ static TAILQ_HEAD(, module_data) module_head =
 static TAILQ_HEAD(, devclass) devclasses =
     TAILQ_HEAD_INITIALIZER(devclasses);
 
-static uint8_t
-devclass_equal(const char *a, const char *b)
-{
-	char ta, tb;
-
-	if (a == b)
-		return (1);
-
-	while (1) {
-		ta = *a;
-		tb = *b;
-		if (ta != tb)
-			return (0);
-		if (ta == 0)
-			break;
-		a++;
-		b++;
-	}
-	return (1);
-}
-
 int
 bus_generic_resume(device_t dev)
 {
@@ -906,7 +885,7 @@ device_get_method(device_t dev, const char *what)
 
 	mtod = dev->dev_module->driver->methods;
 	while (mtod->func != NULL) {
-		if (devclass_equal(mtod->desc, what)) {
+		if (strcmp(mtod->desc, what) == 0) {
 			return (mtod->func);
 		}
 		mtod++;
@@ -959,7 +938,7 @@ device_probe_and_attach(device_t dev)
 	bus_name_parent = device_get_name(device_get_parent(dev));
 
 	TAILQ_FOREACH(mod, &module_head, entry) {
-		if (!devclass_equal(mod->bus_name, bus_name_parent))
+		if (strcmp(mod->bus_name, bus_name_parent) != 0)
 			continue;
 
 		dc = devclass_find(mod->mod_name);
@@ -1092,7 +1071,7 @@ devclass_find(const char *classname)
 	devclass_t dc;
 
 	TAILQ_FOREACH(dc, &devclasses, link) {
-		if (devclass_equal(dc->name, classname))
+		if (strcmp(dc->name, classname) == 0)
 			return (dc);
 	}
 	return (NULL);

From 0fda4ffd69054217096dd1a40355d97be9a8ab94 Mon Sep 17 00:00:00 2001
From: Gleb Smirnoff <glebius@FreeBSD.org>
Date: Fri, 10 Jan 2025 20:55:39 -0800
Subject: [PATCH 135/143] netlink: augment group writer with priv(9) argument

This will allow broadcasting messages visible only to privileged
subscribers.

Reviewed by:		melifaro
Differential Revision:	https://reviews.freebsd.org/D48307
---
 sys/netlink/netlink_domain.c         |  8 +++++---
 sys/netlink/netlink_generic.c        |  2 +-
 sys/netlink/netlink_glue.c           |  7 ++++---
 sys/netlink/netlink_message_writer.c |  3 ++-
 sys/netlink/netlink_message_writer.h | 10 ++++++----
 sys/netlink/netlink_sysevent.c       |  2 +-
 sys/netlink/netlink_var.h            |  2 +-
 sys/netlink/route/iface.c            |  5 +++--
 sys/netlink/route/neigh.c            |  2 +-
 sys/netlink/route/nexthop.c          |  4 ++--
 sys/netlink/route/rt.c               |  5 +++--
 11 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/sys/netlink/netlink_domain.c b/sys/netlink/netlink_domain.c
index 922da32bfb6d6c..45d427f4316633 100644
--- a/sys/netlink/netlink_domain.c
+++ b/sys/netlink/netlink_domain.c
@@ -47,7 +47,7 @@
 #include <sys/socketvar.h>
 #include <sys/sysent.h>
 #include <sys/syslog.h>
-#include <sys/priv.h> /* priv_check */
+#include <sys/priv.h>
 #include <sys/uio.h>
 
 #include <netlink/netlink.h>
@@ -225,8 +225,10 @@ nl_send_group(struct nl_writer *nw)
 	NLCTL_RLOCK(ctl);
 
 	CK_LIST_FOREACH(nlp, &ctl->ctl_pcb_head, nl_next) {
-		if (nl_isset_group_locked(nlp, nw->group.id) &&
-		    nlp->nl_proto == nw->group.proto) {
+		if ((nw->group.priv == 0 || priv_check_cred(
+		    nlp->nl_socket->so_cred, nw->group.priv) == 0) &&
+		    nlp->nl_proto == nw->group.proto &&
+		    nl_isset_group_locked(nlp, nw->group.id)) {
 			if (nlp_last != NULL) {
 				struct nl_buf *copy;
 
diff --git a/sys/netlink/netlink_generic.c b/sys/netlink/netlink_generic.c
index 0f960d79f47741..62353149217382 100644
--- a/sys/netlink/netlink_generic.c
+++ b/sys/netlink/netlink_generic.c
@@ -259,7 +259,7 @@ nlctrl_notify(void *arg __unused, const struct genl_family *gf, int cmd)
 	struct nl_writer nw;
 
 	if (!nl_writer_group(&nw, NLMSG_SMALL, NETLINK_GENERIC, ctrl_group_id,
-	    false)) {
+	    0, false)) {
 		NL_LOG(LOG_DEBUG, "error allocating group writer");
 		return;
 	}
diff --git a/sys/netlink/netlink_glue.c b/sys/netlink/netlink_glue.c
index e550a6013654ba..0e8fdc0b054c76 100644
--- a/sys/netlink/netlink_glue.c
+++ b/sys/netlink/netlink_glue.c
@@ -118,7 +118,7 @@ nl_writer_unicast_stub(struct nl_writer *nw, size_t size, struct nlpcb *nlp,
 
 static bool
 nl_writer_group_stub(struct nl_writer *nw, size_t size, uint16_t protocol,
-    uint16_t group_id, bool waitok)
+    uint16_t group_id, int priv, bool waitok)
 {
 	return (get_stub_writer(nw));
 }
@@ -221,9 +221,10 @@ nl_writer_unicast(struct nl_writer *nw, size_t size, struct nlpcb *nlp,
 
 bool
 nl_writer_group(struct nl_writer *nw, size_t size, uint16_t protocol,
-    uint16_t group_id, bool waitok)
+    uint16_t group_id, int priv, bool waitok)
 {
-	return (_nl->nl_writer_group(nw, size, protocol, group_id, waitok));
+	return (_nl->nl_writer_group(nw, size, protocol, group_id, priv,
+	    waitok));
 }
 
 bool
diff --git a/sys/netlink/netlink_message_writer.c b/sys/netlink/netlink_message_writer.c
index 1aebc4690c2d17..8c5b3ec140584c 100644
--- a/sys/netlink/netlink_message_writer.c
+++ b/sys/netlink/netlink_message_writer.c
@@ -86,11 +86,12 @@ _nl_writer_unicast(struct nl_writer *nw, size_t size, struct nlpcb *nlp,
 
 bool
 _nl_writer_group(struct nl_writer *nw, size_t size, uint16_t protocol,
-    uint16_t group_id, bool waitok)
+    uint16_t group_id, int priv, bool waitok)
 {
 	*nw = (struct nl_writer){
 		.group.proto = protocol,
 		.group.id = group_id,
+		.group.priv = priv,
 		.cb = nl_send_group,
 	};
 
diff --git a/sys/netlink/netlink_message_writer.h b/sys/netlink/netlink_message_writer.h
index 1655acb53fef22..83f925e8d93d8e 100644
--- a/sys/netlink/netlink_message_writer.h
+++ b/sys/netlink/netlink_message_writer.h
@@ -50,6 +50,7 @@ struct nl_writer {
 		struct {
 			uint16_t	proto;
 			uint16_t	id;
+			int		priv;
 		} group;
 	};
 	u_int		num_messages;	/* Number of messages in the buffer */
@@ -67,7 +68,8 @@ struct nl_writer {
 /* Provide optimized calls to the functions inside the same linking unit */
 
 bool _nl_writer_unicast(struct nl_writer *, size_t, struct nlpcb *nlp, bool);
-bool _nl_writer_group(struct nl_writer *, size_t, uint16_t, uint16_t, bool);
+bool _nl_writer_group(struct nl_writer *, size_t, uint16_t, uint16_t, int,
+    bool);
 bool _nlmsg_flush(struct nl_writer *nw);
 void _nlmsg_ignore_limit(struct nl_writer *nw);
 
@@ -89,9 +91,9 @@ nl_writer_unicast(struct nl_writer *nw, size_t size, struct nlpcb *nlp,
 
 static inline bool
 nl_writer_group(struct nl_writer *nw, size_t size, uint16_t proto,
-    uint16_t group_id, bool waitok)
+    uint16_t group_id, int priv, bool waitok)
 {
-	return (_nl_writer_group(nw, size, proto, group_id, waitok));
+	return (_nl_writer_group(nw, size, proto, group_id, priv, waitok));
 }
 
 static inline bool
@@ -141,7 +143,7 @@ nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr)
 /* Provide access to the functions via netlink_glue.c */
 
 bool nl_writer_unicast(struct nl_writer *, size_t, struct nlpcb *, bool waitok);
-bool nl_writer_group(struct nl_writer *, size_t, uint16_t, uint16_t,
+bool nl_writer_group(struct nl_writer *, size_t, uint16_t, uint16_t, int,
     bool waitok);
 bool nlmsg_flush(struct nl_writer *nw);
 void nlmsg_ignore_limit(struct nl_writer *nw);
diff --git a/sys/netlink/netlink_sysevent.c b/sys/netlink/netlink_sysevent.c
index 3359c77fa303b1..c955ce2e8b4528 100644
--- a/sys/netlink/netlink_sysevent.c
+++ b/sys/netlink/netlink_sysevent.c
@@ -82,7 +82,7 @@ sysevent_write(struct sysevent_group *se, const char *subsystem, const char *typ
 {
 	struct nl_writer nw;
 
-	if (!nl_writer_group(&nw, NLMSG_LARGE, NETLINK_GENERIC, se->id,
+	if (!nl_writer_group(&nw, NLMSG_LARGE, NETLINK_GENERIC, se->id, 0,
 	    false)) {
 		NL_LOG(LOG_DEBUG, "error allocating group writer");
 		return;
diff --git a/sys/netlink/netlink_var.h b/sys/netlink/netlink_var.h
index 8efe84e935c3e5..34cba0b28d2712 100644
--- a/sys/netlink/netlink_var.h
+++ b/sys/netlink/netlink_var.h
@@ -187,7 +187,7 @@ struct nl_function_wrapper {
 	bool (*nl_writer_unicast)(struct nl_writer *nw, size_t size,
 	    struct nlpcb *nlp, bool waitok);
 	bool (*nl_writer_group)(struct nl_writer *nw, size_t size,
-	    uint16_t protocol, uint16_t group_id, bool waitok);
+	    uint16_t protocol, uint16_t group_id, int priv, bool waitok);
 	bool (*nlmsg_end_dump)(struct nl_writer *nw, int error, struct nlmsghdr *hdr);
 	int (*nl_modify_ifp_generic)(struct ifnet *ifp, struct nl_parsed_link *lattrs,
 	    const struct nlattr_bmask *bm, struct nl_pstate *npt);
diff --git a/sys/netlink/route/iface.c b/sys/netlink/route/iface.c
index 86b1f8f1b1bc9c..d856498b975f34 100644
--- a/sys/netlink/route/iface.c
+++ b/sys/netlink/route/iface.c
@@ -1386,7 +1386,8 @@ rtnl_handle_ifaddr(void *arg __unused, struct ifaddr *ifa, int cmd)
 	if (!nl_has_listeners(NETLINK_ROUTE, group))
 		return;
 
-	if (!nl_writer_group(&nw, NLMSG_LARGE, NETLINK_ROUTE, group, false)) {
+	if (!nl_writer_group(&nw, NLMSG_LARGE, NETLINK_ROUTE, group, 0,
+	    false)) {
 		NL_LOG(LOG_DEBUG, "error allocating group writer");
 		return;
 	}
@@ -1406,7 +1407,7 @@ rtnl_handle_ifevent(if_t ifp, int nlmsg_type, int if_flags_mask)
 	if (!nl_has_listeners(NETLINK_ROUTE, RTNLGRP_LINK))
 		return;
 
-	if (!nl_writer_group(&nw, NLMSG_LARGE, NETLINK_ROUTE, RTNLGRP_LINK,
+	if (!nl_writer_group(&nw, NLMSG_LARGE, NETLINK_ROUTE, RTNLGRP_LINK, 0,
 	    false)) {
 		NL_LOG(LOG_DEBUG, "error allocating group writer");
 		return;
diff --git a/sys/netlink/route/neigh.c b/sys/netlink/route/neigh.c
index ec58c6140db884..9eaaae26325420 100644
--- a/sys/netlink/route/neigh.c
+++ b/sys/netlink/route/neigh.c
@@ -566,7 +566,7 @@ rtnl_lle_event(void *arg __unused, struct llentry *lle, int evt)
 
 	int nlmsgs_type = evt == LLENTRY_RESOLVED ? NL_RTM_NEWNEIGH : NL_RTM_DELNEIGH;
 
-	if (!nl_writer_group(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEIGH,
+	if (!nl_writer_group(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEIGH, 0,
 	    false)) {
 		NL_LOG(LOG_DEBUG, "error allocating group writer");
 		return;
diff --git a/sys/netlink/route/nexthop.c b/sys/netlink/route/nexthop.c
index 03f1a57fd1e4a4..30aa3dd7253474 100644
--- a/sys/netlink/route/nexthop.c
+++ b/sys/netlink/route/nexthop.c
@@ -554,7 +554,7 @@ delete_unhop(struct unhop_ctl *ctl, struct nlmsghdr *hdr, uint32_t uidx)
 	};
 
 	if (!nl_writer_group(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEXTHOP,
-	    false)) {
+	    0, false)) {
 		NL_LOG(LOG_DEBUG, "error allocating message writer");
 		return (ENOMEM);
 	}
@@ -949,7 +949,7 @@ rtnl_handle_newnhop(struct nlmsghdr *hdr, struct nlpcb *nlp,
 	};
 
 	if (!nl_writer_group(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEXTHOP,
-	    false)) {
+	    0, false)) {
 		NL_LOG(LOG_DEBUG, "error allocating message writer");
 		return (ENOMEM);
 	}
diff --git a/sys/netlink/route/rt.c b/sys/netlink/route/rt.c
index 14bd73d3341166..e90debee46da6e 100644
--- a/sys/netlink/route/rt.c
+++ b/sys/netlink/route/rt.c
@@ -353,7 +353,8 @@ report_operation(uint32_t fibnum, struct rib_cmd_info *rc,
 	struct nl_writer nw;
 	uint32_t group_id = family_to_group(rt_get_family(rc->rc_rt));
 
-	if (nl_writer_group(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id, false)) {
+	if (nl_writer_group(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id, 0,
+	    false)) {
 		struct route_nhop_data rnd = {
 			.rnd_nhop = rc_get_nhop(rc),
 			.rnd_weight = rc->rc_nh_weight,
@@ -1082,7 +1083,7 @@ rtnl_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc)
 	};
 
 	uint32_t group_id = family_to_group(family);
-	if (!nl_writer_group(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id,
+	if (!nl_writer_group(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id, 0,
 	    false)) {
 		NL_LOG(LOG_DEBUG, "error allocating event buffer");
 		return;

From 8a8d095718cb4e3ce84bef1cd61c20b518b8d047 Mon Sep 17 00:00:00 2001
From: Gleb Smirnoff <glebius@FreeBSD.org>
Date: Fri, 10 Jan 2025 20:55:50 -0800
Subject: [PATCH 136/143] netlink: add snl(3) primitive to obtain group ID

using the family name and the group name as lookup arguments.

Reviewed by:		melifaro
Differential Revision:	https://reviews.freebsd.org/D48308
---
 sys/netlink/netlink_snl_generic.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/sys/netlink/netlink_snl_generic.h b/sys/netlink/netlink_snl_generic.h
index 0a2913c9155ee1..32b460c612bd17 100644
--- a/sys/netlink/netlink_snl_generic.h
+++ b/sys/netlink/netlink_snl_generic.h
@@ -127,6 +127,24 @@ snl_get_genl_family(struct snl_state *ss, const char *family_name)
 	return (attrs.family_id);
 }
 
+static inline uint16_t
+snl_get_genl_mcast_group(struct snl_state *ss, const char *family_name,
+    const char *group_name, uint16_t *family_id)
+{
+	struct _getfamily_attrs attrs = {};
+
+	snl_get_genl_family_info(ss, family_name, &attrs);
+	if (attrs.family_id == 0)
+		return (0);
+	if (family_id != NULL)
+		*family_id = attrs.family_id;
+	for (u_int i = 0; i < attrs.mcast_groups.num_groups; i++)
+		if (strcmp(attrs.mcast_groups.groups[i]->mcast_grp_name,
+                    group_name) == 0)
+			return (attrs.mcast_groups.groups[i]->mcast_grp_id);
+	return (0);
+}
+
 static const struct snl_hdr_parser *snl_all_genl_parsers[] = {
 	&_genl_ctrl_getfam_parser, &_genl_ctrl_mc_parser,
 };

From bbe6559cf958a9016cb18ff1833ebd3a884f349f Mon Sep 17 00:00:00 2001
From: Gleb Smirnoff <glebius@FreeBSD.org>
Date: Fri, 10 Jan 2025 20:57:55 -0800
Subject: [PATCH 137/143] netlink: fix size comparison

We want to check the size of the header, not a pointer to it.

Reviewed by:		melifaro, markj
Differential Revision:	https://reviews.freebsd.org/D48309
---
 sys/netlink/netlink_generic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sys/netlink/netlink_generic.c b/sys/netlink/netlink_generic.c
index 62353149217382..b78ab80ab3c27c 100644
--- a/sys/netlink/netlink_generic.c
+++ b/sys/netlink/netlink_generic.c
@@ -67,7 +67,8 @@ genl_handle_message(struct nlmsghdr *hdr, struct nl_pstate *npt)
 		return (ENOTSUP);
 	}
 
-	if (__predict_false(hdr->nlmsg_len < sizeof(hdr) + GENL_HDRLEN)) {
+	if (__predict_false(hdr->nlmsg_len < sizeof(struct nlmsghdr) +
+	    GENL_HDRLEN)) {
 		NLP_LOG(LOG_DEBUG, nlp, "invalid message size: %d", hdr->nlmsg_len);
 		return (EINVAL);
 	}

From 26d1ad5a44e17d2d8c48d36755567043349d8b63 Mon Sep 17 00:00:00 2001
From: Gleb Smirnoff <glebius@FreeBSD.org>
Date: Fri, 10 Jan 2025 20:58:08 -0800
Subject: [PATCH 138/143] netlink: snl_create_genl_msg_request() may fail due
 to ENOMEM

Reviewed by:		melifaro
Differential Revision:	https://reviews.freebsd.org/D48310
---
 sys/netlink/netlink_snl_generic.h | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/sys/netlink/netlink_snl_generic.h b/sys/netlink/netlink_snl_generic.h
index 32b460c612bd17..e2dc4d1bfffe9b 100644
--- a/sys/netlink/netlink_snl_generic.h
+++ b/sys/netlink/netlink_snl_generic.h
@@ -33,16 +33,24 @@
 
 /* Genetlink helpers */
 static inline struct nlmsghdr *
-snl_create_genl_msg_request(struct snl_writer *nw, int genl_family, uint8_t genl_cmd)
+snl_create_genl_msg_request(struct snl_writer *nw, uint16_t genl_family,
+    uint8_t genl_cmd)
 {
+	struct nlmsghdr *hdr;
+	struct genlmsghdr *ghdr;
+
 	assert(nw->hdr == NULL);
 
-	struct nlmsghdr *hdr = snl_reserve_msg_object(nw, struct nlmsghdr);
+	hdr = snl_reserve_msg_object(nw, struct nlmsghdr);
+	if (__predict_false(hdr == NULL))
+		return (NULL);
 	hdr->nlmsg_type = genl_family;
 	hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
-	nw->hdr = hdr;
-	struct genlmsghdr *ghdr = snl_reserve_msg_object(nw, struct genlmsghdr);
+	ghdr = snl_reserve_msg_object(nw, struct genlmsghdr);
+	if (__predict_false(ghdr == NULL))
+		return (NULL);
 	ghdr->cmd = genl_cmd;
+	nw->hdr = hdr;
 
 	return (hdr);
 }

From 926d2eadcb671dd26431a1082d4c49c3d5ad7f22 Mon Sep 17 00:00:00 2001
From: Gleb Smirnoff <glebius@FreeBSD.org>
Date: Fri, 10 Jan 2025 20:59:29 -0800
Subject: [PATCH 139/143] netlink: some refactoring of NETLINK_GENERIC layer

- Statically initialize control family/group.  This removes extra startup
code and provides a strong guarantee that they reside at the 0 index of
the respective arrays.  Previously, a genl_register_family() with a higher
SYSINIT order could try to hijack index 0.

- Remove the family_id field completely.  Now the family ID as well as
group ID are array indices and there is basically no place for a mistake.
Previous code had a bug where a KPI user could induce an ID mismatch.

- Merge netlink_generic_kpi.c into netlink_generic.c.  Both files are small
and now there is more dependency between the control family and the family
allocator. Ok'ed by melifaro@.

Reviewed by:		melifaro
Differential Revision:	https://reviews.freebsd.org/D48316
---
 sys/conf/files                    |   1 -
 sys/netlink/netlink_ctl.h         |   2 +-
 sys/netlink/netlink_generic.c     | 291 +++++++++++++++++++++++++++---
 sys/netlink/netlink_generic_kpi.c | 276 ----------------------------
 sys/netlink/netlink_var.h         |   4 +-
 5 files changed, 273 insertions(+), 301 deletions(-)
 delete mode 100644 sys/netlink/netlink_generic_kpi.c

diff --git a/sys/conf/files b/sys/conf/files
index fc9108b5e10f3b..a02174f3d9548b 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4481,7 +4481,6 @@ netipsec/xform_ipcomp.c		optional ipsec inet | ipsec inet6
 netipsec/xform_tcp.c		optional ipsec inet tcp_signature | \
 	 ipsec inet6 tcp_signature | ipsec_support inet tcp_signature | \
 	 ipsec_support inet6 tcp_signature
-netlink/netlink_generic_kpi.c	standard
 netlink/netlink_glue.c		standard
 netlink/netlink_message_parser.c	standard
 netlink/netlink_domain.c	optional netlink
diff --git a/sys/netlink/netlink_ctl.h b/sys/netlink/netlink_ctl.h
index 95b79c763ccdc3..a23e9a3a948f0d 100644
--- a/sys/netlink/netlink_ctl.h
+++ b/sys/netlink/netlink_ctl.h
@@ -92,7 +92,7 @@ struct genl_cmd {
 	uint32_t	cmd_num;
 };
 
-uint32_t genl_register_family(const char *family_name, size_t hdrsize,
+uint16_t genl_register_family(const char *family_name, size_t hdrsize,
     uint16_t family_version, uint16_t max_attr_idx);
 bool genl_unregister_family(const char *family_name);
 bool genl_register_cmds(const char *family_name, const struct genl_cmd *cmds,
diff --git a/sys/netlink/netlink_generic.c b/sys/netlink/netlink_generic.c
index b78ab80ab3c27c..d4c84a34b850fa 100644
--- a/sys/netlink/netlink_generic.c
+++ b/sys/netlink/netlink_generic.c
@@ -119,7 +119,7 @@ dump_family(struct nlmsghdr *hdr, struct genlmsghdr *ghdr,
 	ghdr_new->reserved = 0;
 
         nlattr_add_string(nw, CTRL_ATTR_FAMILY_NAME, gf->family_name);
-        nlattr_add_u16(nw, CTRL_ATTR_FAMILY_ID, gf->family_id);
+        nlattr_add_u16(nw, CTRL_ATTR_FAMILY_ID, genl_get_family_id(gf));
         nlattr_add_u32(nw, CTRL_ATTR_VERSION, gf->family_version);
         nlattr_add_u32(nw, CTRL_ATTR_HDRSIZE, gf->family_hdrsize);
         nlattr_add_u32(nw, CTRL_ATTR_MAXATTR, gf->family_attr_max);
@@ -173,9 +173,6 @@ dump_family(struct nlmsghdr *hdr, struct genlmsghdr *ghdr,
 static void nlctrl_notify(void *arg, const struct genl_family *gf, int action);
 static eventhandler_tag family_event_tag;
 
-static uint32_t ctrl_family_id;
-static uint32_t ctrl_group_id;
-
 struct nl_parsed_family {
 	uint32_t	family_id;
 	char		*family_name;
@@ -201,7 +198,7 @@ match_family(const struct genl_family *gf, const struct nl_parsed_family *attrs)
 {
 	if (gf->family_name == NULL)
 		return (false);
-	if (attrs->family_id != 0 && attrs->family_id != gf->family_id)
+	if (attrs->family_id != 0 && attrs->family_id != genl_get_family_id(gf))
 		return (false);
 	if (attrs->family_name != NULL && strcmp(attrs->family_name, gf->family_name))
 		return (false);
@@ -259,7 +256,7 @@ nlctrl_notify(void *arg __unused, const struct genl_family *gf, int cmd)
 	struct genlmsghdr ghdr = { .cmd = cmd };
 	struct nl_writer nw;
 
-	if (!nl_writer_group(&nw, NLMSG_SMALL, NETLINK_GENERIC, ctrl_group_id,
+	if (!nl_writer_group(&nw, NLMSG_SMALL, NETLINK_GENERIC, CTRL_GROUP_ID,
 	    0, false)) {
 		NL_LOG(LOG_DEBUG, "error allocating group writer");
 		return;
@@ -269,27 +266,16 @@ nlctrl_notify(void *arg __unused, const struct genl_family *gf, int cmd)
 	nlmsg_flush(&nw);
 }
 
-static const struct genl_cmd nlctrl_cmds[] = {
-	{
-		.cmd_num = CTRL_CMD_GETFAMILY,
-		.cmd_name = "GETFAMILY",
-		.cmd_cb = nlctrl_handle_getfamily,
-		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
-	},
-};
-
 static const struct nlhdr_parser *all_parsers[] = { &genl_parser };
 
 static void
 genl_load_all(void *u __unused)
 {
 	NL_VERIFY_PARSERS(all_parsers);
-	ctrl_family_id = genl_register_family(CTRL_FAMILY_NAME, 0, 2, CTRL_ATTR_MAX);
-	genl_register_cmds(CTRL_FAMILY_NAME, nlctrl_cmds, nitems(nlctrl_cmds));
-	ctrl_group_id = genl_register_group(CTRL_FAMILY_NAME, "notify");
-	family_event_tag = EVENTHANDLER_REGISTER(genl_family_event, nlctrl_notify, NULL,
-	    EVENTHANDLER_PRI_ANY);
-	netlink_register_proto(NETLINK_GENERIC, "NETLINK_GENERIC", genl_handle_message);
+	family_event_tag = EVENTHANDLER_REGISTER(genl_family_event,
+	    nlctrl_notify, NULL, EVENTHANDLER_PRI_ANY);
+	netlink_register_proto(NETLINK_GENERIC, "NETLINK_GENERIC",
+	    genl_handle_message);
 }
 SYSINIT(genl_load_all, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, genl_load_all, NULL);
 
@@ -298,7 +284,268 @@ genl_unload(void *u __unused)
 {
 	netlink_unregister_proto(NETLINK_GENERIC);
 	EVENTHANDLER_DEREGISTER(genl_family_event, family_event_tag);
-	genl_unregister_family(CTRL_FAMILY_NAME);
 	NET_EPOCH_WAIT();
 }
 SYSUNINIT(genl_unload, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, genl_unload, NULL);
+
+/*
+ * Public KPI for NETLINK_GENERIC families/groups registration logic below.
+ */
+
+static struct sx sx_lock;
+SX_SYSINIT(genl_lock, &sx_lock, "genetlink lock");
+#define	GENL_LOCK()		sx_xlock(&sx_lock)
+#define	GENL_UNLOCK()		sx_xunlock(&sx_lock)
+#define	GENL_ASSERT_LOCKED()	sx_assert(&sx_lock, SA_LOCKED)
+#define	GENL_ASSERT_XLOCKED()	sx_assert(&sx_lock, SA_XLOCKED)
+
+static struct genl_cmd nlctrl_cmds[] = {
+	[CTRL_CMD_GETFAMILY] = {
+		.cmd_num = CTRL_CMD_GETFAMILY,
+		.cmd_name = "GETFAMILY",
+		.cmd_cb = nlctrl_handle_getfamily,
+		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP |
+		    GENL_CMD_CAP_HASPOL,
+	},
+};
+
+static struct genl_family families[MAX_FAMILIES] = {
+	[CTRL_FAMILY_ID] = {
+		.family_name = CTRL_FAMILY_NAME,
+		.family_hdrsize = 0,
+		.family_version = 2,
+		.family_attr_max = CTRL_ATTR_MAX,
+		.family_cmd_size = CTRL_CMD_GETFAMILY + 1,
+		.family_cmds = nlctrl_cmds,
+		.family_num_groups = 1,
+	},
+}
+;
+static struct genl_group groups[MAX_GROUPS] = {
+	[CTRL_GROUP_ID] = {
+		.group_family = &families[CTRL_FAMILY_ID],
+		.group_name = CTRL_GROUP_NAME,
+	},
+};
+
+static struct genl_family *
+find_family(const char *family_name)
+{
+	GENL_ASSERT_LOCKED();
+	for (u_int i = 0; i < MAX_FAMILIES; i++)
+		if (families[i].family_name != NULL &&
+		    strcmp(families[i].family_name, family_name) == 0)
+			return (&families[i]);
+
+	return (NULL);
+}
+
+static struct genl_family *
+find_empty_family_id(const char *family_name)
+{
+	GENL_ASSERT_LOCKED();
+	/* Microoptimization: index 0 is reserved for the control family */
+	for (u_int i = 1; i < MAX_FAMILIES; i++)
+		if (families[i].family_name == NULL)
+			return (&families[i]);
+
+	return (NULL);
+}
+
+uint16_t
+genl_register_family(const char *family_name, size_t hdrsize,
+    uint16_t family_version, uint16_t max_attr_idx)
+{
+	struct genl_family *gf;
+	uint16_t family_id;
+
+	GENL_LOCK();
+	if (find_family(family_name) != NULL) {
+		GENL_UNLOCK();
+		return (0);
+	}
+
+	gf = find_empty_family_id(family_name);
+	KASSERT(gf, ("%s: maximum of %u generic netlink families allocated",
+	    __func__, MAX_FAMILIES));
+
+	*gf = (struct genl_family) {
+	    .family_name = family_name,
+	    .family_version = family_version,
+	    .family_hdrsize = hdrsize,
+	    .family_attr_max = max_attr_idx,
+	};
+	family_id = genl_get_family_id(gf);
+	GENL_UNLOCK();
+
+	NL_LOG(LOG_DEBUG2, "Registered family %s id %d", gf->family_name,
+	    family_id);
+	EVENTHANDLER_INVOKE(genl_family_event, gf, CTRL_CMD_NEWFAMILY);
+
+	return (family_id);
+}
+
+static void
+free_family(struct genl_family *gf)
+{
+	if (gf->family_cmds != NULL)
+		free(gf->family_cmds, M_NETLINK);
+}
+
+/*
+ * unregister groups of a given family
+ */
+static void
+unregister_groups(const struct genl_family *gf)
+{
+
+	for (u_int i = 0; i < MAX_GROUPS; i++) {
+		struct genl_group *gg = &groups[i];
+		if (gg->group_family == gf && gg->group_name != NULL) {
+			gg->group_family = NULL;
+			gg->group_name = NULL;
+		}
+	}
+}
+
+/*
+ * Can sleep, I guess
+ */
+bool
+genl_unregister_family(const char *family_name)
+{
+	bool found = false;
+
+	GENL_LOCK();
+	struct genl_family *gf = find_family(family_name);
+
+	if (gf != NULL) {
+		EVENTHANDLER_INVOKE(genl_family_event, gf, CTRL_CMD_DELFAMILY);
+		found = true;
+		unregister_groups(gf);
+		/* TODO: zero pointer first */
+		free_family(gf);
+		bzero(gf, sizeof(*gf));
+	}
+	GENL_UNLOCK();
+
+	return (found);
+}
+
+bool
+genl_register_cmds(const char *family_name, const struct genl_cmd *cmds,
+    int count)
+{
+	struct genl_family *gf;
+	uint16_t cmd_size;
+
+	GENL_LOCK();
+	if ((gf = find_family(family_name)) == NULL) {
+		GENL_UNLOCK();
+		return (false);
+	}
+
+	cmd_size = gf->family_cmd_size;
+
+	for (u_int i = 0; i < count; i++) {
+		MPASS(cmds[i].cmd_cb != NULL);
+		if (cmds[i].cmd_num >= cmd_size)
+			cmd_size = cmds[i].cmd_num + 1;
+	}
+
+	if (cmd_size > gf->family_cmd_size) {
+		void *old_data;
+
+		/* need to realloc */
+		size_t sz = cmd_size * sizeof(struct genl_cmd);
+		void *data = malloc(sz, M_NETLINK, M_WAITOK | M_ZERO);
+
+		memcpy(data, gf->family_cmds,
+		    gf->family_cmd_size * sizeof(struct genl_cmd));
+		old_data = gf->family_cmds;
+		gf->family_cmds = data;
+		gf->family_cmd_size = cmd_size;
+		free(old_data, M_NETLINK);
+	}
+
+	for (u_int i = 0; i < count; i++) {
+		const struct genl_cmd *cmd = &cmds[i];
+
+		MPASS(gf->family_cmds[cmd->cmd_num].cmd_cb == NULL);
+		gf->family_cmds[cmd->cmd_num] = cmds[i];
+		NL_LOG(LOG_DEBUG2, "Adding cmd %s(%d) to family %s",
+		    cmd->cmd_name, cmd->cmd_num, gf->family_name);
+	}
+	GENL_UNLOCK();
+	return (true);
+}
+
+static struct genl_group *
+find_group(const struct genl_family *gf, const char *group_name)
+{
+	for (u_int i = 0; i < MAX_GROUPS; i++) {
+		struct genl_group *gg = &groups[i];
+		if (gg->group_family == gf &&
+		    !strcmp(gg->group_name, group_name))
+			return (gg);
+	}
+	return (NULL);
+}
+
+uint32_t
+genl_register_group(const char *family_name, const char *group_name)
+{
+	struct genl_family *gf;
+	uint32_t group_id = 0;
+
+	MPASS(family_name != NULL);
+	MPASS(group_name != NULL);
+
+	GENL_LOCK();
+	if ((gf = find_family(family_name)) == NULL ||
+	    find_group(gf, group_name) != NULL) {
+		GENL_UNLOCK();
+		return (0);
+	}
+
+	/* Microoptimization: index 0 is reserved for the control family */
+	for (u_int i = 1; i < MAX_GROUPS; i++) {
+		struct genl_group *gg = &groups[i];
+		if (gg->group_family == NULL) {
+			gf->family_num_groups++;
+			gg->group_family = gf;
+			gg->group_name = group_name;
+			group_id = i + MIN_GROUP_NUM;
+			break;
+		}
+	}
+	GENL_UNLOCK();
+
+	return (group_id);
+}
+
+/* accessors */
+struct genl_family *
+genl_get_family(uint16_t family_id)
+{
+	return ((family_id < MAX_FAMILIES) ? &families[family_id] : NULL);
+}
+
+const char *
+genl_get_family_name(const struct genl_family *gf)
+{
+	return (gf->family_name);
+}
+
+uint16_t
+genl_get_family_id(const struct genl_family *gf)
+{
+	MPASS(gf >= &families[0] && gf < &families[MAX_FAMILIES]);
+	return ((uint16_t)(gf - &families[0]) + GENL_MIN_ID);
+}
+
+struct genl_group *
+genl_get_group(uint32_t group_id)
+{
+	return ((group_id < MAX_GROUPS) ? &groups[group_id] : NULL);
+}
diff --git a/sys/netlink/netlink_generic_kpi.c b/sys/netlink/netlink_generic_kpi.c
deleted file mode 100644
index e6125ab893d868..00000000000000
--- a/sys/netlink/netlink_generic_kpi.c
+++ /dev/null
@@ -1,276 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause
- *
- * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/types.h>
-#include <sys/ck.h>
-#include <sys/epoch.h>
-#include <sys/eventhandler.h>
-#include <sys/kernel.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/socket.h>
-#include <sys/sx.h>
-
-#include <netlink/netlink.h>
-#include <netlink/netlink_ctl.h>
-#include <netlink/netlink_generic.h>
-#include <netlink/netlink_var.h>
-
-#define	DEBUG_MOD_NAME	nl_generic_kpi
-#define	DEBUG_MAX_LEVEL	LOG_DEBUG3
-#include <netlink/netlink_debug.h>
-_DECLARE_DEBUG(LOG_INFO);
-
-
-/*
- * NETLINK_GENERIC families/groups registration logic
- */
-
-#define	GENL_LOCK()		sx_xlock(&sx_lock)
-#define	GENL_UNLOCK()		sx_xunlock(&sx_lock)
-static struct sx sx_lock;
-SX_SYSINIT(genl_lock, &sx_lock, "genetlink lock");
-
-static struct genl_family	families[MAX_FAMILIES];
-static struct genl_group	groups[MAX_GROUPS];
-
-static struct genl_family *
-find_family(const char *family_name)
-{
-	for (int i = 0; i < MAX_FAMILIES; i++) {
-		struct genl_family *gf = &families[i];
-		if (gf->family_name != NULL && !strcmp(gf->family_name, family_name))
-			return (gf);
-	}
-
-	return (NULL);
-}
-
-static struct genl_family *
-find_empty_family_id(const char *family_name)
-{
-	struct genl_family *gf = NULL;
-
-	if (!strcmp(family_name, CTRL_FAMILY_NAME)) {
-		gf = &families[0];
-		gf->family_id = GENL_MIN_ID;
-	} else {
-		/* Index 0 is reserved for the control family */
-		for (int i = 1; i < MAX_FAMILIES; i++) {
-			gf = &families[i];
-			if (gf->family_name == NULL) {
-				gf->family_id = GENL_MIN_ID + i;
-				break;
-			}
-		}
-	}
-
-	return (gf);
-}
-
-uint32_t
-genl_register_family(const char *family_name, size_t hdrsize,
-    uint16_t family_version, uint16_t max_attr_idx)
-{
-
-	MPASS(family_name != NULL);
-	if (find_family(family_name) != NULL)
-		return (0);
-
-	GENL_LOCK();
-
-	struct genl_family *gf = find_empty_family_id(family_name);
-	MPASS(gf != NULL);
-
-	gf->family_name = family_name;
-	gf->family_version = family_version;
-	gf->family_hdrsize = hdrsize;
-	gf->family_attr_max = max_attr_idx;
-	NL_LOG(LOG_DEBUG2, "Registered family %s id %d", gf->family_name,
-	    gf->family_id);
-	EVENTHANDLER_INVOKE(genl_family_event, gf, CTRL_CMD_NEWFAMILY);
-
-	GENL_UNLOCK();
-
-	return (gf->family_id);
-}
-
-static void
-free_family(struct genl_family *gf)
-{
-	if (gf->family_cmds != NULL)
-		free(gf->family_cmds, M_NETLINK);
-}
-
-/*
- * unregister groups of a given family
- */
-static void
-unregister_groups(const struct genl_family *gf)
-{
-
-	for (int i = 0; i < MAX_GROUPS; i++) {
-		struct genl_group *gg = &groups[i];
-		if (gg->group_family == gf && gg->group_name != NULL) {
-			gg->group_family = NULL;
-			gg->group_name = NULL;
-		}
-	}
-}
-
-/*
- * Can sleep, I guess
- */
-bool
-genl_unregister_family(const char *family_name)
-{
-	bool found = false;
-
-	GENL_LOCK();
-	struct genl_family *gf = find_family(family_name);
-
-	if (gf != NULL) {
-		EVENTHANDLER_INVOKE(genl_family_event, gf, CTRL_CMD_DELFAMILY);
-		found = true;
-		unregister_groups(gf);
-		/* TODO: zero pointer first */
-		free_family(gf);
-		bzero(gf, sizeof(*gf));
-	}
-	GENL_UNLOCK();
-
-	return (found);
-}
-
-bool
-genl_register_cmds(const char *family_name, const struct genl_cmd *cmds, int count)
-{
-	GENL_LOCK();
-	struct genl_family *gf = find_family(family_name);
-	if (gf == NULL) {
-		GENL_UNLOCK();
-		return (false);
-	}
-
-	int cmd_size = gf->family_cmd_size;
-
-	for (int i = 0; i < count; i++) {
-		MPASS(cmds[i].cmd_cb != NULL);
-		if (cmds[i].cmd_num >= cmd_size)
-			cmd_size = cmds[i].cmd_num + 1;
-	}
-
-	if (cmd_size > gf->family_cmd_size) {
-		/* need to realloc */
-		size_t sz = cmd_size * sizeof(struct genl_cmd);
-		void *data = malloc(sz, M_NETLINK, M_WAITOK | M_ZERO);
-
-		memcpy(data, gf->family_cmds, gf->family_cmd_size * sizeof(struct genl_cmd));
-		void *old_data = gf->family_cmds;
-		gf->family_cmds = data;
-		gf->family_cmd_size = cmd_size;
-		free(old_data, M_NETLINK);
-	}
-
-	for (int i = 0; i < count; i++) {
-		const struct genl_cmd *cmd = &cmds[i];
-		MPASS(gf->family_cmds[cmd->cmd_num].cmd_cb == NULL);
-		gf->family_cmds[cmd->cmd_num] = cmds[i];
-		NL_LOG(LOG_DEBUG2, "Adding cmd %s(%d) to family %s",
-		    cmd->cmd_name, cmd->cmd_num, gf->family_name);
-	}
-	GENL_UNLOCK();
-	return (true);
-}
-
-static struct genl_group *
-find_group(const struct genl_family *gf, const char *group_name)
-{
-	for (int i = 0; i < MAX_GROUPS; i++) {
-		struct genl_group *gg = &groups[i];
-		if (gg->group_family == gf && !strcmp(gg->group_name, group_name))
-			return (gg);
-	}
-	return (NULL);
-}
-
-uint32_t
-genl_register_group(const char *family_name, const char *group_name)
-{
-	uint32_t group_id = 0;
-
-	MPASS(family_name != NULL);
-	MPASS(group_name != NULL);
-
-	GENL_LOCK();
-	struct genl_family *gf = find_family(family_name);
-
-	if (gf == NULL || find_group(gf, group_name) != NULL) {
-		GENL_UNLOCK();
-		return (0);
-	}
-
-	for (int i = 0; i < MAX_GROUPS; i++) {
-		struct genl_group *gg = &groups[i];
-		if (gg->group_family == NULL) {
-			gf->family_num_groups++;
-			gg->group_family = gf;
-			gg->group_name = group_name;
-			group_id = i + MIN_GROUP_NUM;
-			break;
-		}
-	}
-	GENL_UNLOCK();
-
-	return (group_id);
-}
-
-/* accessors */
-struct genl_family *
-genl_get_family(uint16_t family_id)
-{
-	return ((family_id < MAX_FAMILIES) ? &families[family_id] : NULL);
-}
-
-const char *
-genl_get_family_name(const struct genl_family *gf)
-{
-	return (gf->family_name);
-}
-
-uint16_t
-genl_get_family_id(const struct genl_family *gf)
-{
-	return (gf->family_id);
-}
-
-struct genl_group *
-genl_get_group(uint32_t group_id)
-{
-	return ((group_id < MAX_GROUPS) ? &groups[group_id] : NULL);
-}
-
diff --git a/sys/netlink/netlink_var.h b/sys/netlink/netlink_var.h
index 34cba0b28d2712..87b9f5aaaecdef 100644
--- a/sys/netlink/netlink_var.h
+++ b/sys/netlink/netlink_var.h
@@ -147,7 +147,6 @@ void nl_buf_free(struct nl_buf *nb);
 struct genl_family {
 	const char	*family_name;
 	uint16_t	family_hdrsize;
-	uint16_t	family_id;
 	uint16_t	family_version;
 	uint16_t	family_attr_max;
 	uint16_t	family_cmd_size;
@@ -168,7 +167,10 @@ struct genl_group *genl_get_group(uint32_t group_id);
 
 #define	MIN_GROUP_NUM	48
 
+#define	CTRL_FAMILY_ID		0
 #define	CTRL_FAMILY_NAME	"nlctrl"
+#define	CTRL_GROUP_ID		0
+#define	CTRL_GROUP_NAME		"notify"
 
 struct ifnet;
 struct nl_parsed_link;

From 4dc1820a16b9b6108e0ff8a0265c08c67fa34146 Mon Sep 17 00:00:00 2001
From: Gleb Smirnoff <glebius@FreeBSD.org>
Date: Fri, 10 Jan 2025 21:08:02 -0800
Subject: [PATCH 140/143] libbsnmp: avoid division by zero with empty password

PR:		283909
---
 contrib/bsnmp/lib/snmpclient.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/bsnmp/lib/snmpclient.c b/contrib/bsnmp/lib/snmpclient.c
index ab3ac2d18e6464..b312a37ed3edcd 100644
--- a/contrib/bsnmp/lib/snmpclient.c
+++ b/contrib/bsnmp/lib/snmpclient.c
@@ -1792,7 +1792,7 @@ snmp_discover_engine(char *passwd)
 	if (snmp_client.user.auth_proto == SNMP_AUTH_NOAUTH)
 		return (0);
 
-	if (passwd == NULL ||
+	if (passwd == NULL || strlen(passwd) == 0 ||
 	    snmp_passwd_to_keys(&snmp_client.user, passwd) != SNMP_CODE_OK ||
 	    snmp_get_local_keys(&snmp_client.user, snmp_client.engine.engine_id,
 	    snmp_client.engine.engine_len) != SNMP_CODE_OK)

From 3999a860d6e899de98b1025317d2d0ef1f83255f Mon Sep 17 00:00:00 2001
From: Gleb Smirnoff <glebius@FreeBSD.org>
Date: Fri, 10 Jan 2025 21:08:02 -0800
Subject: [PATCH 141/143] libbsnmptools: avoid uninitialized
 snmptoolctx->passwd with empty password

The removed check left the snmptoolctx->passwd pointer pointing to
uninitialized memory when the password was empty.  Always calling
strlcpy(3) guarantees that with an empty password it points to an
empty string.

Submitted by:	markj
PR:		283909
---
 usr.sbin/bsnmpd/tools/libbsnmptools/bsnmptools.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/usr.sbin/bsnmpd/tools/libbsnmptools/bsnmptools.c b/usr.sbin/bsnmpd/tools/libbsnmptools/bsnmptools.c
index a9d44cee43648d..fb09e1ac785eb4 100644
--- a/usr.sbin/bsnmpd/tools/libbsnmptools/bsnmptools.c
+++ b/usr.sbin/bsnmpd/tools/libbsnmptools/bsnmptools.c
@@ -178,8 +178,7 @@ snmptool_init(struct snmp_toolinfo *snmptoolctx)
 			warn("malloc() failed");
 			return (-1);
 		}
-		if (slen > 0)
-			strlcpy(snmptoolctx->passwd, str, slen + 1);
+		strlcpy(snmptoolctx->passwd, str, slen + 1);
 	}
 
 	return (0);

From 76658cd70add383dba14d1f71df3d41dedb77015 Mon Sep 17 00:00:00 2001
From: Ahmad Khalifa <ahmadkhalifa570@gmail.com>
Date: Sat, 11 Jan 2025 15:31:23 +0200
Subject: [PATCH 142/143] install-boot.sh: Expose configuration variables

Expose configuration variables if we're being sourced. This provides a
convenient way to check them in the release scripts.

Signed-off-by: Ahmad Khalifa <ahmadkhalifa570@gmail.com>
---
 tools/boot/install-boot.sh | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tools/boot/install-boot.sh b/tools/boot/install-boot.sh
index 217bf0ff14571a..dd369dd3201056 100755
--- a/tools/boot/install-boot.sh
+++ b/tools/boot/install-boot.sh
@@ -450,4 +450,18 @@ mbr2=${srcroot}/boot/boot
 # sourced, so we shouldn't run anything.
 if [ -n "${dev}" ]; then
 	eval boot_${geli}_${scheme}_${fs}_${bios} $dev $srcroot $opts || echo "Unsupported boot env: ${geli}-${scheme}-${fs}-${bios}"
+elif [ $(basename "$0") != "install-boot.sh" ]; then
+	# If we're being sourced, give the sourcer
+	# the configuration variables.
+
+	srctop=$(dirname $(realpath $0))/../..
+	_=$IFS
+
+	IFS=$'\n'
+	for i in $(make -C $srctop showconfig)
+	do
+		setvar ${i%%[[:space:]]*=*} ${i##*=[[:space:]]}
+	done
+
+	IFS=$_
 fi

From da3e113138e32bff6e322706ae49187e8100ab39 Mon Sep 17 00:00:00 2001
From: Ahmad Khalifa <ahmadkhalifa570@gmail.com>
Date: Sat, 11 Jan 2025 15:33:20 +0200
Subject: [PATCH 143/143] release/amd64: Check config variables instead of
 checking files

Check the MK_LOADER_IA32 variable instead of manually checking if the
file exists.

Signed-off-by: Ahmad Khalifa <ahmadkhalifa570@gmail.com>
---
 release/amd64/make-memstick.sh | 8 +++-----
 release/amd64/mkisoimages.sh   | 8 +++-----
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/release/amd64/make-memstick.sh b/release/amd64/make-memstick.sh
index cbb80e97134350..e140316825bf35 100755
--- a/release/amd64/make-memstick.sh
+++ b/release/amd64/make-memstick.sh
@@ -60,12 +60,10 @@ fi
 
 # Make an ESP in a file.
 espfilename=$(mktemp /tmp/efiboot.XXXXXX)
-if [ -f "${BASEBITSDIR}/boot/loader_ia32.efi" ]; then
-	make_esp_file ${espfilename} ${fat32min} ${BASEBITSDIR}/boot/loader.efi bootx64 \
-	    ${BASEBITSDIR}/boot/loader_ia32.efi bootia32
-else
-	make_esp_file ${espfilename} ${fat32min} ${BASEBITSDIR}/boot/loader.efi
+if [ ${MK_LOADER_IA32} = "yes" ]; then
+	extra_args="${BASEBITSDIR}/boot/loader_ia32.efi bootia32"
 fi
+make_esp_file ${espfilename} ${fat32min} ${BASEBITSDIR}/boot/loader.efi bootx64 ${extra_args}
 
 mkimg -s mbr \
     -b ${BASEBITSDIR}/boot/mbr \
diff --git a/release/amd64/mkisoimages.sh b/release/amd64/mkisoimages.sh
index 245beb660c3fee..51681edf360a05 100644
--- a/release/amd64/mkisoimages.sh
+++ b/release/amd64/mkisoimages.sh
@@ -64,12 +64,10 @@ if [ "$1" = "-b" ]; then
 	espfilename=$(mktemp /tmp/efiboot.XXXXXX)
 	# ESP file size in KB.
 	espsize="2048"
-	if [ -f "${BASEBITSDIR}/boot/loader_ia32.efi" ]; then
-		make_esp_file ${espfilename} ${espsize} ${BASEBITSDIR}/boot/loader.efi bootx64 \
-		    ${BASEBITSDIR}/boot/loader_ia32.efi bootia32
-	else
-		make_esp_file ${espfilename} ${espsize} ${BASEBITSDIR}/boot/loader.efi
+	if [ ${MK_LOADER_IA32} = "yes" ]; then
+		extra_args="${BASEBITSDIR}/boot/loader_ia32.efi bootia32"
 	fi
+	make_esp_file ${espfilename} ${espsize} ${BASEBITSDIR}/boot/loader.efi bootx64 ${extra_args}
 	bootable="$bootable -o bootimage=i386;${espfilename} -o no-emul-boot -o platformid=efi"
 
 	shift