From 34251a01481f96102ec5f2beea9c9965f409feff Mon Sep 17 00:00:00 2001
From: andrewstanfordjason <andrew@xmos.com>
Date: Wed, 28 Jan 2026 15:37:55 +0000
Subject: [PATCH] vx4b initial

---
 doc/rst/src/reference/notes.h                 |   2 +-
 examples/app_bfp_demo/src/main.c              |   2 +-
 examples/app_fft_demo/src/main.c              |   2 +-
 .../app_filter_demo/src/filter_16bit_fir.c    |   2 +-
 .../app_filter_demo/src/filter_32bit_biquad.c |   2 +-
 .../app_filter_demo/src/filter_32bit_fir.c    |   2 +-
 examples/app_filter_demo/src/main.c           |   2 +-
 examples/app_vect_demo/src/main.c             |   2 +-
 .../src/vect_complex_s16_example.c            |   2 +-
 examples/app_vect_demo/src/vect_s32_example.c |   2 +-
 lib_xcore_math/CMakeLists.txt                 |   6 +-
 lib_xcore_math/api/xcore_math.h               |   2 +-
 lib_xcore_math/api/xmath/_support/dct_impl.h  |   2 +-
 lib_xcore_math/api/xmath/_support/fft_impl.h  |   2 +-
 lib_xcore_math/api/xmath/api.h                |   2 +-
 lib_xcore_math/api/xmath/bfp/bfp.h            |   2 +-
 .../api/xmath/bfp/bfp_complex_s16.h           |   2 +-
 .../api/xmath/bfp/bfp_complex_s32.h           |   2 +-
 lib_xcore_math/api/xmath/bfp/bfp_misc.h       |   2 +-
 lib_xcore_math/api/xmath/bfp/bfp_s16.h        |   2 +-
 lib_xcore_math/api/xmath/bfp/bfp_s32.h        |   2 +-
 lib_xcore_math/api/xmath/dct.h                |   2 +-
 lib_xcore_math/api/xmath/fft.h                |   2 +-
 lib_xcore_math/api/xmath/filter.h             |   2 +-
 lib_xcore_math/api/xmath/q_format.h           |   2 +-
 lib_xcore_math/api/xmath/scalar/f32.h         |   2 +-
 .../api/xmath/scalar/float_complex_s16.h      |   2 +-
 .../api/xmath/scalar/float_complex_s32.h      |   2 +-
 lib_xcore_math/api/xmath/scalar/float_s32.h   |   2 +-
 lib_xcore_math/api/xmath/scalar/s16.h         |   2 +-
 lib_xcore_math/api/xmath/scalar/s32.h         |   2 +-
 lib_xcore_math/api/xmath/scalar/scalar.h      |   2 +-
 lib_xcore_math/api/xmath/scalar/scalar_misc.h |   2 +-
 lib_xcore_math/api/xmath/types.h              |   2 +-
 lib_xcore_math/api/xmath/util.h               |   8 +-
 lib_xcore_math/api/xmath/vect/chunk_s32.h     |   2 +-
 lib_xcore_math/api/xmath/vect/vect.h          |   2 +-
 .../api/xmath/vect/vect_complex_s16.h         |   2 +-
 .../api/xmath/vect/vect_complex_s16_prepare.h |   2 +-
 .../api/xmath/vect/vect_complex_s32.h         |   2 +-
 .../api/xmath/vect/vect_complex_s32_prepare.h |   2 +-
 lib_xcore_math/api/xmath/vect/vect_f32.h      |   2 +-
 lib_xcore_math/api/xmath/vect/vect_mixed.h    |   2 +-
 lib_xcore_math/api/xmath/vect/vect_s16.h      |   2 +-
 .../api/xmath/vect/vect_s16_prepare.h         |   2 +-
 lib_xcore_math/api/xmath/vect/vect_s32.h      |   2 +-
 .../api/xmath/vect/vect_s32_prepare.h         |   2 +-
 lib_xcore_math/api/xmath/vect/vect_s8.h       |   2 +-
 lib_xcore_math/api/xmath/xmath.h              |   2 +-
 lib_xcore_math/api/xmath/xmath_conf.h         |   2 +-
 lib_xcore_math/api/xmath/xs3/vpu_info.h       |  20 +-
 lib_xcore_math/api/xmath/xs3/vpu_scalar_ops.h |  13 +-
 .../python/gen_biquad_filter_s32.py           |   2 +-
 lib_xcore_math/python/gen_fft_table.py        |   2 +-
 lib_xcore_math/python/gen_fir_filter_s16.py   |   2 +-
 lib_xcore_math/python/gen_fir_filter_s32.py   |   2 +-
 lib_xcore_math/python/gen_rot_table.py        |   2 +-
 lib_xcore_math/python/xmath_script.py         |   2 +-
 .../src/arch/ref/bool/vect_s8_is_negative.c   |   2 +-
 lib_xcore_math/src/arch/ref/chunk.c           |   2 +-
 .../src/arch/ref/chunk_s16_accumulate.c       |   2 +-
 .../arch/ref/complex/vect_complex_conj_macc.c |   2 +-
 .../arch/ref/complex/vect_complex_conjugate.c |   2 +-
 .../ref/complex/vect_complex_depth_convert.c  |   2 +-
 .../src/arch/ref/complex/vect_complex_macc.c  |   2 +-
 .../src/arch/ref/complex/vect_complex_mag.c   |   2 +-
 .../src/arch/ref/complex/vect_complex_mul.c   |   2 +-
 .../src/arch/ref/complex/vect_complex_sum.c   |   2 +-
 lib_xcore_math/src/arch/ref/dct/dct.c         |   2 +-
 lib_xcore_math/src/arch/ref/dct/dct8x8.c      |   2 +-
 lib_xcore_math/src/arch/ref/dct/idct.c        |   2 +-
 lib_xcore_math/src/arch/ref/f32.c             |   2 +-
 lib_xcore_math/src/arch/ref/fft/fft_dif.c     |   2 +-
 lib_xcore_math/src/arch/ref/fft/fft_dit.c     |   2 +-
 lib_xcore_math/src/arch/ref/fft/fft_util.c    |   2 +-
 .../src/arch/ref/filter/filter_biquad_s32.c   |   2 +-
 .../arch/ref/filter/filter_biquad_sat_s32.c   |   2 +-
 .../src/arch/ref/filter/filter_fir_s16.c      |   2 +-
 .../src/arch/ref/filter/filter_fir_s32.c      |   2 +-
 lib_xcore_math/src/arch/ref/float_s32.c       |   2 +-
 .../ref/matrix/mat_mul_s8_x_s8_yield_s32.c    |   2 +-
 lib_xcore_math/src/arch/ref/misc.c            |   2 +-
 lib_xcore_math/src/arch/ref/qXX.c             |   2 +-
 lib_xcore_math/src/arch/ref/s32_sqrt.c        |   2 +-
 .../src/arch/ref/vect_abs_clip_rect.c         |   2 +-
 lib_xcore_math/src/arch/ref/vect_add_sub.c    |   2 +-
 lib_xcore_math/src/arch/ref/vect_convolve.c   |   2 +-
 lib_xcore_math/src/arch/ref/vect_copy.c       |   2 +-
 .../src/arch/ref/vect_depth_convert.c         |   2 +-
 lib_xcore_math/src/arch/ref/vect_dot.c        |   2 +-
 lib_xcore_math/src/arch/ref/vect_f32.c        |   2 +-
 lib_xcore_math/src/arch/ref/vect_headroom.c   |   2 +-
 lib_xcore_math/src/arch/ref/vect_inverse.c    |   2 +-
 lib_xcore_math/src/arch/ref/vect_macc.c       |   2 +-
 lib_xcore_math/src/arch/ref/vect_mul.c        |   2 +-
 .../src/arch/ref/vect_s16_extract.c           |   2 +-
 lib_xcore_math/src/arch/ref/vect_sXX.c        |   2 +-
 lib_xcore_math/src/arch/ref/vect_set.c        |   2 +-
 lib_xcore_math/src/arch/ref/vect_shl.c        |   2 +-
 lib_xcore_math/src/arch/ref/vect_sqrt.c       |   2 +-
 lib_xcore_math/src/arch/ref/vect_stats.c      |   2 +-
 lib_xcore_math/src/arch/ref/vect_sum.c        |   2 +-
 lib_xcore_math/src/arch/ref/vect_zip.c        |   2 +-
 lib_xcore_math/src/arch/ref/vpu_scalar_ops.c  |  29 +-
 lib_xcore_math/src/arch/vx4b/NOTES.rst        |   5 +
 lib_xcore_math/src/arch/vx4b/asm_helper.h     |  25 +
 .../chunk_s16/chunk_s16_accumulate.almost     |  97 +++
 .../vx4b/chunk_s16/chunk_s16_accumulate.c     |  47 ++
 .../src/arch/vx4b/chunk_s32/chunk_s32_dot.S   |  50 ++
 .../src/arch/vx4b/chunk_s32/chunk_s32_log.S   | 176 ++++++
 .../vx4b/chunk_s32/chunk_s32_power_series.S   |  87 +++
 .../chunk_s32/chunk_s32_power_series_v2.S     | 108 ++++
 .../src/arch/vx4b/dct/s32/dct12_s32.S         | 140 +++++
 .../src/arch/vx4b/dct/s32/dct16_s32.S         | 157 +++++
 .../src/arch/vx4b/dct/s32/dct24_s32.S         | 178 ++++++
 .../src/arch/vx4b/dct/s32/dct6_s32.S          |  65 ++
 .../src/arch/vx4b/dct/s32/dct8_s32.S          |  67 +++
 .../src/arch/vx4b/dct/s32/dct_adsb_s32.S      |  78 +++
 .../arch/vx4b/dct/s32/dct_deconvolve_s32.S    |  81 +++
 .../src/arch/vx4b/dct/s32/idct6_s32.S         |  64 ++
 .../src/arch/vx4b/dct/s32/idct8_s32.S         |  65 ++
 .../src/arch/vx4b/dct/s32/idct_adsb.S         |  73 +++
 .../src/arch/vx4b/dct/s32/idct_convolve.S     |  80 +++
 .../src/arch/vx4b/dct/s32/idct_scale.S        |  66 ++
 .../src/arch/vx4b/dct/s8/dct8x8_stageA.S      | 187 ++++++
 .../src/arch/vx4b/dct/s8/dct8x8_stageB.S      | 191 ++++++
 .../src/arch/vx4b/dct/vect_s32_flip.S         |  54 ++
 lib_xcore_math/src/arch/vx4b/fft/dif_fft.S    | 285 +++++++++
 lib_xcore_math/src/arch/vx4b/fft/dit_fft.S    | 316 ++++++++++
 lib_xcore_math/src/arch/vx4b/fft/fft_hr_lut.S |  24 +
 .../arch/vx4b/fft/fft_index_bit_reversal.S    |  65 ++
 .../src/arch/vx4b/fft/fft_mono_adjust.S       | 218 +++++++
 .../src/arch/vx4b/fft/fft_spectra_merge.S     | 158 +++++
 .../src/arch/vx4b/fft/fft_spectra_split.S     | 150 +++++
 .../arch/vx4b/fft/tail_reverse_complex_s32.S  | 106 ++++
 .../src/arch/vx4b/filter/filter_biquad_s32.S  | 155 +++++
 .../arch/vx4b/filter/filter_biquad_sat_s32.S  | 220 +++++++
 .../src/arch/vx4b/filter/filter_fir_s16.S     | 118 ++++
 .../src/arch/vx4b/filter/filter_fir_s32.S     | 206 +++++++
 .../arch/vx4b/filter/push_sample_down_s16.S   | 127 ++++
 .../src/arch/vx4b/filter/push_sample_up_s16.S | 152 +++++
 .../vx4b/filter/vect_s32_convolve_valid.S     | 130 ++++
 .../vx4b/matrix/mat_mul_s8_x_s8_yield_s32.S   | 128 ++++
 .../src/arch/vx4b/misc/chunk_float_s32_log.S  | 184 ++++++
 lib_xcore_math/src/arch/vx4b/misc/util.S      | 103 ++++
 lib_xcore_math/src/arch/vx4b/misc/vect_copy.S |  60 ++
 .../vx4b/misc/vect_float_s32_ln_prepare.S     | 122 ++++
 .../src/arch/vx4b/misc/xs3_memcpy.S           |  53 ++
 .../src/arch/vx4b/scalar/f32_log2.S           |  69 +++
 .../src/arch/vx4b/scalar/f32_norm.S           |  48 ++
 .../src/arch/vx4b/scalar/f32_power_series.S   | 133 +++++
 lib_xcore_math/src/arch/vx4b/scalar/f32_sin.S | 147 +++++
 .../src/arch/vx4b/scalar/float_s32.c          |  87 +++
 .../src/arch/vx4b/scalar/float_s32_exp.almost | 153 +++++
 .../src/arch/vx4b/scalar/q24_logistic_fast.S  |  80 +++
 .../src/arch/vx4b/scalar/q30_exp_small.S      | 115 ++++
 .../src/arch/vx4b/scalar/q30_odd_powers.S     |  74 +++
 .../src/arch/vx4b/scalar/q30_powers.S         | 110 ++++
 .../src/arch/vx4b/scalar/radians_to_sbrads.S  |  95 +++
 .../src/arch/vx4b/scalar/sbrad_sin.S          | 118 ++++
 .../src/arch/vx4b/scalar/sbrad_tan.S          | 150 +++++
 .../src/arch/vx4b/scalar/scalar_op_s16.S      | 511 ++++++++++++++++
 .../src/arch/vx4b/scalar/scalar_op_s32.S      | 563 ++++++++++++++++++
 .../src/arch/vx4b/scalar/scalar_op_s8.S       | 423 +++++++++++++
 .../src/arch/vx4b/scalar/sqrt_s32.S           | 129 ++++
 .../vect_complex_s16_complex_scale.S          | 205 +++++++
 .../vect_complex_s16_conj_macc.S              | 219 +++++++
 .../vect_complex_s16_conj_nmacc.S             | 211 +++++++
 .../vect_complex_s16_conjugate_mul.S          | 160 +++++
 .../vect_complex_s16/vect_complex_s16_macc.S  | 212 +++++++
 .../vect_complex_s16/vect_complex_s16_mag.S   | 269 +++++++++
 .../vect_complex_s16/vect_complex_s16_mul.S   | 186 ++++++
 .../vect_complex_s16/vect_complex_s16_nmacc.S | 211 +++++++
 .../vect_complex_s16_real_mul.S               | 142 +++++
 .../vect_complex_s16_squared_mag.S            | 120 ++++
 .../vect_complex_s16/vect_complex_s16_sum.S   | 156 +++++
 .../vect_complex_s16_to_complex_s32.S         |  60 ++
 .../vect_complex_s32_complex_scale.S          | 151 +++++
 .../vect_complex_s32_conj_macc.S              | 129 ++++
 .../vect_complex_s32_conj_nmacc.S             | 129 ++++
 .../vect_complex_s32_conjugate.S              |  83 +++
 .../vect_complex_s32_conjugate_mul.S          | 116 ++++
 .../vect_complex_s32/vect_complex_s32_macc.S  | 132 ++++
 .../vect_complex_s32/vect_complex_s32_mag.S   | 166 ++++++
 .../vect_complex_s32/vect_complex_s32_mul.S   | 116 ++++
 .../vect_complex_s32/vect_complex_s32_nmacc.S | 128 ++++
 .../vect_complex_s32_real_mul.S               | 134 +++++
 .../vect_complex_s32_squared_mag.S            | 120 ++++
 .../vect_complex_s32/vect_complex_s32_sum.S   | 150 +++++
 .../vect_complex_s32_to_complex_s16.S         | 107 ++++
 .../vect_f32/vect_complex_f32_conj_macc.S     |  77 +++
 .../vx4b/vect_f32/vect_complex_f32_conj_mul.S |  81 +++
 .../vx4b/vect_f32/vect_complex_f32_macc.S     |  77 +++
 .../arch/vx4b/vect_f32/vect_complex_f32_mul.S |  82 +++
 .../src/arch/vx4b/vect_f32/vect_f32_add.S     |  88 +++
 .../src/arch/vx4b/vect_f32/vect_f32_dot.S     | 118 ++++
 .../vx4b/vect_f32/vect_f32_max_exponent.S     |  72 +++
 .../src/arch/vx4b/vect_f32/vect_f32_to_s32.S  |  90 +++
 .../src/arch/vx4b/vect_s16/vect_s16_abs.S     | 124 ++++
 .../src/arch/vx4b/vect_s16/vect_s16_abs_sum.S | 150 +++++
 .../src/arch/vx4b/vect_s16/vect_s16_argmax.S  | 165 +++++
 .../src/arch/vx4b/vect_s16/vect_s16_argmin.S  | 184 ++++++
 .../src/arch/vx4b/vect_s16/vect_s16_clip.S    | 331 ++++++++++
 .../src/arch/vx4b/vect_s16/vect_s16_dot.c     |  36 ++
 .../src/arch/vx4b/vect_s16/vect_s16_energy.S  | 116 ++++
 .../vect_s16/vect_s16_extract_high_byte.S     | 132 ++++
 .../vx4b/vect_s16/vect_s16_extract_low_byte.S | 129 ++++
 .../src/arch/vx4b/vect_s16/vect_s16_inverse.S |  66 ++
 .../src/arch/vx4b/vect_s16/vect_s16_macc.S    | 122 ++++
 .../src/arch/vx4b/vect_s16/vect_s16_max.S     | 114 ++++
 .../src/arch/vx4b/vect_s16/vect_s16_min.S     | 115 ++++
 .../src/arch/vx4b/vect_s16/vect_s16_mul.S     | 106 ++++
 .../src/arch/vx4b/vect_s16/vect_s16_nmacc.S   | 122 ++++
 .../src/arch/vx4b/vect_s16/vect_s16_scale.S   | 120 ++++
 .../src/arch/vx4b/vect_s16/vect_s16_sqrt.S    | 203 +++++++
 .../src/arch/vx4b/vect_s16/vect_s16_sum.S     | 113 ++++
 .../src/arch/vx4b/vect_s16/vect_s16_to_s32.S  | 111 ++++
 .../src/arch/vx4b/vect_s32/s32_to_chunk_s32.S |  49 ++
 .../src/arch/vx4b/vect_s32/vect_s32_abs.S     | 106 ++++
 .../src/arch/vx4b/vect_s32/vect_s32_abs_sum.S | 109 ++++
 .../src/arch/vx4b/vect_s32/vect_s32_argmax.S  | 154 +++++
 .../src/arch/vx4b/vect_s32/vect_s32_argmin.S  | 161 +++++
 .../src/arch/vx4b/vect_s32/vect_s32_clip.S    | 330 ++++++++++
 .../src/arch/vx4b/vect_s32/vect_s32_dot.S     | 118 ++++
 .../src/arch/vx4b/vect_s32/vect_s32_energy.S  | 111 ++++
 .../src/arch/vx4b/vect_s32/vect_s32_inverse.S | 113 ++++
 .../src/arch/vx4b/vect_s32/vect_s32_macc.S    | 119 ++++
 .../src/arch/vx4b/vect_s32/vect_s32_max.S     | 114 ++++
 .../arch/vx4b/vect_s32/vect_s32_merge_accs.S  | 104 ++++
 .../src/arch/vx4b/vect_s32/vect_s32_min.S     | 114 ++++
 .../src/arch/vx4b/vect_s32/vect_s32_mul.S     | 109 ++++
 .../src/arch/vx4b/vect_s32/vect_s32_nmacc.S   | 119 ++++
 .../src/arch/vx4b/vect_s32/vect_s32_scale.S   | 109 ++++
 .../arch/vx4b/vect_s32/vect_s32_split_accs.S  | 114 ++++
 .../src/arch/vx4b/vect_s32/vect_s32_sqrt.S    | 185 ++++++
 .../src/arch/vx4b/vect_s32/vect_s32_sum.S     |  98 +++
 .../src/arch/vx4b/vect_s32/vect_s32_to_f32.S  |  91 +++
 .../src/arch/vx4b/vect_s32/vect_s32_to_s16.S  |  77 +++
 .../src/arch/vx4b/vect_s32/vect_s32_unzip.S   |  69 +++
 .../src/arch/vx4b/vect_s32/vect_s32_zip.S     | 143 +++++
 .../arch/vx4b/vect_s8/vect_s8_is_negative.S   |  81 +++
 .../src/arch/vx4b/vect_sXX/vect_add.S         | 156 +++++
 .../src/arch/vx4b/vect_sXX/vect_headroom.S    | 134 +++++
 .../src/arch/vx4b/vect_sXX/vect_rect.S        | 138 +++++
 .../arch/vx4b/vect_sXX/vect_sXX_add_scalar.S  | 112 ++++
 .../vx4b/vect_sXX/vect_sXX_max_elementwise.S  | 192 ++++++
 .../vx4b/vect_sXX/vect_sXX_min_elementwise.S  | 193 ++++++
 .../src/arch/vx4b/vect_sXX/vect_set.S         | 133 +++++
 .../src/arch/vx4b/vect_sXX/vect_shl.S         | 169 ++++++
 .../src/arch/vx4b/vect_sXX/vect_sub.S         | 151 +++++
 lib_xcore_math/src/arch/xs3/asm_helper.h      |   2 +-
 .../arch/xs3/chunk_s16/chunk_s16_accumulate.S |   2 +-
 .../src/arch/xs3/chunk_s32/chunk_s32_dot.S    |   2 +-
 .../src/arch/xs3/chunk_s32/chunk_s32_log.S    |   2 +-
 .../xs3/chunk_s32/chunk_s32_power_series.S    |   2 +-
 .../xs3/chunk_s32/chunk_s32_power_series_v2.S |   2 +-
 .../src/arch/xs3/dct/s32/dct12_s32.S          |   2 +-
 .../src/arch/xs3/dct/s32/dct16_s32.S          |   2 +-
 .../src/arch/xs3/dct/s32/dct24_s32.S          |   2 +-
 .../src/arch/xs3/dct/s32/dct6_s32.S           |   2 +-
 .../src/arch/xs3/dct/s32/dct8_s32.S           |   2 +-
 .../src/arch/xs3/dct/s32/dct_adsb_s32.S       |   2 +-
 .../src/arch/xs3/dct/s32/dct_deconvolve_s32.S |   2 +-
 .../src/arch/xs3/dct/s32/idct6_s32.S          |   2 +-
 .../src/arch/xs3/dct/s32/idct8_s32.S          |   2 +-
 .../src/arch/xs3/dct/s32/idct_adsb.S          |   2 +-
 .../src/arch/xs3/dct/s32/idct_convolve.S      |   2 +-
 .../src/arch/xs3/dct/s32/idct_scale.S         |   2 +-
 .../src/arch/xs3/dct/s8/dct8x8_stageA.S       |   2 +-
 .../src/arch/xs3/dct/s8/dct8x8_stageB.S       |   2 +-
 .../src/arch/xs3/dct/vect_s32_flip.S          |   2 +-
 lib_xcore_math/src/arch/xs3/fft/dif_fft.S     |   2 +-
 lib_xcore_math/src/arch/xs3/fft/dit_fft.S     |   2 +-
 lib_xcore_math/src/arch/xs3/fft/fft_hr_lut.S  |   2 +-
 .../src/arch/xs3/fft/fft_index_bit_reversal.S |   2 +-
 .../src/arch/xs3/fft/fft_mono_adjust.S        |   2 +-
 .../src/arch/xs3/fft/fft_spectra_merge.S      |   2 +-
 .../src/arch/xs3/fft/fft_spectra_split.S      |   2 +-
 .../arch/xs3/fft/tail_reverse_complex_s32.S   |   2 +-
 .../src/arch/xs3/filter/filter_biquad_s32.S   |   2 +-
 .../arch/xs3/filter/filter_biquad_sat_s32.S   |   2 +-
 .../src/arch/xs3/filter/filter_fir_s16.S      |   2 +-
 .../src/arch/xs3/filter/filter_fir_s32.S      |   2 +-
 .../arch/xs3/filter/push_sample_down_s16.S    |   2 +-
 .../src/arch/xs3/filter/push_sample_up_s16.S  |   2 +-
 .../arch/xs3/filter/vect_s32_convolve_valid.S |   2 +-
 .../xs3/matrix/mat_mul_s8_x_s8_yield_s32.S    |   2 +-
 .../src/arch/xs3/misc/chunk_float_s32_log.S   |   2 +-
 lib_xcore_math/src/arch/xs3/misc/util.S       |   2 +-
 lib_xcore_math/src/arch/xs3/misc/vect_copy.S  |   2 +-
 .../arch/xs3/misc/vect_float_s32_ln_prepare.S |   2 +-
 lib_xcore_math/src/arch/xs3/misc/xs3_memcpy.S |   2 +-
 lib_xcore_math/src/arch/xs3/scalar/f32_log2.S |   2 +-
 lib_xcore_math/src/arch/xs3/scalar/f32_norm.S |   2 +-
 .../src/arch/xs3/scalar/f32_power_series.S    |   2 +-
 lib_xcore_math/src/arch/xs3/scalar/f32_sin.S  |   2 +-
 .../src/arch/xs3/scalar/float_s32_exp.S       |   2 +-
 .../src/arch/xs3/scalar/q24_logistic_fast.S   |   2 +-
 .../src/arch/xs3/scalar/q30_exp_small.S       |   2 +-
 .../src/arch/xs3/scalar/q30_odd_powers.S      |   2 +-
 .../src/arch/xs3/scalar/q30_powers.S          |   2 +-
 .../src/arch/xs3/scalar/radians_to_sbrads.S   |   2 +-
 .../src/arch/xs3/scalar/sbrad_sin.S           |   2 +-
 .../src/arch/xs3/scalar/sbrad_tan.S           |   2 +-
 .../src/arch/xs3/scalar/scalar_op_s16.S       |   2 +-
 .../src/arch/xs3/scalar/scalar_op_s32.S       |   2 +-
 .../src/arch/xs3/scalar/scalar_op_s8.S        |   2 +-
 lib_xcore_math/src/arch/xs3/scalar/sqrt_s32.S |   2 +-
 .../vect_complex_s16_complex_scale.S          |   2 +-
 .../vect_complex_s16_conj_macc.S              |   2 +-
 .../vect_complex_s16_conj_nmacc.S             |   2 +-
 .../vect_complex_s16_conjugate_mul.S          |   2 +-
 .../vect_complex_s16/vect_complex_s16_macc.S  |   2 +-
 .../vect_complex_s16/vect_complex_s16_mag.S   |   2 +-
 .../vect_complex_s16/vect_complex_s16_mul.S   |   2 +-
 .../vect_complex_s16/vect_complex_s16_nmacc.S |   2 +-
 .../vect_complex_s16_real_mul.S               |   2 +-
 .../vect_complex_s16_squared_mag.S            |   2 +-
 .../vect_complex_s16/vect_complex_s16_sum.S   |   2 +-
 .../vect_complex_s16_to_complex_s32.S         |   2 +-
 .../vect_complex_s32_complex_scale.S          |   2 +-
 .../vect_complex_s32_conj_macc.S              |   2 +-
 .../vect_complex_s32_conj_nmacc.S             |   2 +-
 .../vect_complex_s32_conjugate.S              |   2 +-
 .../vect_complex_s32_conjugate_mul.S          |   2 +-
 .../vect_complex_s32/vect_complex_s32_macc.S  |   2 +-
 .../vect_complex_s32/vect_complex_s32_mag.S   |   2 +-
 .../vect_complex_s32/vect_complex_s32_mul.S   |   2 +-
 .../vect_complex_s32/vect_complex_s32_nmacc.S |   2 +-
 .../vect_complex_s32_real_mul.S               |   2 +-
 .../vect_complex_s32_squared_mag.S            |   2 +-
 .../vect_complex_s32/vect_complex_s32_sum.S   |   2 +-
 .../vect_complex_s32_to_complex_s16.S         |   2 +-
 .../xs3/vect_f32/vect_complex_f32_conj_macc.S |   2 +-
 .../xs3/vect_f32/vect_complex_f32_conj_mul.S  |   2 +-
 .../arch/xs3/vect_f32/vect_complex_f32_macc.S |   2 +-
 .../arch/xs3/vect_f32/vect_complex_f32_mul.S  |   2 +-
 .../src/arch/xs3/vect_f32/vect_f32_add.S      |   2 +-
 .../src/arch/xs3/vect_f32/vect_f32_dot.S      |   2 +-
 .../arch/xs3/vect_f32/vect_f32_max_exponent.S |   2 +-
 .../src/arch/xs3/vect_f32/vect_f32_to_s32.S   |   2 +-
 .../src/arch/xs3/vect_s16/vect_s16_abs_sum.S  |   2 +-
 .../src/arch/xs3/vect_s16/vect_s16_argmax.S   |   2 +-
 .../src/arch/xs3/vect_s16/vect_s16_argmin.S   |   2 +-
 .../src/arch/xs3/vect_s16/vect_s16_clip.S     |   2 +-
 .../src/arch/xs3/vect_s16/vect_s16_dot.S      |   2 +-
 .../src/arch/xs3/vect_s16/vect_s16_energy.S   |   2 +-
 .../xs3/vect_s16/vect_s16_extract_high_byte.S |   2 +-
 .../xs3/vect_s16/vect_s16_extract_low_byte.S  |   2 +-
 .../src/arch/xs3/vect_s16/vect_s16_inverse.S  |   2 +-
 .../src/arch/xs3/vect_s16/vect_s16_macc.S     |   2 +-
 .../src/arch/xs3/vect_s16/vect_s16_max.S      |   2 +-
 .../src/arch/xs3/vect_s16/vect_s16_min.S      |   2 +-
 .../src/arch/xs3/vect_s16/vect_s16_mul.S      |   2 +-
 .../src/arch/xs3/vect_s16/vect_s16_nmacc.S    |   2 +-
 .../src/arch/xs3/vect_s16/vect_s16_scale.S    |   2 +-
 .../src/arch/xs3/vect_s16/vect_s16_sqrt.S     |   2 +-
 .../src/arch/xs3/vect_s16/vect_s16_sum.S      |   2 +-
 .../src/arch/xs3/vect_s16/vect_s16_to_s32.S   |   2 +-
 .../src/arch/xs3/vect_s32/s32_to_chunk_s32.S  |   2 +-
 .../src/arch/xs3/vect_s32/vect_s32_abs_sum.S  |   2 +-
 .../src/arch/xs3/vect_s32/vect_s32_argmax.S   |   2 +-
 .../src/arch/xs3/vect_s32/vect_s32_argmin.S   |   2 +-
 .../src/arch/xs3/vect_s32/vect_s32_clip.S     |   2 +-
 .../src/arch/xs3/vect_s32/vect_s32_dot.S      |   2 +-
 .../src/arch/xs3/vect_s32/vect_s32_energy.S   |   2 +-
 .../src/arch/xs3/vect_s32/vect_s32_inverse.S  |   2 +-
 .../src/arch/xs3/vect_s32/vect_s32_macc.S     |   2 +-
 .../src/arch/xs3/vect_s32/vect_s32_max.S      |   2 +-
 .../arch/xs3/vect_s32/vect_s32_merge_accs.S   |   2 +-
 .../src/arch/xs3/vect_s32/vect_s32_min.S      |   2 +-
 .../src/arch/xs3/vect_s32/vect_s32_mul.S      |   2 +-
 .../src/arch/xs3/vect_s32/vect_s32_nmacc.S    |   2 +-
 .../src/arch/xs3/vect_s32/vect_s32_scale.S    |   2 +-
 .../arch/xs3/vect_s32/vect_s32_split_accs.S   |   2 +-
 .../src/arch/xs3/vect_s32/vect_s32_sqrt.S     |   2 +-
 .../src/arch/xs3/vect_s32/vect_s32_sum.S      |   2 +-
 .../src/arch/xs3/vect_s32/vect_s32_to_f32.S   |   2 +-
 .../src/arch/xs3/vect_s32/vect_s32_to_s16.S   |   2 +-
 .../src/arch/xs3/vect_s32/vect_s32_unzip.S    |   2 +-
 .../src/arch/xs3/vect_s32/vect_s32_zip.S      |   2 +-
 .../arch/xs3/vect_s8/vect_s8_is_negative.S    |   2 +-
 .../src/arch/xs3/vect_sXX/vect_abs.S          |   2 +-
 .../src/arch/xs3/vect_sXX/vect_add.S          |   2 +-
 .../src/arch/xs3/vect_sXX/vect_headroom.S     |   2 +-
 .../src/arch/xs3/vect_sXX/vect_rect.S         |   2 +-
 .../arch/xs3/vect_sXX/vect_sXX_add_scalar.S   |   2 +-
 .../xs3/vect_sXX/vect_sXX_max_elementwise.S   |   2 +-
 .../xs3/vect_sXX/vect_sXX_min_elementwise.S   |   2 +-
 .../src/arch/xs3/vect_sXX/vect_set.S          |   2 +-
 .../src/arch/xs3/vect_sXX/vect_shl.S          |   2 +-
 .../src/arch/xs3/vect_sXX/vect_sub.S          |   2 +-
 lib_xcore_math/src/bfp/bfp_alloc.c            |   2 +-
 lib_xcore_math/src/bfp/bfp_complex_s16.c      |   2 +-
 lib_xcore_math/src/bfp/bfp_complex_s32.c      |   2 +-
 lib_xcore_math/src/bfp/bfp_init.c             |   2 +-
 lib_xcore_math/src/bfp/bfp_s16.c              |   2 +-
 lib_xcore_math/src/bfp/bfp_s32.c              |   2 +-
 .../src/bfp/misc/gradient_constraint.c        |   2 +-
 lib_xcore_math/src/dct/dct8x8.c               |   2 +-
 lib_xcore_math/src/dct/dct_forward.c          |   2 +-
 lib_xcore_math/src/dct/dct_inverse.c          |   2 +-
 .../src/etc/xmath_fft_lut/xmath_fft_lut.c     |   2 +-
 .../src/etc/xmath_fft_lut/xmath_fft_lut.h     |   2 +-
 lib_xcore_math/src/fft/fft_bfp.c              |   2 +-
 lib_xcore_math/src/fft/fft_f32.c              |   2 +-
 lib_xcore_math/src/filter/filters.c           |   2 +-
 lib_xcore_math/src/scalar/scalar_f32.c        |   2 +-
 .../src/scalar/scalar_float_complex_sXX.c     |   2 +-
 lib_xcore_math/src/scalar/scalar_float_s32.c  |   2 +-
 lib_xcore_math/src/scalar/scalar_float_s64.c  |   2 +-
 lib_xcore_math/src/scalar/scalar_ops.c        |   2 +-
 lib_xcore_math/src/scalar/scalar_qXX.c        |   2 +-
 lib_xcore_math/src/vect/chunk_s32.c           |   2 +-
 lib_xcore_math/src/vect/complex_prepare.c     |   2 +-
 lib_xcore_math/src/vect/convolve.c            |   2 +-
 lib_xcore_math/src/vect/mat_mul.c             |   2 +-
 lib_xcore_math/src/vect/prepare.c             |   2 +-
 .../src/vect/vect_complex_mag_rot_tables.c    |   2 +-
 lib_xcore_math/src/vect/vect_complex_s16.c    |   2 +-
 lib_xcore_math/src/vect/vect_complex_s32.c    |   2 +-
 lib_xcore_math/src/vect/vect_f32.c            |   2 +-
 lib_xcore_math/src/vect/vect_float_s32.c      |   2 +-
 lib_xcore_math/src/vect/vect_s16.c            |   2 +-
 lib_xcore_math/src/vect/vect_s32.c            |   2 +-
 lib_xcore_math/src/vect/vpu_const_vects.c     |   2 +-
 lib_xcore_math/src/vect/vpu_const_vects.h     |   2 +-
 lib_xcore_math/src/vect/vpu_helper.h          |   2 +-
 tests/Makefile                                | 210 +++++++
 .../bfp/complex/test_bfp_bitdepth_convert.c   |   2 +-
 .../src/bfp/complex/test_bfp_complex_add.c    |   2 +-
 .../bfp/complex/test_bfp_complex_add_scalar.c |   2 +-
 .../bfp/complex/test_bfp_complex_conj_macc.c  |   2 +-
 .../bfp/complex/test_bfp_complex_conjugate.c  |   2 +-
 .../complex/test_bfp_complex_conjugate_mul.c  |  11 +-
 .../src/bfp/complex/test_bfp_complex_energy.c |   2 +-
 .../src/bfp/complex/test_bfp_complex_macc.c   |  20 +-
 .../src/bfp/complex/test_bfp_complex_mag.c    |   2 +-
 .../src/bfp/complex/test_bfp_complex_make.c   |   2 +-
 .../src/bfp/complex/test_bfp_complex_mul.c    |  11 +-
 .../bfp/complex/test_bfp_complex_real_mul.c   |   2 +-
 .../bfp/complex/test_bfp_complex_real_scale.c |   2 +-
 .../src/bfp/complex/test_bfp_complex_scale.c  |  13 +-
 .../complex/test_bfp_complex_squared_mag.c    |  10 +-
 .../src/bfp/complex/test_bfp_complex_sub.c    |   2 +-
 .../complex/test_bfp_complex_use_exponent.c   |   2 +-
 .../src/bfp/complex/test_bfp_sum_complex.c    |  11 +-
 tests/bfp_tests/src/bfp/real/test_bfp_abs.c   |   8 +-
 .../bfp_tests/src/bfp/real/test_bfp_abs_sum.c |  12 +-
 tests/bfp_tests/src/bfp/real/test_bfp_add.c   |   2 +-
 .../src/bfp/real/test_bfp_add_scalar.c        |   2 +-
 tests/bfp_tests/src/bfp/real/test_bfp_alloc.c |   2 +-
 .../bfp_tests/src/bfp/real/test_bfp_argmax.c  |   2 +-
 .../bfp_tests/src/bfp/real/test_bfp_argmin.c  |   2 +-
 tests/bfp_tests/src/bfp/real/test_bfp_clip.c  |   2 +-
 .../src/bfp/real/test_bfp_convolve.c          |   2 +-
 .../bfp_tests/src/bfp/real/test_bfp_dealloc.c |   2 +-
 .../src/bfp/real/test_bfp_depth_convert.c     |   2 +-
 tests/bfp_tests/src/bfp/real/test_bfp_dot.c   |   2 +-
 .../bfp_tests/src/bfp/real/test_bfp_energy.c  |   2 +-
 .../src/bfp/real/test_bfp_headroom.c          |   2 +-
 tests/bfp_tests/src/bfp/real/test_bfp_init.c  |   2 +-
 .../bfp_tests/src/bfp/real/test_bfp_inverse.c |   2 +-
 tests/bfp_tests/src/bfp/real/test_bfp_macc.c  |   2 +-
 tests/bfp_tests/src/bfp/real/test_bfp_max.c   |  16 +-
 .../src/bfp/real/test_bfp_max_elementwise.c   |   2 +-
 tests/bfp_tests/src/bfp/real/test_bfp_mean.c  |   9 +-
 tests/bfp_tests/src/bfp/real/test_bfp_min.c   |   2 +-
 .../src/bfp/real/test_bfp_min_elementwise.c   |   2 +-
 tests/bfp_tests/src/bfp/real/test_bfp_mul.c   |   2 +-
 tests/bfp_tests/src/bfp/real/test_bfp_rect.c  |   2 +-
 tests/bfp_tests/src/bfp/real/test_bfp_rms.c   |   6 +-
 .../src/bfp/real/test_bfp_s16_accumulate.c    |   2 +-
 tests/bfp_tests/src/bfp/real/test_bfp_scale.c |   2 +-
 tests/bfp_tests/src/bfp/real/test_bfp_set.c   |   2 +-
 .../src/bfp/real/test_bfp_shl_vect.c          |   2 +-
 .../src/bfp/real/test_bfp_sqrt_vect.c         |   2 +-
 tests/bfp_tests/src/bfp/real/test_bfp_sub.c   |   2 +-
 tests/bfp_tests/src/bfp/real/test_bfp_sum.c   |  14 +-
 .../src/bfp/real/test_bfp_use_exponent.c      |   2 +-
 tests/bfp_tests/src/main.c                    |   2 +-
 .../src/misc/test_bfp_gradient_constraint.c   |   2 +-
 tests/bfp_tests/src/tst_asserts.h             |   2 +-
 tests/bfp_tests/src/tst_common.c              |   2 +-
 tests/bfp_tests/src/tst_common.h              |   2 +-
 tests/bfp_tests/src/unity_config.h            |   2 +-
 tests/config.xml                              | 555 +++++++++++++++++
 tests/dct_tests/src/lib_dsp/dsp_dct.c         |   2 +-
 tests/dct_tests/src/lib_dsp/dsp_dct.h         |   2 +-
 tests/dct_tests/src/main.c                    |   2 +-
 tests/dct_tests/src/test_dct8x8.c             |   2 +-
 tests/dct_tests/src/test_dctXX_forward.c      |   2 +-
 tests/dct_tests/src/test_dctXX_inverse.c      |  18 +-
 tests/dct_tests/src/test_random.h             |   2 +-
 tests/dct_tests/src/tst_common.c              |   2 +-
 tests/dct_tests/src/tst_common.h              |   2 +-
 tests/dct_tests/src/unity_config.h            |   2 +-
 tests/fft_tests/src/main.c                    |   2 +-
 tests/fft_tests/src/test_bfp_fft.c            |   2 +-
 tests/fft_tests/src/test_bfp_pack_unpack.c    |   2 +-
 tests/fft_tests/src/test_fft_dif.c            |   2 +-
 tests/fft_tests/src/test_fft_dit.c            |   2 +-
 tests/fft_tests/src/test_fft_helpers.c        |   2 +-
 tests/fft_tests/src/test_fft_mono_adjust.c    |   2 +-
 tests/fft_tests/src/test_issue96.c            |   2 +-
 tests/fft_tests/src/test_random.h             |   2 +-
 tests/fft_tests/src/test_vect_f32_fft.c       |   2 +-
 tests/fft_tests/src/tst_common.c              |   2 +-
 tests/fft_tests/src/tst_common.h              |   2 +-
 tests/fft_tests/src/unity_config.h            |   2 +-
 .../script/test_filter_biquad_s32_case3.py    |   2 +-
 .../test_filter_biquad_sat_s32_case3.py       |   2 +-
 .../src/filter/test_filter_biquad_s32.c       |   2 +-
 .../src/filter/test_filter_biquad_sat_s32.c   |   6 +-
 .../src/filter/test_filter_fir_s16.c          |   2 +-
 .../filter/test_filter_fir_s16_push_sample.c  |   2 +-
 .../src/filter/test_filter_fir_s32.c          |   2 +-
 tests/filter_tests/src/main.c                 |   2 +-
 tests/filter_tests/src/tst_common.h           |   2 +-
 tests/filter_tests/src/unity_config.h         |   2 +-
 tests/legacy_build/src/main.c                 |   2 +-
 tests/scalar_tests/src/basic/test_cls.c       |   2 +-
 tests/scalar_tests/src/basic/test_hr.c        |   2 +-
 .../scalar_tests/src/float/test_fixed_trig.c  |  16 +-
 .../src/float/test_float_convert.c            |   2 +-
 tests/scalar_tests/src/float/test_float_exp.c |   6 +-
 tests/scalar_tests/src/float/test_float_log.c |   2 +-
 .../src/float/test_float_logistic.c           |   2 +-
 .../src/float/test_float_s32_sqrt.c           |   2 +-
 .../src/float/test_float_sXX_abs.c            |   2 +-
 .../src/float/test_float_sXX_add.c            |   2 +-
 .../src/float/test_float_sXX_div.c            |   2 +-
 .../src/float/test_float_sXX_ema.c            |   2 +-
 .../src/float/test_float_sXX_gt.c             |   2 +-
 .../src/float/test_float_sXX_mul.c            |   2 +-
 .../src/float/test_float_sXX_sub.c            |   2 +-
 .../scalar_tests/src/float/test_float_trig.c  |   2 +-
 .../scalar_tests/src/float/test_q30_powers.c  |   4 +-
 tests/scalar_tests/src/main.c                 |   2 +-
 tests/scalar_tests/src/tst_asserts.h          |   2 +-
 tests/scalar_tests/src/tst_common.c           |   2 +-
 tests/scalar_tests/src/tst_common.h           |   2 +-
 tests/scalar_tests/src/unity_config.h         |   2 +-
 tests/scalar_tests/src/util/test_s32_sqrt.c   |   2 +-
 .../scalar_tests/src/util/test_sXX_inverse.c  |   2 +-
 tests/scalar_tests/src/util/test_sXX_mul.c    |   6 +-
 tests/shared/floating_fft/floating_dct.c      |   2 +-
 tests/shared/floating_fft/floating_dct.h      |   2 +-
 tests/shared/floating_fft/floating_fft.h      |   2 +-
 .../shared/floating_fft/floating_fft_double.c |   2 +-
 .../shared/floating_fft/floating_fft_float.c  |   2 +-
 tests/shared/floating_fft/floating_fft_util.c |   2 +-
 tests/shared/pseudo_rand/pseudo_rand.c        |   2 +-
 tests/shared/pseudo_rand/pseudo_rand.h        |   2 +-
 tests/shared/pseudo_rand/rand_frame.c         |   2 +-
 tests/shared/pseudo_rand/rand_frame.h         |   2 +-
 tests/shared/testing/testing.h                |   2 +-
 tests/shared/testing/testing_conv.c           |   2 +-
 tests/shared/testing/testing_diff.c           |   2 +-
 tests/shared/testing/testing_misc.c           |   2 +-
 tests/shared/testing/testing_print.c          |   2 +-
 tests/vect_tests/src/main.c                   |   2 +-
 .../matrix/test_mat_mul_s8_x_s16_yield_s32.c  |   2 +-
 .../src/matrix/test_mat_mul_s8_x_s8.c         |   2 +-
 tests/vect_tests/src/tst_asserts.h            |   2 +-
 tests/vect_tests/src/tst_common.c             |   2 +-
 tests/vect_tests/src/tst_common.h             |   2 +-
 tests/vect_tests/src/unity_config.h           |   2 +-
 .../src/vect/complex/test_vect_complex_add.c  |   2 +-
 .../complex/test_vect_complex_add_scalar.c    |   2 +-
 .../complex/test_vect_complex_complex_scale.c |  40 +-
 .../complex/test_vect_complex_conj_macc.c     |   2 +-
 .../complex/test_vect_complex_conjugate.c     |   2 +-
 .../complex/test_vect_complex_conjugate_mul.c |  41 +-
 .../src/vect/complex/test_vect_complex_macc.c |   2 +-
 .../src/vect/complex/test_vect_complex_mag.c  |   6 +-
 .../src/vect/complex/test_vect_complex_mul.c  |  39 +-
 .../vect/complex/test_vect_complex_real_mul.c |  50 +-
 .../complex/test_vect_complex_real_scale.c    |   6 +-
 .../test_vect_complex_s16_to_complex_s32.c    |   2 +-
 .../test_vect_complex_s32_to_complex_s16.c    |   2 +-
 .../complex/test_vect_complex_squared_mag.c   |  17 +-
 .../src/vect/complex/test_vect_complex_sub.c  |   2 +-
 .../src/vect/complex/test_vect_sum_complex.c  |  22 +-
 .../vect/float/test_vect_complex_f32_macc.c   |   2 +-
 .../vect/float/test_vect_complex_f32_mul.c    |   2 +-
 .../src/vect/float/test_vect_f32_add.c        |   2 +-
 .../src/vect/float/test_vect_f32_dot.c        |   2 +-
 .../vect/float/test_vect_f32_max_exponent.c   |   2 +-
 .../src/vect/float/test_vect_f32_to_s32.c     |   2 +-
 .../src/vect/float/test_vect_s32_to_f32.c     |   2 +-
 .../src/vect/stat/test_vect_abs_sum.c         |  24 +-
 .../src/vect/stat/test_vect_argmax.c          |   2 +-
 .../src/vect/stat/test_vect_argmin.c          |   2 +-
 .../src/vect/stat/test_vect_energy.c          |   9 +-
 .../vect_tests/src/vect/stat/test_vect_max.c  |   2 +-
 .../vect_tests/src/vect/stat/test_vect_min.c  |   2 +-
 .../src/vect/test_chunk_s16_accumulate.c      |   2 +-
 tests/vect_tests/src/vect/test_vect_abs.c     |  22 +-
 tests/vect_tests/src/vect/test_vect_add.c     |   6 +-
 .../src/vect/test_vect_add_scalar.c           |   2 +-
 .../src/vect/test_vect_bitdepth_convert.c     |   4 +-
 tests/vect_tests/src/vect/test_vect_clip.c    |  30 +-
 tests/vect_tests/src/vect/test_vect_copy.c    |   2 +-
 tests/vect_tests/src/vect/test_vect_dot.c     |   2 +-
 tests/vect_tests/src/vect/test_vect_exp.c     |   6 +-
 .../vect_tests/src/vect/test_vect_headroom.c  |   2 +-
 tests/vect_tests/src/vect/test_vect_inverse.c |   2 +-
 tests/vect_tests/src/vect/test_vect_log.c     |  10 +-
 tests/vect_tests/src/vect/test_vect_macc.c    |   2 +-
 .../src/vect/test_vect_max_elementwise.c      |   3 +-
 .../src/vect/test_vect_min_elementwise.c      |   2 +-
 tests/vect_tests/src/vect/test_vect_mul.c     |  65 +-
 tests/vect_tests/src/vect/test_vect_rect.c    |   2 +-
 .../src/vect/test_vect_s16_extract.c          |   2 +-
 .../src/vect/test_vect_s32_convolve.c         |   2 +-
 .../src/vect/test_vect_s8_boolean.c           |   2 +-
 tests/vect_tests/src/vect/test_vect_scale.c   |  25 +-
 tests/vect_tests/src/vect/test_vect_set.c     |   2 +-
 tests/vect_tests/src/vect/test_vect_shl.c     |  10 +-
 tests/vect_tests/src/vect/test_vect_shr.c     |  10 +-
 tests/vect_tests/src/vect/test_vect_sqrt.c    |   8 +-
 tests/vect_tests/src/vect/test_vect_sub.c     |   8 +-
 tests/vect_tests/src/vect/test_vect_sum.c     |  17 +-
 tests/vect_tests/src/vect/test_vect_zip.c     |   2 +-
 tests/xs3_tests/src/dummy.xc                  |   2 +-
 tests/xs3_tests/src/main.c                    |   2 +-
 tests/xs3_tests/src/test_vpu_scalar_ops_s16.c |   4 +-
 tests/xs3_tests/src/test_vpu_scalar_ops_s32.c |   6 +-
 tests/xs3_tests/src/test_vpu_scalar_ops_s8.c  |   2 +-
 tests/xs3_tests/src/tst_asserts.h             |   2 +-
 tests/xs3_tests/src/tst_common.c              |   2 +-
 tests/xs3_tests/src/tst_common.h              |   2 +-
 tests/xs3_tests/src/unity_config.h            |   2 +-
 634 files changed, 21295 insertions(+), 662 deletions(-)
 create mode 100644 lib_xcore_math/src/arch/vx4b/NOTES.rst
 create mode 100644 lib_xcore_math/src/arch/vx4b/asm_helper.h
 create mode 100644 lib_xcore_math/src/arch/vx4b/chunk_s16/chunk_s16_accumulate.almost
 create mode 100644 lib_xcore_math/src/arch/vx4b/chunk_s16/chunk_s16_accumulate.c
 create mode 100644 lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_dot.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_log.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_power_series.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_power_series_v2.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/dct12_s32.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/dct16_s32.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/dct24_s32.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/dct6_s32.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/dct8_s32.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/dct_adsb_s32.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/dct_deconvolve_s32.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/idct6_s32.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/idct8_s32.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/idct_adsb.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/idct_convolve.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/idct_scale.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s8/dct8x8_stageA.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s8/dct8x8_stageB.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/dct/vect_s32_flip.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/fft/dif_fft.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/fft/dit_fft.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/fft/fft_hr_lut.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/fft/fft_index_bit_reversal.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/fft/fft_mono_adjust.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/fft/fft_spectra_merge.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/fft/fft_spectra_split.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/fft/tail_reverse_complex_s32.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/filter/filter_biquad_s32.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/filter/filter_biquad_sat_s32.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/filter/filter_fir_s16.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/filter/filter_fir_s32.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/filter/push_sample_down_s16.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/filter/push_sample_up_s16.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/filter/vect_s32_convolve_valid.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/matrix/mat_mul_s8_x_s8_yield_s32.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/misc/chunk_float_s32_log.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/misc/util.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/misc/vect_copy.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/misc/vect_float_s32_ln_prepare.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/misc/xs3_memcpy.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/f32_log2.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/f32_norm.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/f32_power_series.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/f32_sin.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/float_s32.c
 create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/float_s32_exp.almost
 create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/q24_logistic_fast.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/q30_exp_small.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/q30_odd_powers.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/q30_powers.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/radians_to_sbrads.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/sbrad_sin.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/sbrad_tan.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s16.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s32.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s8.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/sqrt_s32.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_complex_scale.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conj_macc.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conj_nmacc.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conjugate_mul.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_macc.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_mag.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_mul.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_nmacc.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_real_mul.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_squared_mag.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_sum.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_to_complex_s32.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_complex_scale.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conj_macc.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conj_nmacc.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conjugate.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conjugate_mul.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_macc.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_mag.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_mul.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_nmacc.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_real_mul.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_squared_mag.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_sum.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_to_complex_s16.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_conj_macc.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_conj_mul.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_macc.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_mul.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_add.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_dot.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_max_exponent.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_to_s32.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_abs.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_abs_sum.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_argmax.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_argmin.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_clip.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_dot.c
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_energy.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_extract_high_byte.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_extract_low_byte.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_inverse.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_macc.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_max.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_min.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_mul.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_nmacc.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_scale.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_sqrt.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_sum.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_to_s32.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/s32_to_chunk_s32.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_abs.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_abs_sum.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_argmax.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_argmin.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_clip.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_dot.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_energy.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_inverse.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_macc.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_max.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_merge_accs.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_min.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_mul.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_nmacc.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_scale.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_split_accs.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_sqrt.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_sum.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_to_f32.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_to_s16.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_unzip.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_zip.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s8/vect_s8_is_negative.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_sXX/vect_add.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_sXX/vect_headroom.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_sXX/vect_rect.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_add_scalar.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_max_elementwise.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_min_elementwise.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_sXX/vect_set.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_sXX/vect_shl.S
 create mode 100644 lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sub.S
 create mode 100644 tests/Makefile
 create mode 100644 tests/config.xml

diff --git a/doc/rst/src/reference/notes.h b/doc/rst/src/reference/notes.h
index 5aa326ef..67390f10 100644
--- a/doc/rst/src/reference/notes.h
+++ b/doc/rst/src/reference/notes.h
@@ -1,4 +1,4 @@
-// Copyright 2021-2024 XMOS LIMITED.
+// Copyright 2021-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 // This file exists as a compatibility work-around between vanilla Doxygen and
diff --git a/examples/app_bfp_demo/src/main.c b/examples/app_bfp_demo/src/main.c
index 276df4f5..d1acacd1 100644
--- a/examples/app_bfp_demo/src/main.c
+++ b/examples/app_bfp_demo/src/main.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/examples/app_fft_demo/src/main.c b/examples/app_fft_demo/src/main.c
index a3649ea5..97ea7da8 100644
--- a/examples/app_fft_demo/src/main.c
+++ b/examples/app_fft_demo/src/main.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/examples/app_filter_demo/src/filter_16bit_fir.c b/examples/app_filter_demo/src/filter_16bit_fir.c
index 3aab0391..f05b1ad6 100644
--- a/examples/app_filter_demo/src/filter_16bit_fir.c
+++ b/examples/app_filter_demo/src/filter_16bit_fir.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/examples/app_filter_demo/src/filter_32bit_biquad.c b/examples/app_filter_demo/src/filter_32bit_biquad.c
index 34188b80..c38876e3 100644
--- a/examples/app_filter_demo/src/filter_32bit_biquad.c
+++ b/examples/app_filter_demo/src/filter_32bit_biquad.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/examples/app_filter_demo/src/filter_32bit_fir.c b/examples/app_filter_demo/src/filter_32bit_fir.c
index c986d4c8..8c5b496c 100644
--- a/examples/app_filter_demo/src/filter_32bit_fir.c
+++ b/examples/app_filter_demo/src/filter_32bit_fir.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/examples/app_filter_demo/src/main.c b/examples/app_filter_demo/src/main.c
index 499274e4..04c12fc7 100644
--- a/examples/app_filter_demo/src/main.c
+++ b/examples/app_filter_demo/src/main.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdio.h>
diff --git a/examples/app_vect_demo/src/main.c b/examples/app_vect_demo/src/main.c
index 73a59c04..7b870389 100644
--- a/examples/app_vect_demo/src/main.c
+++ b/examples/app_vect_demo/src/main.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdio.h>
diff --git a/examples/app_vect_demo/src/vect_complex_s16_example.c b/examples/app_vect_demo/src/vect_complex_s16_example.c
index 39277583..dcfd3578 100644
--- a/examples/app_vect_demo/src/vect_complex_s16_example.c
+++ b/examples/app_vect_demo/src/vect_complex_s16_example.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/examples/app_vect_demo/src/vect_s32_example.c b/examples/app_vect_demo/src/vect_s32_example.c
index 5edcfeb4..d52c603b 100644
--- a/examples/app_vect_demo/src/vect_s32_example.c
+++ b/examples/app_vect_demo/src/vect_s32_example.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/lib_xcore_math/CMakeLists.txt b/lib_xcore_math/CMakeLists.txt
index be41759e..299b2df6 100644
--- a/lib_xcore_math/CMakeLists.txt
+++ b/lib_xcore_math/CMakeLists.txt
@@ -15,7 +15,8 @@ file( GLOB_RECURSE    SOURCES_C   "src/vect/*.c"
                                   "src/filter/*.c"
                                   "src/scalar/*.c" )
 file( GLOB_RECURSE    SOURCES_CPP "src/*.cpp" )
-file( GLOB_RECURSE    SOURCES_ASM "src/*.S"   )
+file( GLOB_RECURSE    SOURCES_ASM_XS3 "src/arch/xs3/*.S" )
+file( GLOB_RECURSE    SOURCES_ASM_VX4B "src/arch/vx4b/*.S" )
 file( GLOB_RECURSE    SOURCES_REF "src/arch/ref/*.c" )
 
 add_library( ${LIB_NAME}  STATIC )
@@ -24,7 +25,8 @@ target_sources( ${LIB_NAME}
   PRIVATE
     ${SOURCES_C}
     ${SOURCES_CPP}
-    $<$<PLATFORM_ID:XCORE_XS3A>:${SOURCES_ASM}>
+    $<$<PLATFORM_ID:XCORE_XS3A>:${SOURCES_ASM_XS3}>
+    $<$<PLATFORM_ID:XCORE_VX4B>:${SOURCES_ASM_VX4B}>
     $<$<PLATFORM_ID:Linux>:${SOURCES_REF}>
     $<$<PLATFORM_ID:Darwin>:${SOURCES_REF}>
     $<$<PLATFORM_ID:Windows>:${SOURCES_REF}>
diff --git a/lib_xcore_math/api/xcore_math.h b/lib_xcore_math/api/xcore_math.h
index f20c9065..35206223 100644
--- a/lib_xcore_math/api/xcore_math.h
+++ b/lib_xcore_math/api/xcore_math.h
@@ -1,4 +1,4 @@
-// Copyright 2024 XMOS LIMITED.
+// Copyright 2024-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #ifndef _XCORE_MATH_H_
diff --git a/lib_xcore_math/api/xmath/_support/dct_impl.h b/lib_xcore_math/api/xmath/_support/dct_impl.h
index c1947d37..7e0f01b3 100644
--- a/lib_xcore_math/api/xmath/_support/dct_impl.h
+++ b/lib_xcore_math/api/xmath/_support/dct_impl.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/_support/fft_impl.h b/lib_xcore_math/api/xmath/_support/fft_impl.h
index 7ab4eae8..66e35af8 100644
--- a/lib_xcore_math/api/xmath/_support/fft_impl.h
+++ b/lib_xcore_math/api/xmath/_support/fft_impl.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/api.h b/lib_xcore_math/api/xmath/api.h
index 9e51af19..80273a0f 100644
--- a/lib_xcore_math/api/xmath/api.h
+++ b/lib_xcore_math/api/xmath/api.h
@@ -1,4 +1,4 @@
-// Copyright 2021-2022 XMOS LIMITED.
+// Copyright 2021-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/bfp/bfp.h b/lib_xcore_math/api/xmath/bfp/bfp.h
index 98b61fa4..3baa654a 100644
--- a/lib_xcore_math/api/xmath/bfp/bfp.h
+++ b/lib_xcore_math/api/xmath/bfp/bfp.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/bfp/bfp_complex_s16.h b/lib_xcore_math/api/xmath/bfp/bfp_complex_s16.h
index ce9200a6..3cab8fb6 100644
--- a/lib_xcore_math/api/xmath/bfp/bfp_complex_s16.h
+++ b/lib_xcore_math/api/xmath/bfp/bfp_complex_s16.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/bfp/bfp_complex_s32.h b/lib_xcore_math/api/xmath/bfp/bfp_complex_s32.h
index 2047a509..89590cd3 100644
--- a/lib_xcore_math/api/xmath/bfp/bfp_complex_s32.h
+++ b/lib_xcore_math/api/xmath/bfp/bfp_complex_s32.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/bfp/bfp_misc.h b/lib_xcore_math/api/xmath/bfp/bfp_misc.h
index d01a6f23..d35003c8 100644
--- a/lib_xcore_math/api/xmath/bfp/bfp_misc.h
+++ b/lib_xcore_math/api/xmath/bfp/bfp_misc.h
@@ -1,4 +1,4 @@
-// Copyright 2021-2022 XMOS LIMITED.
+// Copyright 2021-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/bfp/bfp_s16.h b/lib_xcore_math/api/xmath/bfp/bfp_s16.h
index fb2f7f12..0230d4c8 100644
--- a/lib_xcore_math/api/xmath/bfp/bfp_s16.h
+++ b/lib_xcore_math/api/xmath/bfp/bfp_s16.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/bfp/bfp_s32.h b/lib_xcore_math/api/xmath/bfp/bfp_s32.h
index 8c48a747..2402916a 100644
--- a/lib_xcore_math/api/xmath/bfp/bfp_s32.h
+++ b/lib_xcore_math/api/xmath/bfp/bfp_s32.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/dct.h b/lib_xcore_math/api/xmath/dct.h
index ace528cb..60cadbd2 100644
--- a/lib_xcore_math/api/xmath/dct.h
+++ b/lib_xcore_math/api/xmath/dct.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/fft.h b/lib_xcore_math/api/xmath/fft.h
index 46a5dd2f..e1f27682 100644
--- a/lib_xcore_math/api/xmath/fft.h
+++ b/lib_xcore_math/api/xmath/fft.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/filter.h b/lib_xcore_math/api/xmath/filter.h
index 2b029d5e..7e8f0247 100644
--- a/lib_xcore_math/api/xmath/filter.h
+++ b/lib_xcore_math/api/xmath/filter.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/q_format.h b/lib_xcore_math/api/xmath/q_format.h
index e41af168..d7980a01 100644
--- a/lib_xcore_math/api/xmath/q_format.h
+++ b/lib_xcore_math/api/xmath/q_format.h
@@ -1,4 +1,4 @@
-// Copyright 2022 XMOS LIMITED.
+// Copyright 2022-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #pragma once
 
diff --git a/lib_xcore_math/api/xmath/scalar/f32.h b/lib_xcore_math/api/xmath/scalar/f32.h
index fdee5657..dfb63db9 100644
--- a/lib_xcore_math/api/xmath/scalar/f32.h
+++ b/lib_xcore_math/api/xmath/scalar/f32.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/scalar/float_complex_s16.h b/lib_xcore_math/api/xmath/scalar/float_complex_s16.h
index dc7fe911..7a9a0749 100644
--- a/lib_xcore_math/api/xmath/scalar/float_complex_s16.h
+++ b/lib_xcore_math/api/xmath/scalar/float_complex_s16.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/scalar/float_complex_s32.h b/lib_xcore_math/api/xmath/scalar/float_complex_s32.h
index 4ca1a817..d51de583 100644
--- a/lib_xcore_math/api/xmath/scalar/float_complex_s32.h
+++ b/lib_xcore_math/api/xmath/scalar/float_complex_s32.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/scalar/float_s32.h b/lib_xcore_math/api/xmath/scalar/float_s32.h
index 12dd2ddb..8a691f83 100644
--- a/lib_xcore_math/api/xmath/scalar/float_s32.h
+++ b/lib_xcore_math/api/xmath/scalar/float_s32.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/scalar/s16.h b/lib_xcore_math/api/xmath/scalar/s16.h
index 981b626c..5c2c3379 100644
--- a/lib_xcore_math/api/xmath/scalar/s16.h
+++ b/lib_xcore_math/api/xmath/scalar/s16.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/scalar/s32.h b/lib_xcore_math/api/xmath/scalar/s32.h
index 141330f5..f9939a13 100644
--- a/lib_xcore_math/api/xmath/scalar/s32.h
+++ b/lib_xcore_math/api/xmath/scalar/s32.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/scalar/scalar.h b/lib_xcore_math/api/xmath/scalar/scalar.h
index 4a6c12ca..f4284765 100644
--- a/lib_xcore_math/api/xmath/scalar/scalar.h
+++ b/lib_xcore_math/api/xmath/scalar/scalar.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/scalar/scalar_misc.h b/lib_xcore_math/api/xmath/scalar/scalar_misc.h
index ce4c217e..48cc2620 100644
--- a/lib_xcore_math/api/xmath/scalar/scalar_misc.h
+++ b/lib_xcore_math/api/xmath/scalar/scalar_misc.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/types.h b/lib_xcore_math/api/xmath/types.h
index 847145af..05064704 100644
--- a/lib_xcore_math/api/xmath/types.h
+++ b/lib_xcore_math/api/xmath/types.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/util.h b/lib_xcore_math/api/xmath/util.h
index 1cd5828e..35ac2127 100644
--- a/lib_xcore_math/api/xmath/util.h
+++ b/lib_xcore_math/api/xmath/util.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
@@ -238,12 +238,16 @@ void xs3_memcpy(
 static inline unsigned cls(
     const int32_t a)
 {
-#ifdef __XS3A__
+#if defined(__XS3A__)
 
     unsigned res;
     asm( "cls %0, %1" : "=r"(res) : "r"(a) );
     return res;
+#elif defined(__VX4B__)
 
+    unsigned res;
+    asm( "xm.cls %0, %1" : "=r"(res) : "r"(a) );
+    return res;
 #else
 
     if(a == 0 || a == -1)
diff --git a/lib_xcore_math/api/xmath/vect/chunk_s32.h b/lib_xcore_math/api/xmath/vect/chunk_s32.h
index 3bfcdbde..58668541 100644
--- a/lib_xcore_math/api/xmath/vect/chunk_s32.h
+++ b/lib_xcore_math/api/xmath/vect/chunk_s32.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/vect/vect.h b/lib_xcore_math/api/xmath/vect/vect.h
index aee286d3..69d370f9 100644
--- a/lib_xcore_math/api/xmath/vect/vect.h
+++ b/lib_xcore_math/api/xmath/vect/vect.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/vect/vect_complex_s16.h b/lib_xcore_math/api/xmath/vect/vect_complex_s16.h
index 3cf9b9d6..f23825e1 100644
--- a/lib_xcore_math/api/xmath/vect/vect_complex_s16.h
+++ b/lib_xcore_math/api/xmath/vect/vect_complex_s16.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/vect/vect_complex_s16_prepare.h b/lib_xcore_math/api/xmath/vect/vect_complex_s16_prepare.h
index 138a387e..4cc7175c 100644
--- a/lib_xcore_math/api/xmath/vect/vect_complex_s16_prepare.h
+++ b/lib_xcore_math/api/xmath/vect/vect_complex_s16_prepare.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/vect/vect_complex_s32.h b/lib_xcore_math/api/xmath/vect/vect_complex_s32.h
index 082012ac..c3c13912 100644
--- a/lib_xcore_math/api/xmath/vect/vect_complex_s32.h
+++ b/lib_xcore_math/api/xmath/vect/vect_complex_s32.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/vect/vect_complex_s32_prepare.h b/lib_xcore_math/api/xmath/vect/vect_complex_s32_prepare.h
index e402c256..802904cd 100644
--- a/lib_xcore_math/api/xmath/vect/vect_complex_s32_prepare.h
+++ b/lib_xcore_math/api/xmath/vect/vect_complex_s32_prepare.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/vect/vect_f32.h b/lib_xcore_math/api/xmath/vect/vect_f32.h
index af30f2af..c6fd5786 100644
--- a/lib_xcore_math/api/xmath/vect/vect_f32.h
+++ b/lib_xcore_math/api/xmath/vect/vect_f32.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/vect/vect_mixed.h b/lib_xcore_math/api/xmath/vect/vect_mixed.h
index adcab5d4..35eac7e6 100644
--- a/lib_xcore_math/api/xmath/vect/vect_mixed.h
+++ b/lib_xcore_math/api/xmath/vect/vect_mixed.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/vect/vect_s16.h b/lib_xcore_math/api/xmath/vect/vect_s16.h
index efb387f7..a272c3ad 100644
--- a/lib_xcore_math/api/xmath/vect/vect_s16.h
+++ b/lib_xcore_math/api/xmath/vect/vect_s16.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/vect/vect_s16_prepare.h b/lib_xcore_math/api/xmath/vect/vect_s16_prepare.h
index 8ce17e5c..21e85d03 100644
--- a/lib_xcore_math/api/xmath/vect/vect_s16_prepare.h
+++ b/lib_xcore_math/api/xmath/vect/vect_s16_prepare.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/vect/vect_s32.h b/lib_xcore_math/api/xmath/vect/vect_s32.h
index 8a3666e9..e880550a 100644
--- a/lib_xcore_math/api/xmath/vect/vect_s32.h
+++ b/lib_xcore_math/api/xmath/vect/vect_s32.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/vect/vect_s32_prepare.h b/lib_xcore_math/api/xmath/vect/vect_s32_prepare.h
index a80cb47f..8117c82b 100644
--- a/lib_xcore_math/api/xmath/vect/vect_s32_prepare.h
+++ b/lib_xcore_math/api/xmath/vect/vect_s32_prepare.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/vect/vect_s8.h b/lib_xcore_math/api/xmath/vect/vect_s8.h
index b4e9e26e..37a051cb 100644
--- a/lib_xcore_math/api/xmath/vect/vect_s8.h
+++ b/lib_xcore_math/api/xmath/vect/vect_s8.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/xmath.h b/lib_xcore_math/api/xmath/xmath.h
index b6b0fa8f..1857e314 100644
--- a/lib_xcore_math/api/xmath/xmath.h
+++ b/lib_xcore_math/api/xmath/xmath.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/xmath_conf.h b/lib_xcore_math/api/xmath/xmath_conf.h
index c6b44616..868d39f9 100644
--- a/lib_xcore_math/api/xmath/xmath_conf.h
+++ b/lib_xcore_math/api/xmath/xmath_conf.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/api/xmath/xs3/vpu_info.h b/lib_xcore_math/api/xmath/xs3/vpu_info.h
index d219977d..a69aa2db 100644
--- a/lib_xcore_math/api/xmath/xs3/vpu_info.h
+++ b/lib_xcore_math/api/xmath/xs3/vpu_info.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
@@ -72,7 +72,11 @@ enum {
  *
  * @ingroup xs3_vpu_info
 */
-#define VPU_INT8_MIN   ( -0x7F )
+#if defined(__VX4B__)
+#define VPU_INT8_MIN  (  INT8_MIN)
+#else
+#define VPU_INT8_MIN  ( -0x7F )
+#endif
 /** The upper saturation bound for 16-bit elements
  *
  * @ingroup xs3_vpu_info
@@ -82,7 +86,11 @@ enum {
  *
  * @ingroup xs3_vpu_info
 */
+#if defined(__VX4B__)
+#define VPU_INT16_MIN  (  INT16_MIN)
+#else
 #define VPU_INT16_MIN  ( -0x7FFF )
+#endif
 /** The upper saturation bound for 32-bit elements and 32-bit accumulators
  *
  * @ingroup xs3_vpu_info
@@ -92,7 +100,11 @@ enum {
  *
  * @ingroup xs3_vpu_info
 */
+#if defined(__VX4B__)
+#define VPU_INT32_MIN  (  INT32_MIN)
+#else
 #define VPU_INT32_MIN  ( -0x7FFFFFFF )
+#endif
 /** The upper saturation bound for 40-bit accumulators
  *
  * @ingroup xs3_vpu_info
@@ -102,7 +114,11 @@ enum {
  *
  * @ingroup xs3_vpu_info
 */
+#if defined(__VX4B__)
+#define VPU_INT40_MIN  ( -0x8000000000LL)
+#else
 #define VPU_INT40_MIN  ( -0x7FFFFFFFFFLL )
+#endif
 
 /**
  * Number of accumulator bits in each operating mode.
diff --git a/lib_xcore_math/api/xmath/xs3/vpu_scalar_ops.h b/lib_xcore_math/api/xmath/xs3/vpu_scalar_ops.h
index aa1f7615..9ec9cb1a 100644
--- a/lib_xcore_math/api/xmath/xs3/vpu_scalar_ops.h
+++ b/lib_xcore_math/api/xmath/xs3/vpu_scalar_ops.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
@@ -149,6 +149,13 @@ int16_t vlmul16(
     const int16_t x,
     const int16_t y);
 
+/**
+ * Implements the logic of the VLMUL instruction in 16-bit mode.
+ */
+int16_t vlmul16_vx4b(
+    const int16_t x,
+    const int16_t y);
+
 /**
  * Implements the logic of the VLMACC instruction in 16-bit mode.
  */
@@ -170,7 +177,11 @@ vpu_int16_acc_t vlmaccr16(
  */
 int16_t vlsat16(
     const vpu_int16_acc_t acc,
+    #if defined(__VX4B__)
+    const right_shift_t sat);
+    #else
     const unsigned sat);
+    #endif
 
 /**
  * Implements the logic of the VADDDR instruction in 16-bit mode.
diff --git a/lib_xcore_math/python/gen_biquad_filter_s32.py b/lib_xcore_math/python/gen_biquad_filter_s32.py
index 11972696..d9654402 100644
--- a/lib_xcore_math/python/gen_biquad_filter_s32.py
+++ b/lib_xcore_math/python/gen_biquad_filter_s32.py
@@ -1,4 +1,4 @@
-# Copyright 2021-2024 XMOS LIMITED.
+# Copyright 2021-2026 XMOS LIMITED.
 # This Software is subject to the terms of the XMOS Public Licence: Version 1.
 # This Software is subject to the terms of the XMOS Public Licence: Version 1.
 import numpy as np
diff --git a/lib_xcore_math/python/gen_fft_table.py b/lib_xcore_math/python/gen_fft_table.py
index 5029491d..56258ba2 100644
--- a/lib_xcore_math/python/gen_fft_table.py
+++ b/lib_xcore_math/python/gen_fft_table.py
@@ -1,4 +1,4 @@
-# Copyright 2020-2024 XMOS LIMITED.
+# Copyright 2020-2026 XMOS LIMITED.
 # This Software is subject to the terms of the XMOS Public Licence: Version 1.
 import numpy as np
 import argparse
diff --git a/lib_xcore_math/python/gen_fir_filter_s16.py b/lib_xcore_math/python/gen_fir_filter_s16.py
index e8c9bcc5..a9134958 100644
--- a/lib_xcore_math/python/gen_fir_filter_s16.py
+++ b/lib_xcore_math/python/gen_fir_filter_s16.py
@@ -1,4 +1,4 @@
-# Copyright 2021-2024 XMOS LIMITED.
+# Copyright 2021-2026 XMOS LIMITED.
 # This Software is subject to the terms of the XMOS Public Licence: Version 1.
 # This Software is subject to the terms of the XMOS Public Licence: Version 1.
 import numpy as np
diff --git a/lib_xcore_math/python/gen_fir_filter_s32.py b/lib_xcore_math/python/gen_fir_filter_s32.py
index 27ea7ab2..a6b998d7 100644
--- a/lib_xcore_math/python/gen_fir_filter_s32.py
+++ b/lib_xcore_math/python/gen_fir_filter_s32.py
@@ -1,4 +1,4 @@
-# Copyright 2021-2024 XMOS LIMITED.
+# Copyright 2021-2026 XMOS LIMITED.
 # This Software is subject to the terms of the XMOS Public Licence: Version 1.
 # This Software is subject to the terms of the XMOS Public Licence: Version 1.
 import numpy as np
diff --git a/lib_xcore_math/python/gen_rot_table.py b/lib_xcore_math/python/gen_rot_table.py
index d4377153..bb191c38 100644
--- a/lib_xcore_math/python/gen_rot_table.py
+++ b/lib_xcore_math/python/gen_rot_table.py
@@ -1,4 +1,4 @@
-# Copyright 2020-2024 XMOS LIMITED.
+# Copyright 2020-2026 XMOS LIMITED.
 # This Software is subject to the terms of the XMOS Public Licence: Version 1.
 import numpy as np
 
diff --git a/lib_xcore_math/python/xmath_script.py b/lib_xcore_math/python/xmath_script.py
index c91a4429..2718c691 100644
--- a/lib_xcore_math/python/xmath_script.py
+++ b/lib_xcore_math/python/xmath_script.py
@@ -1,4 +1,4 @@
-# Copyright 2021-2024 XMOS LIMITED.
+# Copyright 2021-2026 XMOS LIMITED.
 # This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 import numpy as np
diff --git a/lib_xcore_math/src/arch/ref/bool/vect_s8_is_negative.c b/lib_xcore_math/src/arch/ref/bool/vect_s8_is_negative.c
index 944a48b5..285fd7fe 100644
--- a/lib_xcore_math/src/arch/ref/bool/vect_s8_is_negative.c
+++ b/lib_xcore_math/src/arch/ref/bool/vect_s8_is_negative.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/lib_xcore_math/src/arch/ref/chunk.c b/lib_xcore_math/src/arch/ref/chunk.c
index ff89f546..84774c0a 100644
--- a/lib_xcore_math/src/arch/ref/chunk.c
+++ b/lib_xcore_math/src/arch/ref/chunk.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/chunk_s16_accumulate.c b/lib_xcore_math/src/arch/ref/chunk_s16_accumulate.c
index 57a4cbe0..6695d511 100644
--- a/lib_xcore_math/src/arch/ref/chunk_s16_accumulate.c
+++ b/lib_xcore_math/src/arch/ref/chunk_s16_accumulate.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/complex/vect_complex_conj_macc.c b/lib_xcore_math/src/arch/ref/complex/vect_complex_conj_macc.c
index 454079c9..55b7e546 100644
--- a/lib_xcore_math/src/arch/ref/complex/vect_complex_conj_macc.c
+++ b/lib_xcore_math/src/arch/ref/complex/vect_complex_conj_macc.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/lib_xcore_math/src/arch/ref/complex/vect_complex_conjugate.c b/lib_xcore_math/src/arch/ref/complex/vect_complex_conjugate.c
index cd36ae79..46c4dba3 100644
--- a/lib_xcore_math/src/arch/ref/complex/vect_complex_conjugate.c
+++ b/lib_xcore_math/src/arch/ref/complex/vect_complex_conjugate.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/complex/vect_complex_depth_convert.c b/lib_xcore_math/src/arch/ref/complex/vect_complex_depth_convert.c
index 774b5db6..eb7568d8 100644
--- a/lib_xcore_math/src/arch/ref/complex/vect_complex_depth_convert.c
+++ b/lib_xcore_math/src/arch/ref/complex/vect_complex_depth_convert.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/complex/vect_complex_macc.c b/lib_xcore_math/src/arch/ref/complex/vect_complex_macc.c
index 7f7751c2..6a00a946 100644
--- a/lib_xcore_math/src/arch/ref/complex/vect_complex_macc.c
+++ b/lib_xcore_math/src/arch/ref/complex/vect_complex_macc.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/lib_xcore_math/src/arch/ref/complex/vect_complex_mag.c b/lib_xcore_math/src/arch/ref/complex/vect_complex_mag.c
index cf993a68..bc22f1e9 100644
--- a/lib_xcore_math/src/arch/ref/complex/vect_complex_mag.c
+++ b/lib_xcore_math/src/arch/ref/complex/vect_complex_mag.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/complex/vect_complex_mul.c b/lib_xcore_math/src/arch/ref/complex/vect_complex_mul.c
index f171a413..5f81024c 100644
--- a/lib_xcore_math/src/arch/ref/complex/vect_complex_mul.c
+++ b/lib_xcore_math/src/arch/ref/complex/vect_complex_mul.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/complex/vect_complex_sum.c b/lib_xcore_math/src/arch/ref/complex/vect_complex_sum.c
index 6c50d30e..3d89a07c 100644
--- a/lib_xcore_math/src/arch/ref/complex/vect_complex_sum.c
+++ b/lib_xcore_math/src/arch/ref/complex/vect_complex_sum.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/dct/dct.c b/lib_xcore_math/src/arch/ref/dct/dct.c
index d84ac42c..2c266868 100644
--- a/lib_xcore_math/src/arch/ref/dct/dct.c
+++ b/lib_xcore_math/src/arch/ref/dct/dct.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/dct/dct8x8.c b/lib_xcore_math/src/arch/ref/dct/dct8x8.c
index 8d92d2a3..8e1a45f4 100644
--- a/lib_xcore_math/src/arch/ref/dct/dct8x8.c
+++ b/lib_xcore_math/src/arch/ref/dct/dct8x8.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/dct/idct.c b/lib_xcore_math/src/arch/ref/dct/idct.c
index 65e603d8..25bfef17 100644
--- a/lib_xcore_math/src/arch/ref/dct/idct.c
+++ b/lib_xcore_math/src/arch/ref/dct/idct.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/f32.c b/lib_xcore_math/src/arch/ref/f32.c
index 147d5dbc..4778f2dd 100644
--- a/lib_xcore_math/src/arch/ref/f32.c
+++ b/lib_xcore_math/src/arch/ref/f32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/fft/fft_dif.c b/lib_xcore_math/src/arch/ref/fft/fft_dif.c
index 1a84a8e7..b7fb9143 100644
--- a/lib_xcore_math/src/arch/ref/fft/fft_dif.c
+++ b/lib_xcore_math/src/arch/ref/fft/fft_dif.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/fft/fft_dit.c b/lib_xcore_math/src/arch/ref/fft/fft_dit.c
index 7a30470a..fe7869fd 100644
--- a/lib_xcore_math/src/arch/ref/fft/fft_dit.c
+++ b/lib_xcore_math/src/arch/ref/fft/fft_dit.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/fft/fft_util.c b/lib_xcore_math/src/arch/ref/fft/fft_util.c
index 73f2fd2f..82353959 100644
--- a/lib_xcore_math/src/arch/ref/fft/fft_util.c
+++ b/lib_xcore_math/src/arch/ref/fft/fft_util.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdio.h>
diff --git a/lib_xcore_math/src/arch/ref/filter/filter_biquad_s32.c b/lib_xcore_math/src/arch/ref/filter/filter_biquad_s32.c
index 750d00bf..191cb2d7 100644
--- a/lib_xcore_math/src/arch/ref/filter/filter_biquad_s32.c
+++ b/lib_xcore_math/src/arch/ref/filter/filter_biquad_s32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/lib_xcore_math/src/arch/ref/filter/filter_biquad_sat_s32.c b/lib_xcore_math/src/arch/ref/filter/filter_biquad_sat_s32.c
index 23490e6d..62133156 100644
--- a/lib_xcore_math/src/arch/ref/filter/filter_biquad_sat_s32.c
+++ b/lib_xcore_math/src/arch/ref/filter/filter_biquad_sat_s32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/lib_xcore_math/src/arch/ref/filter/filter_fir_s16.c b/lib_xcore_math/src/arch/ref/filter/filter_fir_s16.c
index 42c924e5..b91a2e03 100644
--- a/lib_xcore_math/src/arch/ref/filter/filter_fir_s16.c
+++ b/lib_xcore_math/src/arch/ref/filter/filter_fir_s16.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/lib_xcore_math/src/arch/ref/filter/filter_fir_s32.c b/lib_xcore_math/src/arch/ref/filter/filter_fir_s32.c
index e4756a02..e597b147 100644
--- a/lib_xcore_math/src/arch/ref/filter/filter_fir_s32.c
+++ b/lib_xcore_math/src/arch/ref/filter/filter_fir_s32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/lib_xcore_math/src/arch/ref/float_s32.c b/lib_xcore_math/src/arch/ref/float_s32.c
index bf1641cd..5cc324f1 100644
--- a/lib_xcore_math/src/arch/ref/float_s32.c
+++ b/lib_xcore_math/src/arch/ref/float_s32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/matrix/mat_mul_s8_x_s8_yield_s32.c b/lib_xcore_math/src/arch/ref/matrix/mat_mul_s8_x_s8_yield_s32.c
index bf9f77ca..193f0fa9 100644
--- a/lib_xcore_math/src/arch/ref/matrix/mat_mul_s8_x_s8_yield_s32.c
+++ b/lib_xcore_math/src/arch/ref/matrix/mat_mul_s8_x_s8_yield_s32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/misc.c b/lib_xcore_math/src/arch/ref/misc.c
index 01cff09d..18ce4536 100644
--- a/lib_xcore_math/src/arch/ref/misc.c
+++ b/lib_xcore_math/src/arch/ref/misc.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/qXX.c b/lib_xcore_math/src/arch/ref/qXX.c
index 27b4fdd6..69d13f5b 100644
--- a/lib_xcore_math/src/arch/ref/qXX.c
+++ b/lib_xcore_math/src/arch/ref/qXX.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/s32_sqrt.c b/lib_xcore_math/src/arch/ref/s32_sqrt.c
index ac6a21c2..1033297f 100644
--- a/lib_xcore_math/src/arch/ref/s32_sqrt.c
+++ b/lib_xcore_math/src/arch/ref/s32_sqrt.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/vect_abs_clip_rect.c b/lib_xcore_math/src/arch/ref/vect_abs_clip_rect.c
index 3a21d3e3..082d72df 100644
--- a/lib_xcore_math/src/arch/ref/vect_abs_clip_rect.c
+++ b/lib_xcore_math/src/arch/ref/vect_abs_clip_rect.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/vect_add_sub.c b/lib_xcore_math/src/arch/ref/vect_add_sub.c
index 87550144..98f188e6 100644
--- a/lib_xcore_math/src/arch/ref/vect_add_sub.c
+++ b/lib_xcore_math/src/arch/ref/vect_add_sub.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/vect_convolve.c b/lib_xcore_math/src/arch/ref/vect_convolve.c
index ef542e75..6f2de966 100644
--- a/lib_xcore_math/src/arch/ref/vect_convolve.c
+++ b/lib_xcore_math/src/arch/ref/vect_convolve.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/vect_copy.c b/lib_xcore_math/src/arch/ref/vect_copy.c
index 2a17e762..1b38dd05 100644
--- a/lib_xcore_math/src/arch/ref/vect_copy.c
+++ b/lib_xcore_math/src/arch/ref/vect_copy.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/vect_depth_convert.c b/lib_xcore_math/src/arch/ref/vect_depth_convert.c
index 69f63a0c..19f85000 100644
--- a/lib_xcore_math/src/arch/ref/vect_depth_convert.c
+++ b/lib_xcore_math/src/arch/ref/vect_depth_convert.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/vect_dot.c b/lib_xcore_math/src/arch/ref/vect_dot.c
index 0b98ebaf..700fd2b8 100644
--- a/lib_xcore_math/src/arch/ref/vect_dot.c
+++ b/lib_xcore_math/src/arch/ref/vect_dot.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/vect_f32.c b/lib_xcore_math/src/arch/ref/vect_f32.c
index 26aaa178..7543c66e 100644
--- a/lib_xcore_math/src/arch/ref/vect_f32.c
+++ b/lib_xcore_math/src/arch/ref/vect_f32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/vect_headroom.c b/lib_xcore_math/src/arch/ref/vect_headroom.c
index 4700f1d3..e8e77e8d 100644
--- a/lib_xcore_math/src/arch/ref/vect_headroom.c
+++ b/lib_xcore_math/src/arch/ref/vect_headroom.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/vect_inverse.c b/lib_xcore_math/src/arch/ref/vect_inverse.c
index 656fe774..d8845d86 100644
--- a/lib_xcore_math/src/arch/ref/vect_inverse.c
+++ b/lib_xcore_math/src/arch/ref/vect_inverse.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/vect_macc.c b/lib_xcore_math/src/arch/ref/vect_macc.c
index 8e3c0ef3..b9069f41 100644
--- a/lib_xcore_math/src/arch/ref/vect_macc.c
+++ b/lib_xcore_math/src/arch/ref/vect_macc.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/lib_xcore_math/src/arch/ref/vect_mul.c b/lib_xcore_math/src/arch/ref/vect_mul.c
index e4871391..fa9ea579 100644
--- a/lib_xcore_math/src/arch/ref/vect_mul.c
+++ b/lib_xcore_math/src/arch/ref/vect_mul.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/vect_s16_extract.c b/lib_xcore_math/src/arch/ref/vect_s16_extract.c
index d4d8153a..a257c884 100644
--- a/lib_xcore_math/src/arch/ref/vect_s16_extract.c
+++ b/lib_xcore_math/src/arch/ref/vect_s16_extract.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/vect_sXX.c b/lib_xcore_math/src/arch/ref/vect_sXX.c
index 1eefc892..0e6508c3 100644
--- a/lib_xcore_math/src/arch/ref/vect_sXX.c
+++ b/lib_xcore_math/src/arch/ref/vect_sXX.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/vect_set.c b/lib_xcore_math/src/arch/ref/vect_set.c
index 08269146..3e9ca808 100644
--- a/lib_xcore_math/src/arch/ref/vect_set.c
+++ b/lib_xcore_math/src/arch/ref/vect_set.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/vect_shl.c b/lib_xcore_math/src/arch/ref/vect_shl.c
index 853ca7ad..5672ec1f 100644
--- a/lib_xcore_math/src/arch/ref/vect_shl.c
+++ b/lib_xcore_math/src/arch/ref/vect_shl.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/vect_sqrt.c b/lib_xcore_math/src/arch/ref/vect_sqrt.c
index 883a1815..0416aa06 100644
--- a/lib_xcore_math/src/arch/ref/vect_sqrt.c
+++ b/lib_xcore_math/src/arch/ref/vect_sqrt.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/vect_stats.c b/lib_xcore_math/src/arch/ref/vect_stats.c
index c937480c..c31fb5ce 100644
--- a/lib_xcore_math/src/arch/ref/vect_stats.c
+++ b/lib_xcore_math/src/arch/ref/vect_stats.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/vect_sum.c b/lib_xcore_math/src/arch/ref/vect_sum.c
index a0683d8f..4a750db1 100644
--- a/lib_xcore_math/src/arch/ref/vect_sum.c
+++ b/lib_xcore_math/src/arch/ref/vect_sum.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/vect_zip.c b/lib_xcore_math/src/arch/ref/vect_zip.c
index 62455503..f6b75b8e 100644
--- a/lib_xcore_math/src/arch/ref/vect_zip.c
+++ b/lib_xcore_math/src/arch/ref/vect_zip.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/arch/ref/vpu_scalar_ops.c b/lib_xcore_math/src/arch/ref/vpu_scalar_ops.c
index 346d4214..15817ad8 100644
--- a/lib_xcore_math/src/arch/ref/vpu_scalar_ops.c
+++ b/lib_xcore_math/src/arch/ref/vpu_scalar_ops.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdio.h>
@@ -82,7 +82,12 @@ int8_t vlmul8(
     const int8_t y)
 {
     int32_t p = ((int32_t)x)*y;
+    
+    #if defined(__VX4B__)
+    p = ROUND_SHR32(p, 7);
+    #else
     p = ROUND_SHR32(p, 6);
+    #endif
     return SAT(8)(p);
 }
 
@@ -197,6 +202,15 @@ int16_t vlmul16(
     return SAT(16)(p);
 }
 
+int16_t vlmul16_vx4b(
+    const int16_t x,
+    const int16_t y)
+{
+    int32_t p = ((int32_t)x)*y;
+    p = ROUND_SHR32(p, 15);
+    return SAT(16)(p);
+}
+
 
 vpu_int16_acc_t vlmacc16(
     const vpu_int16_acc_t acc,
@@ -225,11 +239,20 @@ vpu_int16_acc_t vlmaccr16(
 
 int16_t vlsat16(
     const vpu_int16_acc_t acc,
+    #if defined(__VX4B__)
+    const right_shift_t sat)
+    #else
     const unsigned sat)
+    #endif
 {
-    vpu_int16_acc_t s = acc;
+    int64_t s = acc;
 
-    if(sat >= 32) return (acc >= 0)? 0 : -1;
+    #if defined(__VX4B__)
+        if(sat < 0)
+            s = s << (-sat);
+     #else
+        if(sat >= 32) return (acc >= 0)? 0 : -1;
+    #endif
 
     if(sat > 0)
         s = ((acc >> (sat-1)) + 1) >> 1;
diff --git a/lib_xcore_math/src/arch/vx4b/NOTES.rst b/lib_xcore_math/src/arch/vx4b/NOTES.rst
new file mode 100644
index 00000000..30be7231
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/NOTES.rst
@@ -0,0 +1,5 @@
+The following functions have not been vectorised:
+
+chunk_s16_accumulate
+vect_s16_dot
+float_s32
\ No newline at end of file
diff --git a/lib_xcore_math/src/arch/vx4b/asm_helper.h b/lib_xcore_math/src/arch/vx4b/asm_helper.h
new file mode 100644
index 00000000..43d5bfc6
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/asm_helper.h
@@ -0,0 +1,25 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#ifndef ASM_HELPER_H_
+#define ASM_HELPER_H_
+
+#include "xmath/xmath_conf.h"
+
+#define EPV_LOG2_S8     5
+#define EPV_LOG2_S16    4
+#define EPV_LOG2_S32    3
+#define EPV_LOG2_C32    2
+
+#define SIZEOF_LOG2_S8  0
+#define SIZEOF_LOG2_S16 1
+#define SIZEOF_LOG2_S32 2
+#define SIZEOF_LOG2_C32 3
+
+#define HR_SUB_S8   7
+#define HR_SUB_S16  15
+#define HR_SUB_S32  31
+
+
+    
+#endif // ASM_HELPER_H_
diff --git a/lib_xcore_math/src/arch/vx4b/chunk_s16/chunk_s16_accumulate.almost b/lib_xcore_math/src/arch/vx4b/chunk_s16/chunk_s16_accumulate.almost
new file mode 100644
index 00000000..67b8beb9
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/chunk_s16/chunk_s16_accumulate.almost
@@ -0,0 +1,97 @@
+// Copyright 2020-2022 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+.text
+.align 16; /* Translation error on this line: unexpected token at position 9. */ 
+
+/*  
+
+The first time this is called on a vector, `vpu_init` should be set
+to 0x0100  (16-bit mode with no headroom mask).  This function will
+vsetc with that value, and the result of vgetc will be returned at
+the end of this function. This way the caller need not repeatedly
+compare headroom for each chunk with the minimum found so far.
+
+Instead, after all chunks have been processed, the headroom can be
+computed from the final value returned.
+
+unsigned chunk_s16_accumulate(
+    split_acc_s32_t* acc,
+    const int16_t b[VPU_INT16_EPV],
+    const right_shift_t b_shr,
+    const unsigned vpu_ctrl);
+*/
+#define FUNCTION_NAME   chunk_s16_accumulate
+#define NSTACKWORDS     (4 + 8+4)
+
+#define STACK_VEC_C     (NSTACKWORDS - 8-4)
+
+#define acc       x10
+#define b         x11
+#define b_shr     x12
+#define vec_c     x13
+
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+
+{ mv t3, a3                 ; xm.vldd acc}
+{ nop                       ; xm.vsetc t3}
+{ xm.cls t3, b_shr          ; nop}
+{ addi vec_c,sp, (STACK_VEC_C)*4 ; xm.brff t3, .L_b_shr_neg        }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+.L_b_shr_pos:
+    // non-neg b_shr means we want vlashr
+    la t3, vpu_vec_0x0001
+    xm.vlashr b, b_shr
+  { xm.mkmski b_shr, 32             ; xm.vldc t3}
+    xm.vstrpv vec_c, b_shr
+    addi t3, acc, 32
+  { nop                             ; xm.vldd acc}
+  { nop                             ; xm.vldr t3}
+  { nop                             ; xm.vlmacc0 vec_c}
+  xm.vlmacc1 vec_c
+  { nop                             ; xm.vstd acc}
+    xm.vstrpv t3, b_shr
+  { nop                             ; xm.vgetc t3}
+  { mv a0, t3                 ; xm.retsp (NSTACKWORDS)*4           }
+
+.L_b_shr_neg:
+    // neg b_shr means we want to set c[] to a power of 2
+    la t3, vpu_vec_0x0001
+    xm.vlashr t3, b_shr
+  { xm.mkmski b_shr, 32             ; xm.vldd acc}
+    xm.vstrpv vec_c, b_shr
+  { nop                             ; xm.vldc vec_c}
+    addi t3, acc, 32
+  { nop                             ; xm.vldr t3}
+  { nop                             ; xm.vlmacc0 b}
+  xm.vlmacc1 vec_c
+  { nop                             ; xm.vstd acc}
+    xm.vstrpv t3, b_shr
+  { nop                             ; xm.vgetc t3}
+  { mv a0, t3                 ; xm.retsp (NSTACKWORDS)*4           }
+
+.L_func_end_unpack:
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME,.L_func_end_unpack - FUNCTION_NAME
+
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/chunk_s16/chunk_s16_accumulate.c b/lib_xcore_math/src/arch/vx4b/chunk_s16/chunk_s16_accumulate.c
new file mode 100644
index 00000000..3670acde
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/chunk_s16/chunk_s16_accumulate.c
@@ -0,0 +1,47 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined (__VX4B__)
+#include <stdint.h>
+#include <stdio.h>
+
+#include "xmath/xmath.h"
+#include "vpu_helper.h"
+#include "xmath/xs3/vpu_scalar_ops.h"
+
+
+
+
+
+
+unsigned chunk_s16_accumulate(
+    split_acc_s32_t* acc,
+    const int16_t b[VPU_INT16_EPV],
+    const right_shift_t b_shr,
+    const unsigned vpu_ctrl)
+{
+  unsigned vc = vpu_ctrl & 0x1F;
+
+  for(int k = 0; k < VPU_INT16_EPV; k++){
+    int32_t hi = acc->vD[k];
+    uint32_t lo = acc->vR[k];
+    int32_t acc32 = (hi << 16) | lo;
+
+    int32_t b_mod = b[k];
+    if(b_shr >= 0)
+      b_mod = b_mod >> b_shr;
+    else
+      b_mod = b_mod << (-b_shr);
+
+     acc32 += b_mod;
+
+     acc->vD[k] = (acc32 >> 16) & 0xFFFF;
+     acc->vR[k] = acc32 & 0xFFFF;
+
+     unsigned tmp = 15 - HR_S16(acc->vD[k]);
+     vc = MAX(vc, tmp);
+  }
+  return vc;
+}
+
+
+#endif
\ No newline at end of file
diff --git a/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_dot.S b/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_dot.S
new file mode 100644
index 00000000..d01b2f99
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_dot.S
@@ -0,0 +1,50 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+.text
+.align 16; /* Translation error on this line: unexpected token at position 9. */ 
+
+/*  
+int32_t chunk_s32_dot(
+    const int32_t b[VPU_INT32_EPV],
+    const q2_30 c[VPU_INT32_EPV]);
+*/
+#define FUNCTION_NAME   chunk_s32_dot
+#define NSTACKWORDS     (4)
+
+#define b         x10
+#define c         x11
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+{ li t3, 0                        ; xm.vldc b}
+{ xm.mkmski t3, 4                 ; xm.vsetc t3}
+{ nop                             ; xm.vclrdr                      }
+{ addi a2,sp, 0                   ; xm.vlmaccr0 c}
+  xm.vstrpv a2, t3
+{ nop                             ; lw a0, 0               (sp)}
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+.L_func_end_unpack:
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME,.L_func_end_unpack - FUNCTION_NAME
+
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_log.S b/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_log.S
new file mode 100644
index 00000000..33023259
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_log.S
@@ -0,0 +1,176 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+
+Condition:  0 < ldexp(b[k], -30) < 2
+
+
+void chunk_s32_log(
+    q8_24 a[],
+    const int32_t b[],
+    const exponent_t b_exp);
+*/
+
+
+#define NSTACKWORDS     (8+48+4)
+
+#define FUNCTION_NAME   chunk_s32_log
+
+#define SP_VEC_X1    ((NSTACKWORDS) - 8 -4)
+#define SP_VEC_X2    ((NSTACKWORDS) - 16-4)
+#define SP_VEC_X3    ((NSTACKWORDS) - 24-4)
+#define SP_VEC_X4    ((NSTACKWORDS) - 32-4)
+#define SP_VEC_X5    ((NSTACKWORDS) - 40-4)
+#define SP_VEC_X6    ((NSTACKWORDS) - 48-4)
+
+
+.text
+.p2align 2
+
+.L_ps_coef1: .word -0x800000, -0x800000, -0x800000, -0x800000, -0x800000, -0x800000, -0x800000, -0x800000 /* Translation error on this line: unexpected token at position 13. */ 
+.L_ps_coef2: .word  0x555555,  0x555555,  0x555555,  0x555555,  0x555555,  0x555555,  0x555555,  0x555555 /* Translation error on this line: unexpected token at position 13. */ 
+.L_ps_coef3: .word -0x400000, -0x400000, -0x400000, -0x400000, -0x400000, -0x400000, -0x400000, -0x400000 /* Translation error on this line: unexpected token at position 13. */ 
+.L_ps_coef4: .word  0x333333,  0x333333,  0x333333,  0x333333,  0x333333,  0x333333,  0x333333,  0x333333 /* Translation error on this line: unexpected token at position 13. */ 
+.L_ps_coef5: .word -0x2aaaab, -0x2aaaab, -0x2aaaab, -0x2aaaab, -0x2aaaab, -0x2aaaab, -0x2aaaab, -0x2aaaab /* Translation error on this line: unexpected token at position 13. */ 
+ 
+.L_ln_2: .word 0x2c5c85fe,0x2c5c85fe,0x2c5c85fe,0x2c5c85fe,0x2c5c85fe,0x2c5c85fe,0x2c5c85fe,0x2c5c85fe /* Translation error on this line: unexpected token at position 9. */ 
+
+#define a           x10 
+#define b           x11
+#define b_exp       x12
+#define mantB       x13
+#define tmpA        x18
+#define tmpB        x19
+#define tmpC        x20
+#define vec_x       x21
+#define mantA       x28
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+  xm.stdsp  s5,s4,8
+
+{ addi vec_x,sp, (SP_VEC_X1)*4   ; li t3, 0                  }
+{nop; ; xm.vsetc t3}
+  xm.lddi  mantA,mantB, 0(b)
+  { xm.cls tmpA, mantA             ; nop             }
+  { nop                            ; xm.cls tmpB, mantB             }
+{ xm.shl mantA, mantA, tmpA      ; xm.shl mantB, mantB, tmpB      }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantA, mantA, tmpA      \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantB, mantB, tmpB      \nMessage: The shift amount is not 32" */
+  xm.stdi  mantA,mantB, 0(vec_x)
+{ sub mantA, b_exp, tmpA      ; sub mantB, b_exp, tmpB      }
+  xm.stdi  mantA,mantB, 0(a)
+
+  xm.lddi  mantA,mantB, 8(b)
+  { xm.cls tmpA, mantA             ; nop             }
+  { nop                            ; xm.cls tmpB, mantB             }
+{ xm.shl mantA, mantA, tmpA      ; xm.shl mantB, mantB, tmpB      }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantA, mantA, tmpA      \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantB, mantB, tmpB      \nMessage: The shift amount is not 32" */
+  xm.stdi  mantA,mantB, 8(vec_x)
+{ sub mantA, b_exp, tmpA      ; sub mantB, b_exp, tmpB      }
+  xm.stdi  mantA,mantB, 8(a)
+
+  xm.lddi  mantA,mantB, 16(b)
+  { xm.cls tmpA, mantA             ; nop             }
+  { nop                            ; xm.cls tmpB, mantB             }
+{ xm.shl mantA, mantA, tmpA      ; xm.shl mantB, mantB, tmpB      }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantA, mantA, tmpA      \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantB, mantB, tmpB      \nMessage: The shift amount is not 32" */
+  xm.stdi  mantA,mantB, 16(vec_x)
+{ sub mantA, b_exp, tmpA      ; sub mantB, b_exp, tmpB      }
+  xm.stdi  mantA,mantB, 16(a)
+
+  xm.lddi  mantA,mantB, 24(b)
+  { xm.cls tmpA, mantA             ; nop             }
+  { nop                            ; xm.cls tmpB, mantB             }
+{ xm.shl mantA, mantA, tmpA      ; xm.shl mantB, mantB, tmpB      }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantA, mantA, tmpA      \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantB, mantB, tmpB      \nMessage: The shift amount is not 32" */
+  xm.stdi  mantA,mantB, 24(vec_x)
+{ sub mantA, b_exp, tmpA      ; sub mantB, b_exp, tmpB      }
+  xm.stdi  mantA,mantB, 24(a)
+
+{ li tmpA, 24                ;nop}
+
+  la t3, vpu_vec_0x20000000
+{ nop                         ; xm.vclrdr                      }
+{ xm.neg tmpA, tmpA           ; nop                            }
+  xm.vlashr a, tmpA
+{ xm.ldap t3, .L_ln_2           ; xm.vladd t3}
+{ nop                             ; xm.vlmul0 t3}
+
+{ mv t3, vec_x              ; xm.vstr a}
+{ nop                             ; xm.vldr t3}
+  la t3, vpu_vec_0x00000002
+xm.vlsat t3
+  la t3, vpu_vec_neg_0x40000000
+{ addi tmpB,sp, (SP_VEC_X1)*4    ; xm.vladd t3}
+
+#undef mantA
+#undef mantB
+
+{ addi vec_x,sp, (SP_VEC_X2)*4   ; xm.vstr vec_x}
+{ nop                            ; xm.vlmul0 tmpB} // (x-1.0)^2
+{ addi vec_x,sp, (SP_VEC_X3)*4   ; xm.vstr vec_x}
+{ nop                            ; xm.vlmul0 tmpB} // (x-1.0)^3
+{ addi vec_x,sp, (SP_VEC_X4)*4   ; xm.vstr vec_x}
+{ nop                            ; xm.vlmul0 tmpB} // (x-1.0)^4
+{ addi vec_x,sp, (SP_VEC_X5)*4   ; xm.vstr vec_x}
+{ li tmpA, 6                     ; xm.vlmul0 tmpB} // (x-1.0)^5
+{ addi vec_x,sp, (SP_VEC_X6)*4   ; xm.vstr vec_x}
+{ xm.ldap t3, .L_ps_coef5        ; xm.vlmul0 tmpB} // (x-1.0)^6
+{ addi tmpB,sp, (SP_VEC_X1)*4    ; xm.vstr vec_x}
+
+  xm.vlashr tmpB, tmpA                                         // vR[] = coef[0] * x
+{ xm.ldap t3, .L_ps_coef4       ; xm.vldc t3} // vC[] = coef[5]
+{ addi vec_x,sp, (SP_VEC_X5)*4   ; xm.vlmacc0 vec_x} // vR[] += coef[5] * x^6
+{ xm.ldap t3, .L_ps_coef3       ; xm.vldc t3} // vC[] = coef[4]
+{ addi vec_x,sp, (SP_VEC_X4)*4   ; xm.vlmacc0 vec_x} // vR[] += coef[4] * x^5
+{ xm.ldap t3, .L_ps_coef2       ; xm.vldc t3} // vC[] = coef[3]
+{ addi vec_x,sp, (SP_VEC_X3)*4   ; xm.vlmacc0 vec_x} // vR[] += coef[3] * x^4
+{ xm.ldap t3, .L_ps_coef1       ; xm.vldc t3} // vC[] = coef[2]
+{ addi vec_x,sp, (SP_VEC_X2)*4   ; xm.vlmacc0 vec_x} // vR[] += coef[2] * x^3
+{ nop                             ; xm.vldc t3} // vC[] = coef[1]
+{ addi vec_x,sp, (SP_VEC_X1)*4   ; xm.vlmacc0 vec_x} // vR[] += coef[1] * x^2
+
+{ nop                             ; xm.vladd a}
+{ nop                             ; xm.vstr a}
+
+// Any inputs that were 0 should become INT32_MIN
+  la t3, vpu_vec_0x00000001
+{ nop                             ; xm.vldr t3} 
+{ nop                             ; xm.vlsub b} 
+{ nop                             ; xm.vdepth1                     } 
+{ nop                             ; xm.vstr vec_x}
+{ nop                             ; lw tmpA,0          ( vec_x)}
+{ mv tmpB, tmpA              ; nop                             }
+  xm.zip tmpB, tmpA, 0
+  mv tmpB, tmpA
+  xm.zip tmpB, tmpA, 0
+  la t3, vpu_vec_0x80000000
+{ nop                             ; xm.vldr t3}
+  xm.vstrpv a, tmpA
+
+
+.L_finish:
+  xm.lddsp  s3,s2,0
+  xm.lddsp  s5,s4,8
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_power_series.S b/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_power_series.S
new file mode 100644
index 00000000..27a8421a
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_power_series.S
@@ -0,0 +1,87 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+.text
+.align 4; /* Translation error on this line: unexpected token at position 8. */ 
+
+/*  
+    void chunk_q30_power_series(
+        int32_t a[VPU_INT32_EPV],
+        const q2_30 b[VPU_INT32_EPV],
+        const int32_t coef[],
+        const unsigned term_count);
+*/
+#define FUNCTION_NAME   chunk_q30_power_series
+#define NSTACKWORDS     (8 + 2 * 8 + 4)
+
+#define VEC_POW   (NSTACKWORDS - 8-4)
+#define VEC_ACC   (NSTACKWORDS - 16-4)
+
+#define a           x10
+#define b           x11
+#define coef        x12
+#define len         x13
+#define vec_pow     x18
+#define vec_acc     x19
+#define tmp         x20
+#define _32         x21
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+  xm.stdsp  s5,s4,8
+{ addi len, len, -1             ; nop                             }
+  xm.stdsp  s7,s6,16
+
+{ li t3, 0                  ; xm.vldc coef}
+{ nop                             ; xm.vsetc t3}
+lui t3, %hi(vpu_vec_0x40000000)
+  addi t3,t3, %lo(vpu_vec_0x40000000)
+{ addi vec_pow,sp, (VEC_POW)*4   ; xm.vldr t3}
+{ addi vec_acc,sp, (VEC_ACC)*4   ; xm.vstr vec_pow}
+{ li _32, 32                 ; xm.vclrdr                      }
+{ add coef, coef, _32         ; xm.vlmacc0 vec_pow}
+{ mv t3, vec_pow            ; xm.vstr vec_acc}
+
+  .L_loop_top:
+  { nop                             ; xm.vldr t3}
+  { nop                             ; xm.vlmul0 b}
+  { mv t3, vec_acc            ; xm.vstr vec_pow}
+  { nop                             ; xm.vldr t3}
+  { add coef, coef, _32         ; xm.vldc coef}
+  { addi len, len, -1             ; xm.vlmacc0 vec_pow}
+  { nop                             ; xm.vstr vec_acc}
+  { mv t3, vec_pow            ; xm.bt len, .L_loop_top         }
+
+{ nop                             ; xm.vstr a}
+
+.L_finish:
+  xm.lddsp  s3,s2,0
+  xm.lddsp  s5,s4,8
+  xm.lddsp  s7,s6,16
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+.L_func_end_unpack:
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME,.L_func_end_unpack - FUNCTION_NAME
+
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_power_series_v2.S b/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_power_series_v2.S
new file mode 100644
index 00000000..3e4d48a5
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_power_series_v2.S
@@ -0,0 +1,108 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+.text
+.align 4; /* Translation error on this line: unexpected token at position 8. */ 
+
+/*  
+  The difference between this and chunk_q30_power_series() is that this one doesn't require
+  the coefficient vector to contain redundant elements for each vector index. Instead, this version
+  will broadcast the coefficient to a chunk. For this reason it is significantly slower, but it is
+  also less wasteful of memory.
+
+  NOTE: This hasn't (yet) been officially added to the API
+
+    void chunk_q30_power_series_v2(
+        int32_t a[],
+        const q2_30 b[],
+        const int32_t coef[],
+        const unsigned term_count);
+*/
+#define FUNCTION_NAME   chunk_q20_power_series
+#define NSTACKWORDS     (12 + 4 * 8 + 4)
+
+#define VEC_POW   (NSTACKWORDS - 8-4)
+#define VEC_ACC   (NSTACKWORDS - 16-4)
+#define VEC_TMP   (NSTACKWORDS - 24-4) // -->  [coef, coef, 0, 0, 0, 0, 0, 0]
+                                     // (last six elements must stay zeros)
+#define VEC_COEF  (NSTACKWORDS - 30-4) // -->  [coef, coef, coef, coef, coef, coef, coef, coef]
+                                     // (overlaps VEC_TMP and that's fine)
+
+#define a           x10
+#define b           x11
+#define coef        x12
+#define len         x13
+#define vec_pow     x18
+#define vec_acc     x19
+#define vec_coef    x20
+#define tmp         x21
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+  xm.stdsp  s5,s4,8
+{ addi tmp,sp, (VEC_TMP)*4       ; xm.vclrdr                      }
+  xm.stdsp  s7,s6,16
+{ li t3, 0                  ; xm.vstd tmp}
+{ addi len, len, -1             ; xm.vsetc t3}
+{ addi vec_pow,sp, (VEC_POW)*4   ; lw t3,0            ( coef)}
+  xm.stdi  t3,t3, 0(tmp)
+{ addi vec_acc,sp, (VEC_ACC)*4   ; xm.vldd tmp}
+{ addi tmp,sp, (VEC_COEF)*4      ; xm.vfttf                       }
+{ addi coef, coef, 4           ; xm.vstd vec_coef}
+{ nop                             ; xm.vldc vec_coef}
+lui t3, %hi(vpu_vec_0x40000000)
+  addi t3,t3, %lo(vpu_vec_0x40000000)
+{ nop                             ; xm.vldr t3}
+{ nop                             ; xm.vstr vec_pow}
+{ nop                             ; xm.vclrdr                      }
+{ nop                             ; xm.vlmacc0 vec_pow}
+{ mv t3, vec_pow            ; xm.vstr vec_acc}
+
+  .L_loop_top:
+  { nop                             ; xm.vldr t3}
+  { nop                             ; xm.vlmul0 b}
+  { mv t3, vec_acc            ; xm.vstr vec_pow}
+  { nop                             ; xm.vldr t3}
+  { addi coef, coef, 4           ; lw t3,0            ( coef)}
+    xm.stdi  t3,t3, 0(tmp)
+  { nop                             ; xm.vldd tmp}
+  { nop                             ; xm.vfttf                       }
+  { nop                             ; xm.vstd vec_coef}
+  { nop                             ; xm.vldc vec_coef}
+  { addi len, len, -1             ; xm.vlmacc0 vec_pow}
+  { nop                             ; xm.vstr vec_acc}
+  { mv t3, vec_pow            ; xm.bt len, .L_loop_top         }
+
+{ nop                             ; xm.vstr a}
+
+.L_finish:
+  xm.lddsp  s3,s2,0
+  xm.lddsp  s5,s4,8
+  xm.lddsp  s7,s6,16
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+.L_func_end_unpack:
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME,.L_func_end_unpack - FUNCTION_NAME
+
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/dct12_s32.S b/lib_xcore_math/src/arch/vx4b/dct/s32/dct12_s32.S
new file mode 100644
index 00000000..034055d7
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/dct/s32/dct12_s32.S
@@ -0,0 +1,140 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined(__VX4B__)
+
+
+/*  
+
+Perform an 12-point forward DCT.
+
+void dct12_forward(
+    int32_t y[12],
+    const int32_t x[12]);
+
+*/
+
+#define FUNCTION_NAME   dct12_forward
+#define NSTACKWORDS 12
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 2
+
+#define VEC_TMP       (NSTACKWORDS - 8 - 2)
+
+#define y         x10
+#define x         x11
+
+#define a         x12
+#define b         x13
+#define c         x18
+#define d         x19
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+
+  // Reverse the tail half of x[], placing it in y[]
+  // leave the head half where it is
+  xm.lddi  a,b, 24(x)
+  xm.lddi  c,d, 40 (x)// Load these in case x and y are the same address
+  xm.stdi  b,a, 40(y)
+  xm.lddi  a,b, 32(x)
+  li t3, 0x80
+  xm.stdi  b,a, 32(y)
+  xm.stdi  d,c, 24(y)
+
+// Take the sum and difference of the head and (flipped) tail
+// also dividing by 2 so that we don't saturate.
+{ li a, 24                   ; nop                             }
+{ add t3, y, a               ; xm.vsetc t3}
+{ nop                             ; xm.vldr t3}
+{ addi t3,sp, (VEC_TMP)*4       ; xm.vladsb x}
+{ add x, y, a                 ; xm.vstd t3}
+
+#undef x  //no longer needed
+#undef y
+// now x10 points at the first half of y and x11 at the second half
+#define left    x10
+#define right   x11
+{ nop                             ; xm.vstr left}
+{ xm.mkmski a, 24                 ; xm.vldr t3}
+lui t3, %hi(dct12_lut)
+  addi t3,t3, %lo(dct12_lut)
+{ addi t3,sp, (VEC_TMP)*4       ; xm.vlmul0 t3}
+  xm.vstrpv t3, a
+
+
+// DCT the the sum of the head and tail, placing the result in
+// the second half of y[] (for now)
+{ li b , 32                  ; xm.vldc left}
+lui t3, %hi(dct6_matrix)
+  addi t3,t3, %lo(dct6_matrix)
+{ mv a, t3                  ; xm.vclrdr                      }
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ nop                             ; xm.vlmaccr0 t3}
+lui t3, %hi(vpu_vec_0x10000000) 
+  addi t3,t3, %lo(vpu_vec_0x10000000) // ashr vR[] right 2 bits
+{ xm.mkmski t3, 24               ; xm.vlmul0 t3}
+  xm.vstrpv right, t3 // put in right half so left half is clear
+                       // when we start interleaving them
+
+// DCT the difference of head and tail, placing the result 
+// on the stack
+{ addi t3,sp, (VEC_TMP)*4       ; xm.vclrdr                      }
+{ mv t3, a                  ; xm.vldc t3} // DCT right half (from stack vec)
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ nop                             ; xm.vlmaccr0 t3}
+lui t3, %hi(vpu_vec_0x20000000) 
+  addi t3,t3, %lo(vpu_vec_0x20000000) // shr vR[] right 1 bit (to simplify deconvolution)
+{ addi t3,sp, (VEC_TMP)*4       ; xm.vlmul0 t3}
+{ nop                             ; xm.vstr t3} // store on stack so we don't clobber
+                                                              // anything when we interleave
+
+// Now simultaneously rearrange stuff in memory while deconvolving the
+// second DCT that we did
+  xm.lddi  b,d, 0(t3)
+  srai b, b, 1
+{ nop                             ; lw a,0             ( right)}
+  xm.stdi  a,b, 0(left)
+{ sub d, d, b                 ; lw a,4             ( right)}
+  xm.stdi  a,d, 8(left)
+  xm.lddi  b,c, 8(t3)
+{ sub b, b, d                 ; lw a,8             ( right)}
+  xm.stdi  a,b, 16(left)
+{ sub c, c, b                 ; lw a,12             ( right)}
+  xm.stdi  a,c, 24(left)
+  xm.lddi  b,d, 16   (t3)
+{ sub b, b, c                 ; lw a,16             ( right)}
+  xm.stdi  a,b, 32(left)
+{ sub d, d, b                 ; lw a,20             ( right)}
+  xm.stdi  a,d, 40(left)
+  
+  xm.lddsp  s3,s2,0
+{ nop                             ; xm.retsp (NSTACKWORDS)*4           }
+
+
+	
+.set	FUNCTION_NAME.nstackwords,NSTACKWORDS
+.globl	FUNCTION_NAME.nstackwords
+.set	FUNCTION_NAME.maxcores,1
+.globl	FUNCTION_NAME.maxcores
+.set	FUNCTION_NAME.maxtimers,0
+.globl	FUNCTION_NAME.maxtimers
+.set	FUNCTION_NAME.maxchanends,0
+.globl	FUNCTION_NAME.maxchanends
+.Ltmp0:
+	.size	FUNCTION_NAME, .Ltmp0-FUNCTION_NAME    
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/dct16_s32.S b/lib_xcore_math/src/arch/vx4b/dct/s32/dct16_s32.S
new file mode 100644
index 00000000..1820d37e
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/dct/s32/dct16_s32.S
@@ -0,0 +1,157 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined(__VX4B__)
+
+
+/*  
+
+Perform an 16-point forward DCT.
+
+void dct16_forward(
+    int32_t y[16],
+    const int32_t x[16]);
+
+*/
+
+#define FUNCTION_NAME   dct16_forward
+#define NSTACKWORDS 12
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 2
+
+#define VEC_TMP       (NSTACKWORDS - 8 - 2)
+
+#define y         x10
+#define x         x11
+
+#define a         x12
+#define b         x13
+#define c         x18
+#define d         x19
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+
+  // Reverse the tail half of x[], placing it in y[]
+  // leave the head half where it is
+  xm.lddi  a,b, 32(x)
+  xm.lddi  c,d, 56(x)
+  xm.stdi  b,a, 56(y)
+  xm.stdi  d,c, 32(y)
+
+  xm.lddi  a,b, 40(x)
+  xm.lddi  c,d, 48(x)
+  xm.stdi  b,a, 48(y)
+  xm.stdi  d,c, 40(y)
+
+  li t3, 0x80
+  
+// Take the sum and difference of the head and (flipped) tail
+// also dividing by 2 so that we don't saturate.
+{ li a, 32                   ; nop                             }
+{ add t3, y, a               ; xm.vsetc t3}
+{ nop                             ; xm.vldr t3}
+{ addi t3,sp, (VEC_TMP)*4       ; xm.vladsb x}
+{ add x, y, a                 ; xm.vstd t3}
+
+#undef x  //no longer needed
+#undef y
+// now x10 points at the first half of y and x11 at the second half
+#define left    x10
+#define right   x11
+{ nop                             ; xm.vstr left}
+{ nop                             ; xm.vldr t3}
+lui t3, %hi(dct16_lut)
+  addi t3,t3, %lo(dct16_lut)
+{ addi t3,sp, (VEC_TMP)*4       ; xm.vlmul0 t3}
+{ nop                             ; xm.vstr t3}
+
+
+// DCT the the sum of the head and tail, placing the result in
+// the second half of y[] (for now)
+{ li b, 32                   ; xm.vldc left}
+lui t3, %hi(dct8_matrix)
+  addi t3,t3, %lo(dct8_matrix)
+{ mv a, t3                  ; xm.vclrdr                      }
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+xm.vlsat t3  
+lui t3, %hi(vpu_vec_0x10000000) 
+  addi t3,t3, %lo(vpu_vec_0x10000000) // ashr vR[] right 2 bits
+{ xm.mkmski t3, 24               ; xm.vlmul0 t3}
+{ nop                             ; xm.vstr right}
+
+// DCT the difference of head and tail, placing the result 
+// on the stack
+{ addi t3,sp, (VEC_TMP)*4       ; xm.vclrdr                      }
+{ mv t3, a                  ; xm.vldc t3} // DCT right half (from stack vec)
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+{ add t3, t3, b             ; xm.vlmaccr0 t3}
+xm.vlsat t3  
+lui t3, %hi(vpu_vec_0x20000000) 
+  addi t3,t3, %lo(vpu_vec_0x20000000) // shr vR[] right 1 bit (to simplify deconvolution)
+{ addi t3,sp, (VEC_TMP)*4       ; xm.vlmul0 t3}
+{ nop                             ; xm.vstr t3} // store on stack so we don't clobber
+                                                              // anything when we interleave
+
+// Now simultaneously rearrange stuff in memory while deconvolving the
+// second DCT that we did
+  xm.lddi  b,d, 0(t3)
+  srai c, b, 1
+{ nop                             ; lw a,0             ( right)}
+  xm.stdi  a,c, 0(left)
+{ sub c, d, c                 ; lw a,4             ( right)}
+  xm.stdi  a,c, 8(left)
+
+  xm.lddi  b,d, 8(t3)
+{ sub c, b, c                 ; lw a,8             ( right)}
+  xm.stdi  a,c, 16(left)
+{ sub c, d, c                 ; lw a,12             ( right)}
+  xm.stdi  a,c, 24(left)
+
+  xm.lddi  b,d, 16(t3)
+{ sub c, b, c                 ; lw a,16             ( right)}
+  xm.stdi  a,c, 32(left)
+{ sub c, d, c                 ; lw a,20             ( right)}
+  xm.stdi  a,c, 40(left)
+
+  xm.lddi  b,d, 24(t3)
+{ sub c, b, c                 ; lw a,24             ( right)}
+  xm.stdi  a,c, 48(left)
+{ sub c, d, c                 ; lw a,28             ( right)}
+  xm.stdi  a,c, 56(left)
+
+  xm.lddsp  s3,s2,0
+{ nop                             ; xm.retsp (NSTACKWORDS)*4           }
+
+
+	
+.set	FUNCTION_NAME.nstackwords,NSTACKWORDS
+.globl	FUNCTION_NAME.nstackwords
+.set	FUNCTION_NAME.maxcores,1
+.globl	FUNCTION_NAME.maxcores
+.set	FUNCTION_NAME.maxtimers,0
+.globl	FUNCTION_NAME.maxtimers
+.set	FUNCTION_NAME.maxchanends,0
+.globl	FUNCTION_NAME.maxchanends
+.Ltmp0:
+	.size	FUNCTION_NAME, .Ltmp0-FUNCTION_NAME    
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/dct24_s32.S b/lib_xcore_math/src/arch/vx4b/dct/s32/dct24_s32.S
new file mode 100644
index 00000000..21156961
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/dct/s32/dct24_s32.S
@@ -0,0 +1,178 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined(__VX4B__)
+
+
+/*  
+
+Perform an 24-point forward DCT.
+
+Computed directly by multiplying by the DCT matrix. The output has elements ordered 
+so that when used in recursive DCT computation the bit-reversed indexing can be used
+to deconvolve those that need it.
+
+void dct24_forward(
+    int32_t y[24],
+    const int32_t x[24]);
+
+*/
+
+#define FUNCTION_NAME   dct24_forward
+#define NSTACKWORDS 44
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 2
+
+#define STK_VEC_HEAD       (NSTACKWORDS - 16-2)
+#define STK_VEC_TAIL       (NSTACKWORDS - 8-2)
+
+#define y         x10
+#define x         x11
+
+#define a         x12
+#define b         x13
+#define c         x18
+#define d         x19
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,8
+
+  // Reverse the tail half of x[], placing it in y[]
+  // leave the head half where it is
+  xm.lddi  a,b, 48(x)
+  xm.lddi  c,d, 88(x)
+  xm.stdi  b,a, 88(y)
+  xm.stdi  d,c, 48(y)
+  
+  xm.lddi  a,b, 56(x)
+  xm.lddi  c,d, 80(x)
+  xm.stdi  b,a, 80(y)
+  xm.stdi  d,c, 56(y)
+
+  li t3, 0x80
+
+  xm.lddi  a,b, 64(x)
+  xm.lddi  c,d, 72(x)
+  xm.stdi  b,a, 72(y)
+  xm.stdi  d,c, 64(y)
+
+// Take the sum and difference between the head and (flipped) tail
+// the sum goes into y[0:12], the difference goes into tmp[0:12]
+{ li a, 48                   ; addi d,sp, (STK_VEC_HEAD)*4    }
+{ add t3, y, a               ; xm.vsetc t3} // x28 <-- &y[12]
+{ li b, 32                   ; xm.vldr t3} // vR[] <-- y[12:20]
+{ nop                             ; xm.vladsb x} // vR[] <-- sum; vD[] <-- diff
+{ nop                             ; xm.vstd d} // tmp[0:8] <-- diff[0:8]
+{ add t3, t3, b             ; xm.vstr y} // y[0:8] <-- sum[0:8]
+{ add x, x, b                 ; xm.vldr t3} // vR[] <-- y[20:24]
+{ addi t3,sp, (STK_VEC_TAIL)*4  ; xm.vladsb x} // sum/diff; orig x no longer needed
+{ add t3, y, b               ; xm.vstd t3} // tmp[8:12] <-- diff[8:12]
+{ add x, y, a                 ; xm.vstr t3} // y[8:12] <-- sum[8:12]
+
+// multiply tail component by DCT LUT
+lui t3, %hi(dct24_lut)
+  addi t3,t3, %lo(dct24_lut)
+{ nop                             ; xm.vldr t3}
+{ add a, d, b                 ; xm.vlmul0 d}
+{ add t3, t3, b             ; xm.vstr d}
+{ nop                             ; xm.vldr t3}
+{ nop                             ; xm.vlmul0 a}
+{ nop                             ; xm.vstr a}
+
+#define left    x10  // Contains &y[0]
+#define right   x11  // Contains &y[12]
+
+// perform 12-point DCTs on the head and tail sub-sequences.
+//  y[0:12] (head) --> DCT12 --> y[12:24]
+//  tmp[0:12] (tail) --> DCT12 --> tmp[0:12]
+// The head is being moved to the end of y so that it isn't in
+// the way when we need to do deconvolution
+  xm.stdsp  a1,a0,16
+  xm.stdsp  a3,a2,24
+
+// DCT12(head[])
+{ mv a0, right               ; mv a1, left                }
+lui t3, %hi(dct12_forward)
+  addi t3,t3, %lo(dct12_forward)
+{ nop                             ; jalr t3                     }
+// DCT12(tail[])
+{ xm.ldawsp a0, STK_VEC_HEAD*4   ; nop}
+{ xm.ldawsp a1, STK_VEC_HEAD*4   ; nop}
+lui t3, %hi(dct12_forward)   
+  addi t3,t3, %lo(dct12_forward)   
+{ nop                             ; jalr t3                     }
+  xm.lddsp  a1,a0,16
+  xm.lddsp  a3,a2,24
+
+// Before deconvolution, right-shift the head vector 2 bits, and 
+// right-shift the tail vector 1 bit
+{ li a, 1                    ; xm.mkmski c, 16                 }
+  xm.vlashr d, a
+{ addi t3,sp, (STK_VEC_TAIL)*4  ; xm.vstr d}
+  xm.vlashr t3, a
+{ li a, 2                    ; xm.vstr t3}
+  xm.vlashr right, a
+{ add t3, right, b           ; xm.vstr right}
+  xm.vlashr t3, a
+  xm.vstrpv t3, c
+
+// Finally, begin deconvolving and interleaving
+
+{ mv t3, d                  ; nop                             }
+  xm.lddi  b,d, 0(t3)
+  srai b, b, 1
+{ nop                             ; lw a,0             ( right)}
+  xm.stdi  a,b, 0(left)
+{ sub d, d, b                 ; lw a,4             ( right)}
+  xm.stdi  a,d, 8(left)
+  xm.lddi  b,c, 8(t3)
+{ sub b, b, d                 ; lw a,8             ( right)}
+  xm.stdi  a,b, 16(left)
+{ sub c, c, b                 ; lw a,12             ( right)}
+  xm.stdi  a,c, 24(left)
+  xm.lddi  b,d, 16   (t3)
+{ sub b, b, c                 ; lw a,16             ( right)}
+  xm.stdi  a,b, 32(left)
+{ sub d, d, b                 ; lw a,20             ( right)}
+  xm.stdi  a,d, 40(left)
+  
+  xm.lddi  b,c, 24(t3)
+{ sub b, b, d                 ; lw a,24             ( right)}
+  xm.stdi  a,b, 48(left)
+{ sub c, c, b                 ; lw a,28             ( right)}
+  xm.stdi  a,c, 56(left)
+  
+  xm.lddi  b,d, 32(t3)
+{ sub b, b, c                 ; lw a,32             ( right)}
+  xm.stdi  a,b, 64(left)
+{ sub d, d, b                 ; lw a,36             ( right)}
+  xm.stdi  a,d, 72(left)
+  
+  xm.lddi  b,c, 40(t3)
+{ sub b, b, d                 ; lw a,40             ( right)}
+  xm.stdi  a,b, 80(left)
+{ sub c, c, b                 ; lw a,44             ( right)}
+  xm.stdi  a,c, 88(left)
+  
+  xm.lddsp  s3,s2,8
+{ nop                             ; xm.retsp (NSTACKWORDS)*4           }
+
+
+	
+.set	FUNCTION_NAME.nstackwords,(NSTACKWORDS+12)
+.globl	FUNCTION_NAME.nstackwords
+.set	FUNCTION_NAME.maxcores,1
+.globl	FUNCTION_NAME.maxcores
+.set	FUNCTION_NAME.maxtimers,0
+.globl	FUNCTION_NAME.maxtimers
+.set	FUNCTION_NAME.maxchanends,0
+.globl	FUNCTION_NAME.maxchanends
+.Ltmp0:
+	.size	FUNCTION_NAME, .Ltmp0-FUNCTION_NAME    
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/dct6_s32.S b/lib_xcore_math/src/arch/vx4b/dct/s32/dct6_s32.S
new file mode 100644
index 00000000..0d57c0f6
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/dct/s32/dct6_s32.S
@@ -0,0 +1,65 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined(__VX4B__)
+
+
+/*  
+
+Perform an 6-point forward DCT.
+
+Computed directly by multiplying by the DCT matrix. The output has elements ordered 
+so that when used in recursive DCT computation the bit-reversed indexing can be used
+to deconvolve those that need it.
+
+void dct6_forward(
+    int32_t y[6],
+    const int32_t x[6]);
+
+*/
+
+#define FUNCTION_NAME   dct6_forward
+#define NSTACKWORDS 0
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 2
+
+#define y         x10
+#define x         x11
+#define mask      x12
+#define _32       x13
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+{ li t3, 0                  ; li _32, 32                 }
+{ xm.mkmski mask, 24              ; xm.vsetc t3}
+{ nop                             ; xm.vldc x}
+lui t3, %hi(dct6_matrix)
+  addi t3,t3, %lo(dct6_matrix)
+{ nop                             ; xm.vclrdr                      }
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ nop                             ; xm.vlmaccr0 t3}
+  xm.vstrpv y, mask
+{ nop                             ; xm.retsp (NSTACKWORDS)*4           }
+
+
+	
+.set	FUNCTION_NAME.nstackwords,NSTACKWORDS
+.globl	FUNCTION_NAME.nstackwords
+.set	FUNCTION_NAME.maxcores,1
+.globl	FUNCTION_NAME.maxcores
+.set	FUNCTION_NAME.maxtimers,0
+.globl	FUNCTION_NAME.maxtimers
+.set	FUNCTION_NAME.maxchanends,0
+.globl	FUNCTION_NAME.maxchanends
+.Ltmp0:
+	.size	FUNCTION_NAME, .Ltmp0-FUNCTION_NAME    
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/dct8_s32.S b/lib_xcore_math/src/arch/vx4b/dct/s32/dct8_s32.S
new file mode 100644
index 00000000..806ad559
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/dct/s32/dct8_s32.S
@@ -0,0 +1,67 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined(__VX4B__)
+
+
+/*  
+
+Perform an 8-point forward DCT.
+
+Computed directly by multiplying by the DCT matrix.
+
+headroom_t dct8_forward(
+    int32_t y[8],
+    const int32_t x[8]);
+
+*/
+
+#define FUNCTION_NAME   dct8_forward
+#define NSTACKWORDS 0
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 4
+
+#define y       x10
+#define x       x11
+#define tmp     x12
+#define _32     x13
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+{ li t3, 0                  ; li _32, 32                 }
+{ nop                             ; xm.vsetc t3}
+lui t3, %hi(dct8_matrix)
+  addi t3,t3, %lo(dct8_matrix)
+{ nop                             ; xm.vclrdr                      }
+{ nop                             ; xm.vldc x}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+xm.vlsat t3 
+{ nop                             ; xm.vstr y}
+{ li a0, 31                  ; xm.vgetc t3}
+{ xm.zexti t3, 5                 ; nop                             }
+{ sub a0, a0, t3             ; xm.retsp (NSTACKWORDS)*4           }
+
+	
+.set	FUNCTION_NAME.nstackwords,NSTACKWORDS
+.globl	FUNCTION_NAME.nstackwords
+.set	FUNCTION_NAME.maxcores,1
+.globl	FUNCTION_NAME.maxcores
+.set	FUNCTION_NAME.maxtimers,0
+.globl	FUNCTION_NAME.maxtimers
+.set	FUNCTION_NAME.maxchanends,0
+.globl	FUNCTION_NAME.maxchanends
+.Ltmp0:
+	.size	FUNCTION_NAME, .Ltmp0-FUNCTION_NAME    
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/dct_adsb_s32.S b/lib_xcore_math/src/arch/vx4b/dct/s32/dct_adsb_s32.S
new file mode 100644
index 00000000..287c18b6
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/dct/s32/dct_adsb_s32.S
@@ -0,0 +1,78 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined(__VX4B__)
+
+
+/*  
+
+headroom_t dct_adsb_s32(
+    int32_t sums[],
+    int32_t diffs[],
+    const int32_t head[],
+    const int32_t tail[],
+    const unsigned chunks,
+    const int32_t dct_lut[]);
+
+*/
+
+#define FUNCTION_NAME   dct_adsb_s32
+#define NSTACKWORDS 8
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 2
+
+#define sums        x10
+#define diffs       x11
+#define head        x12
+#define tail        x13
+#define chunks      x18
+#define lut         x19
+#define _32         x20
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+  xm.stdsp  s5,s4,8
+
+{ li _32, 32                 ; nop}
+mv chunks, a4
+
+{ xm.shli t3, _32, 2             ; nop}
+mv lut, a5
+
+{ mv t3, tail               ; xm.vsetc t3}
+
+.L_loop_top:
+  { addi chunks, chunks, -1       ; xm.vldr t3}
+  { add tail, tail, _32         ; xm.vladsb head}
+  { add head, head, _32         ; xm.vstr sums}
+  { mv t3, lut                ; xm.vstd diffs}
+  { add lut, lut, _32           ; xm.vldr t3}
+  { add sums, sums, _32         ; xm.vlmul0 diffs}
+  { add diffs, diffs, _32       ; xm.vstr diffs}
+  { mv t3, tail               ; xm.bt chunks, .L_loop_top      }
+.L_loop_bot:
+
+  xm.lddsp  s5,s4,8
+  xm.lddsp  s3,s2,0
+{ li a0, 31                ; xm.vgetc t3}
+{ xm.zexti t3, 5               ; nop                             }
+{ sub a0, a0, t3           ; xm.retsp (NSTACKWORDS)*4           }
+
+	
+.set	FUNCTION_NAME.nstackwords,NSTACKWORDS
+.globl	FUNCTION_NAME.nstackwords
+.set	FUNCTION_NAME.maxcores,1
+.globl	FUNCTION_NAME.maxcores
+.set	FUNCTION_NAME.maxtimers,0
+.globl	FUNCTION_NAME.maxtimers
+.set	FUNCTION_NAME.maxchanends,0
+.globl	FUNCTION_NAME.maxchanends
+.Ltmp0:
+	.size	FUNCTION_NAME, .Ltmp0-FUNCTION_NAME    
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/dct_deconvolve_s32.S b/lib_xcore_math/src/arch/vx4b/dct/s32/dct_deconvolve_s32.S
new file mode 100644
index 00000000..f55c7f00
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/dct/s32/dct_deconvolve_s32.S
@@ -0,0 +1,81 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined(__VX4B__)
+
+
+/*  
+
+length must be a multiple of 8
+
+void dct_deconvolve_s32(
+    int32_t res[],
+    const int32_t B[],
+    const int32_t D[],
+    const unsigned length);
+
+*/
+
+#define FUNCTION_NAME   dct_deconvolve_s32
+#define NSTACKWORDS 8
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 2
+
+#define res     x10
+#define B       x11
+#define D       x12
+#define len     x13
+#define even    x18
+#define a       x19
+#define b       x20
+#define c       x21
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+  xm.stdsp  s5,s4,8
+
+// Just set it up so that c contains half D[0], so that when
+// it's subtracted from D[0] we get (D[0] >> 1)
+  { srli len, len, 3             ; lw c,0                 ( D)}
+    srai c, c, 1
+  { li t3, 16                 ; xm.bu .L_loop_top              }
+
+.p2align 4
+.L_loop_top:
+    xm.lddi  a,b, 0(D)
+  { sub a, a, c                 ; lw even,0              ( B)}
+    xm.stdi  even,a, 0(res)
+  { sub b, b, a                 ; lw even,4              ( B)}
+    xm.stdi  even,b, 8(res)
+    xm.lddi  a,c, 8(D)
+  { sub a, a, b                 ; lw even,8              ( B)}
+    xm.stdi  even,a, 16(res)
+  { sub c, c, a                 ; lw even,12              ( B)}
+    xm.stdi  even,c, 24(res)
+  { add D, D, t3               ; addi len, len, -1             }
+  { add res, res, t3           ; add B, B, t3               }
+  { add res, res, t3           ; xm.bt len, .L_loop_top         }
+.L_loop_bot:
+
+.L_finish:
+  xm.lddsp  s5,s4,8
+  xm.lddsp  s3,s2,0  
+  xm.retsp (NSTACKWORDS)*4  /* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS  \nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+	
+.set	FUNCTION_NAME.nstackwords,NSTACKWORDS
+.globl	FUNCTION_NAME.nstackwords
+.set	FUNCTION_NAME.maxcores,1
+.globl	FUNCTION_NAME.maxcores
+.set	FUNCTION_NAME.maxtimers,0
+.globl	FUNCTION_NAME.maxtimers
+.set	FUNCTION_NAME.maxchanends,0
+.globl	FUNCTION_NAME.maxchanends
+.Ltmp0:
+	.size	FUNCTION_NAME, .Ltmp0-FUNCTION_NAME    
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/idct6_s32.S b/lib_xcore_math/src/arch/vx4b/dct/s32/idct6_s32.S
new file mode 100644
index 00000000..0187b7b9
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/dct/s32/idct6_s32.S
@@ -0,0 +1,64 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined(__VX4B__)
+
+
+/*  
+
+Perform an 6-point inverse DCT.
+
+Computed directly by multiplying by the DCT matrix.
+
+headroom_t dct6_inverse(
+    int32_t y[6],
+    const int32_t x[6]);
+
+*/
+
+#define FUNCTION_NAME   dct6_inverse
+#define NSTACKWORDS 0
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 2
+
+#define y         x10
+#define x         x11
+#define mask      x12
+#define _32       x13
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+{ li t3, 0                  ; li _32, 32                 }
+{ xm.mkmski mask, 24              ; xm.vsetc t3}
+{ nop                             ; xm.vldc x}
+lui t3, %hi(idct6_matrix)
+  addi t3,t3, %lo(idct6_matrix)
+{ nop                             ; xm.vclrdr                      }
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+xm.vlsat t3 
+  xm.vstrpv y, mask
+{ nop                             ; xm.retsp (NSTACKWORDS)*4           }
+
+
+	
+.set	FUNCTION_NAME.nstackwords,NSTACKWORDS
+.globl	FUNCTION_NAME.nstackwords
+.set	FUNCTION_NAME.maxcores,1
+.globl	FUNCTION_NAME.maxcores
+.set	FUNCTION_NAME.maxtimers,0
+.globl	FUNCTION_NAME.maxtimers
+.set	FUNCTION_NAME.maxchanends,0
+.globl	FUNCTION_NAME.maxchanends
+.Ltmp0:
+	.size	FUNCTION_NAME, .Ltmp0-FUNCTION_NAME    
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/idct8_s32.S b/lib_xcore_math/src/arch/vx4b/dct/s32/idct8_s32.S
new file mode 100644
index 00000000..09cfa0df
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/dct/s32/idct8_s32.S
@@ -0,0 +1,65 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined(__VX4B__)
+
+
+/*  
+
+Perform an 8-point inverse DCT.
+
+Computed directly by multiplying by the IDCT matrix.
+
+headroom_t dct8_inverse(
+    int32_t y[8],
+    const int32_t x[8]);
+
+*/
+
+#define FUNCTION_NAME   dct8_inverse
+#define NSTACKWORDS 0
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 4
+
+#define y       x10
+#define x       x11
+#define tmp     x12
+#define _32     x13
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+{ li t3, 0                  ; li _32, 32                 }
+{ nop                             ; xm.vsetc t3}
+lui t3, %hi(idct8_matrix)
+  addi t3,t3, %lo(idct8_matrix)
+{ nop                             ; xm.vclrdr                      }
+{ nop                             ; xm.vldc x}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+{ add t3, t3, _32           ; xm.vlmaccr0 t3}
+xm.vlsat t3 
+{ nop                             ; xm.vstr y}
+{ nop                             ; xm.retsp (NSTACKWORDS)*4           }
+
+	
+.set	FUNCTION_NAME.nstackwords,NSTACKWORDS
+.globl	FUNCTION_NAME.nstackwords
+.set	FUNCTION_NAME.maxcores,1
+.globl	FUNCTION_NAME.maxcores
+.set	FUNCTION_NAME.maxtimers,0
+.globl	FUNCTION_NAME.maxtimers
+.set	FUNCTION_NAME.maxchanends,0
+.globl	FUNCTION_NAME.maxchanends
+.Ltmp0:
+	.size	FUNCTION_NAME, .Ltmp0-FUNCTION_NAME    
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/idct_adsb.S b/lib_xcore_math/src/arch/vx4b/dct/s32/idct_adsb.S
new file mode 100644
index 00000000..63f3bafa
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/dct/s32/idct_adsb.S
@@ -0,0 +1,73 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined(__VX4B__)
+
+
+/*  
+
+void idct_adsb(
+    int32_t sums[],
+    int32_t diffs[],
+    const int32_t head[],
+    const int32_t tail[],
+    const unsigned chunks);
+
+*/
+
+#define FUNCTION_NAME   idct_adsb
+#define NSTACKWORDS 8
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 2
+
+#define STK_CHUNKS  (NSTACKWORDS+1)
+
+#define sums        x10
+#define diffs       x11
+#define s           x12
+#define t_tilde     x13
+#define chunks      x18
+#define _32         x19
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+  xm.stdsp  s5,s4,8
+
+//{ li _32, 32                 ; lw chunks, (STK_CHUNKS)*4  (sp)}
+  { li _32, 32                 ; nop }
+  mv chunks, a4
+
+
+{ li t3, 0                  ; nop                             }
+{ mv t3, t_tilde            ; xm.vsetc t3}
+
+.L_loop_top:
+  { addi chunks, chunks, -1       ; xm.vldr t3}
+  { add t3, t3, _32           ; xm.vladsb s}
+  { add s, s, _32               ; xm.vstr sums}
+  { add sums, sums, _32         ; xm.vstd diffs}
+  { add diffs, diffs, _32       ; xm.bt chunks, .L_loop_top      }
+.L_loop_bot:
+
+  xm.lddsp  s5,s4,8
+  xm.lddsp  s3,s2,0
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+	
+.set	FUNCTION_NAME.nstackwords,NSTACKWORDS
+.globl	FUNCTION_NAME.nstackwords
+.set	FUNCTION_NAME.maxcores,1
+.globl	FUNCTION_NAME.maxcores
+.set	FUNCTION_NAME.maxtimers,0
+.globl	FUNCTION_NAME.maxtimers
+.set	FUNCTION_NAME.maxchanends,0
+.globl	FUNCTION_NAME.maxchanends
+.Ltmp0:
+	.size	FUNCTION_NAME, .Ltmp0-FUNCTION_NAME    
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/idct_convolve.S b/lib_xcore_math/src/arch/vx4b/dct/s32/idct_convolve.S
new file mode 100644
index 00000000..612b94b7
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/dct/s32/idct_convolve.S
@@ -0,0 +1,80 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined(__VX4B__)
+
+
+/*  
+
+Applies the convolution needed for the recursive IDCT.
+
+given x[], the result is
+
+  y[0] = x[0]
+  y[1:] = 0.5*(y[1:] + y[0:-1])
+
+Each "chunk" is 8 elements, so if the data isn't a multiple of 8 elements
+you'll need buffer space at the end of the data that can be safely clobbered.
+
+void idct_convolve(
+    int32_t y[],
+    const int32_t x[],
+    const unsigned chunks);
+
+*/
+
+#define FUNCTION_NAME   idct_convolve
+#define NSTACKWORDS 4
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 2
+
+#define y        x10
+#define x        x11
+#define chunks   x12
+#define _32      x13
+#define tmp      x18
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+
+  // This has to start at the end or else values will get clobbered
+  // early if it's done in-place
+
+  li t3, 0x80  // 32-bit mode with SHR=1 on VLADSB
+{ xm.shli s3, chunks, 3           ; xm.vsetc t3}
+{ li _32, 32                 ; lw tmp,0               ( x)}
+  sh2add x, s3, x
+  sh2add y, s3, y
+{ sub x, x, _32               ; sub y, y, _32               }
+{ addi t3, x, -4               ; nop                             }
+
+.L_loop_top:
+  { addi chunks, chunks, -1       ; xm.vldr t3}
+  { sub x, x, _32               ; xm.vladsb x}
+  { sub y, y, _32               ; xm.vstr y}
+  { addi t3, x, -4               ; xm.bt chunks, .L_loop_top      }
+.L_loop_bot:
+  
+  sw tmp,32 ( y)// y is pointing 8 words before where it started
+  
+  xm.lddsp  s3,s2,0
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+	
+.set	FUNCTION_NAME.nstackwords,NSTACKWORDS
+.globl	FUNCTION_NAME.nstackwords
+.set	FUNCTION_NAME.maxcores,1
+.globl	FUNCTION_NAME.maxcores
+.set	FUNCTION_NAME.maxtimers,0
+.globl	FUNCTION_NAME.maxtimers
+.set	FUNCTION_NAME.maxchanends,0
+.globl	FUNCTION_NAME.maxchanends
+.Ltmp0:
+	.size	FUNCTION_NAME, .Ltmp0-FUNCTION_NAME    
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/idct_scale.S b/lib_xcore_math/src/arch/vx4b/dct/s32/idct_scale.S
new file mode 100644
index 00000000..22110856
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/dct/s32/idct_scale.S
@@ -0,0 +1,66 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined(__VX4B__)
+
+
+/*  
+
+
+
+void idct_scale(
+    int32_t x[],
+    const int32_t idct_lut[],
+    const unsigned chunks,
+    const right_shift_t shr);
+
+*/
+
+#define FUNCTION_NAME   idct_scale
+#define NSTACKWORDS 4
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 2
+
+
+#define x        x10
+#define lut      x11
+#define chunks   x12
+#define shr      x13
+#define _32      x18
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+
+{ li t3, 0                  ; li _32, 32                 }
+{ mv t3, lut                ; xm.vsetc t3}
+
+.L_loop_top:
+  { add t3, t3, _32           ; xm.vldr t3}
+  { nop                             ; xm.vlmul0 x}
+  { nop                             ; xm.vstr x}
+    xm.vlashr x, shr
+  { addi chunks, chunks, -1       ; xm.vstr x}
+  { add x, x, _32               ; xm.bt chunks, .L_loop_top      }
+.L_loop_bot:
+  
+  xm.lddsp  s3,s2,0
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+	
+.set	FUNCTION_NAME.nstackwords,NSTACKWORDS
+.globl	FUNCTION_NAME.nstackwords
+.set	FUNCTION_NAME.maxcores,1
+.globl	FUNCTION_NAME.maxcores
+.set	FUNCTION_NAME.maxtimers,0
+.globl	FUNCTION_NAME.maxtimers
+.set	FUNCTION_NAME.maxchanends,0
+.globl	FUNCTION_NAME.maxchanends
+.Ltmp0:
+	.size	FUNCTION_NAME, .Ltmp0-FUNCTION_NAME    
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/dct/s8/dct8x8_stageA.S b/lib_xcore_math/src/arch/vx4b/dct/s8/dct8x8_stageA.S
new file mode 100644
index 00000000..8c0ff228
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/dct/s8/dct8x8_stageA.S
@@ -0,0 +1,187 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined(__VX4B__)
+
+
+/*  
+
+Perform the first step of a 2D 8-by-8 forward or inverse DCT on 8-bit data.
+
+The first step takes an 8-bit tensor x[8][8] as input and populates a 16-bit
+tensor y[8][8] as output.
+
+The operation is to perform an 8-point DCT on each row of x[][] to get
+an intermediate tensor tmp[][], and then populate y[][] with the TRANSPOSE of
+tmp[][].
+
+Whether the forward or inverse DCT is performed depends on whether the
+dct_matrix[][] argument points to dct8_matrix_16bit[][] or 
+idct8_matrix_16bit[][].
+
+headroom_t dct8x8_stageA(
+    int16_t y[8][8],
+    const int8_t x[8][8],
+    const int16_t matrix[8][16]);
+
+*/
+
+#define FUNCTION_NAME   dct8x8_stageA
+#define NSTACKWORDS 36
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 4
+
+#define STK_BUFF      (NSTACKWORDS - 32-1)
+#define STK_LAST_ROW  (NSTACKWORDS - 4-1) // will point to last row of 16-bit buffered input matrix
+
+#define y       x10
+#define x       x11
+#define mat     x12
+#  define _16     mat
+#define buff    x13
+#define count   x18
+#define _32     x19
+
+// Because a 16-bit DCT matrix is used and 8-bit inputs, the maximum accumulator value is
+// 2^24, and we don't want to output anything larger than 2^14 (otherwise dct8x8_part2()
+// could saturate the accumulators) so we down-shift the accumulators 10 bits.
+.L_sat_vec: .short 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10 /* Translation error on this line: unexpected token at position 12. */ 
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+  
+////// Expand to 16-bits
+
+  li t3, 0x200 // 8-bit mode
+{ addi buff,sp, (STK_BUFF)*4     ; xm.vsetc t3}
+lui t3, %hi(vpu_vec_0x01)
+  addi t3,t3, %lo(vpu_vec_0x01)
+{ li t3, 16                 ; xm.vldc t3}
+
+{ li _32, 32                 ; xm.vclrdr                      }
+{ add count, x, t3           ; xm.vlmacc0 x}
+{ add buff, buff, _32         ; xm.vstr buff}
+
+{ nop                             ; xm.vclrdr                      }
+{ add count, count, t3       ; xm.vlmacc0 count}
+{ add buff, buff, _32         ; xm.vstr buff}
+
+{ nop                             ; xm.vclrdr                      }
+{ add count, count, t3       ; xm.vlmacc0 count}
+{ add buff, buff, _32         ; xm.vstr buff}
+
+{ nop                             ; xm.vclrdr                      }
+{ nop                             ; xm.vlmacc0 count}
+{ nop                             ; xm.vstr buff}
+  
+////// Perform eight 8-point, 16-bit DCTs
+
+// The trick here is that we'll transpose while computing the
+// output. Instead of loading the row from x[] into vC[], we'll
+// load a row from the DCT matrix, and each vlmaccr will apply
+// to a different row of x[].
+// Then when we saturate and store that in y[], we'll have
+// what would have been the first COLUMN of output as the first
+// ROW of output.
+
+// The other catch is that the data needs to be masked to avoid
+// including the wrong stuff in the accumulators. This is easily
+// handled by just padding the matrix with 0's (then it will be
+// the same size as the 32-bit DCT8 matrix).
+
+// Finally, we'll compute two rows of output per loop iteration,
+// since we have enough accumulators to do so.
+
+// (also, we don't need the original x[] pointer anymore, so we'll
+//  put something else in there)
+#undef x
+#define sat  x11
+
+  li t3, 0x100 // 16-bit mode
+{ nop                             ; xm.vsetc t3}
+lui t3, %hi(.L_sat_vec)
+  addi t3,t3, %lo(.L_sat_vec)
+{ li count, 4                ; mv sat, t3                }
+{ li _16, 16                 ; mv t3, mat                } // NOTE: _16 and mat are the same register!
+.L_loop_top:
+  { add t3, t3, _32           ; xm.vclrdr                      }
+  { addi buff,sp, (STK_LAST_ROW)*4 ; xm.vldc t3}
+  
+  { nop                         ; xm.vlmaccr0 buff}
+  xm.vlmaccr1 buff
+  { sub buff, buff, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 buff}
+  xm.vlmaccr1 buff
+  { sub buff, buff, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 buff}
+  xm.vlmaccr1 buff
+  { sub buff, buff, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 buff}
+  xm.vlmaccr1 buff
+  { sub buff, buff, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 buff}
+  xm.vlmaccr1 buff
+  { sub buff, buff, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 buff}
+  xm.vlmaccr1 buff
+  { sub buff, buff, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 buff}
+  xm.vlmaccr1 buff
+  { sub buff, buff, _16         ; nop}
+  { sub t3, t3, _32           ; xm.vlmaccr0 buff}
+  xm.vlmaccr1 buff
+  { addi buff,sp, (STK_LAST_ROW)*4 ; xm.vldc t3}
+  { nop                         ; xm.vlmaccr0 buff}
+  xm.vlmaccr1 buff
+  { sub buff, buff, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 buff}
+  xm.vlmaccr1 buff
+  { sub buff, buff, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 buff}
+  xm.vlmaccr1 buff
+  { sub buff, buff, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 buff}
+  xm.vlmaccr1 buff
+  { sub buff, buff, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 buff}
+  xm.vlmaccr1 buff
+  { sub buff, buff, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 buff}
+  xm.vlmaccr1 buff
+  { sub buff, buff, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 buff}
+  xm.vlmaccr1 buff
+  { sub buff, buff, _16         ; nop}
+  
+  { addi count, count, -1         ; xm.vlmaccr0 buff}
+  xm.vlmaccr1 buff
+  { add t3, t3, _32           ; nop}
+   xm.vlsat sat
+  { add t3, t3, _32           ; xm.vstr y}
+  { add y, y, _32               ; nop}
+  bnez count, .L_loop_top   
+.L_loop_bot:
+
+  xm.lddsp  s3,s2,0
+  
+{ li a0, 15                  ; xm.vgetc t3}
+{ xm.zexti t3, 5                 ; nop                             }
+{ sub a0, a0, t3             ; xm.retsp (NSTACKWORDS)*4           }
+
+	
+.set	FUNCTION_NAME.nstackwords,NSTACKWORDS
+.globl	FUNCTION_NAME.nstackwords
+.set	FUNCTION_NAME.maxcores,1
+.globl	FUNCTION_NAME.maxcores
+.set	FUNCTION_NAME.maxtimers,0
+.globl	FUNCTION_NAME.maxtimers
+.set	FUNCTION_NAME.maxchanends,0
+.globl	FUNCTION_NAME.maxchanends
+.Ltmp0:
+	.size	FUNCTION_NAME, .Ltmp0-FUNCTION_NAME    
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/dct/s8/dct8x8_stageB.S b/lib_xcore_math/src/arch/vx4b/dct/s8/dct8x8_stageB.S
new file mode 100644
index 00000000..23b13fb4
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/dct/s8/dct8x8_stageB.S
@@ -0,0 +1,191 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined(__VX4B__)
+
+
+/*  
+
+Perform the final step of a 2D 8-by-8 forward or inverse DCT on 8-bit data.
+
+The first step takes an 8-bit tensor x[8][8] as input and populates a 16-bit
+tensor y[8][8] as output. The first step is implemented as dct8x8_stageA().
+
+The final step takes a 16-bit tensor x[8][8] as input and populates an 8-bit
+tensor y[8][8] as output.
+
+The operation is to perform an 8-point DCT on each row of x[][] to get
+an intermediate tensor tmp[][], and then populate y[][] with the TRANSPOSE of
+tmp[][].
+
+Whether the forward or inverse DCT is performed depends on whether the
+matrix[][] argument points to dct8_matrix_16bit[][] or 
+idct8_matrix_16bit[][].
+
+headroom_t dct8_inversex8_stageB(
+    int8_t y[8][8],
+    const int16_t x[8][8],
+    const int16_t matrix[8][16],
+    const right_shift_t sat);
+
+*/
+
+#define FUNCTION_NAME   dct8x8_stageB
+#define NSTACKWORDS 40
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 4
+
+#define STK_BUFF      (NSTACKWORDS - 32-1)
+
+#define y       x10
+#define x       x11
+#define mat     x12
+#  define _32     mat
+#define buff    x13
+#define count   x18
+#define A       x19
+#define mask    x20
+#define _16     x21
+#define sat     x22
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+  xm.stdsp  s5,s4,8
+  xm.stdsp  s7,s6,16
+  
+  li t3, 0x100 // 16-bit mode
+{ li _16, 16                 ; xm.vsetc t3}
+{ add a3, a3, _16             ; add t3, a3, _16            }
+xm.zip t3, a3, 4
+
+// Store VLSAT argument vector in y[] (which won't be needed
+// until after all VLSATs are done).
+  xm.stdi  a3,a3, 0(y)
+  xm.stdi  a3,a3, 8(y)
+  xm.stdi  a3,a3, 16(y)
+  xm.stdi  a3,a3, 24(y)
+
+////// Perform eight 8-point, 16-bit DCTs
+
+// We'll place the result on the stack as 16-bit values because it
+// will be faster than switching between modes while DCTing.
+// We'll again do the transpose in-flight.
+// The stack space doesn't matter because stageA uses the same amount
+
+{ li count, 4                ; addi buff,sp, (STK_BUFF)*4     }
+{ nop                             ; li t3, 28                 }
+// We need to traverse the rows of x[] backwards to get elements
+// in the right output order.
+  sh2add x, t3, x
+{ li _32, 32                 ; mv t3, mat                } // NOTE: _32 and mat are the same register!
+
+.L_loop_top:
+  { add t3, t3, _32           ; xm.vclrdr                      }
+  { mv A, x                    ; xm.vldc t3}
+
+  { nop                         ; xm.vlmaccr0 A}
+  xm.vlmaccr1 A
+  { sub A, A, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 A}
+  xm.vlmaccr1 A
+  { sub A, A, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 A}
+  xm.vlmaccr1 A
+  { sub A, A, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 A}
+  xm.vlmaccr1 A
+  { sub A, A, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 A}
+  xm.vlmaccr1 A
+  { sub A, A, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 A}
+  xm.vlmaccr1 A
+  { sub A, A, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 A}
+  xm.vlmaccr1 A
+  { sub A, A, _16         ; nop}
+  { sub t3, t3, _32           ; xm.vlmaccr0 A}
+  xm.vlmaccr1 A
+  { mv A, x                    ; xm.vldc t3}
+  { nop                         ; xm.vlmaccr0 A}
+  xm.vlmaccr1 A
+  { sub A, A, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 A}
+  xm.vlmaccr1 A
+  { sub A, A, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 A}
+  xm.vlmaccr1 A
+  { sub A, A, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 A}
+  xm.vlmaccr1 A
+  { sub A, A, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 A}
+  xm.vlmaccr1 A
+  { sub A, A, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 A}
+  xm.vlmaccr1 A
+  { sub A, A, _16         ; nop}
+  { nop                         ; xm.vlmaccr0 A}
+  xm.vlmaccr1 A
+  { sub A, A, _16         ; nop}
+
+  { add t3, t3, _32           ; xm.vlmaccr0 A}
+  xm.vlmaccr1 A
+  { add t3, t3, _32           ; nop}
+  xm.vlsat y
+  { addi count, count, -1         ; xm.vstr buff}
+  { add buff, buff, _32         ; nop}
+  bnez count, .L_loop_top       
+.L_loop_bot:
+
+// We could get the headroom right now on the 16-bit values, but
+// there's a chance that VDEPTH8 causes a value to round away from
+// zero in a way that decreases headroom.
+
+// Reduce depth to 8 bits, moving to y[].
+{ addi t3,sp, (STK_BUFF)*4        ; nop                             }
+{ add t3, t3, _32             ; xm.vldr t3}
+{ nop                               ; xm.vdepth8                     }
+{ add y, y, _16                 ; xm.vstr y}
+{ add t3, t3, _32             ; xm.vldr t3}
+{ nop                               ; xm.vdepth8                     }
+{ add y, y, _16                 ; xm.vstr y}
+{ add t3, t3, _32             ; xm.vldr t3}
+{ xm.mkmski mask, 16                ; xm.vdepth8                     }
+{ add y, y, _16                 ; xm.vstr y}
+{ xm.shli t3, _32, 4 /*8-bit mode*/; xm.vldr t3}
+{ add _16, _32, _16             ; xm.vdepth8                     }
+  xm.vstrpv y, mask
+
+// Load/store one last time to get headroom
+{ sub y, y, _16                 ; xm.vsetc t3}
+{ nop                               ; xm.vldd y}
+{ add y, y, _32                 ; xm.vstd y}
+{ nop                               ; xm.vldd y}
+{ nop                               ; xm.vstd y}
+
+  xm.lddsp  s3,s2,0
+  xm.lddsp  s5,s4,8
+  xm.lddsp  s7,s6,16
+
+{ li a0, 7                   ; xm.vgetc t3}
+{ xm.zexti t3, 5                 ; nop                             }
+{ sub a0, a0, t3             ; xm.retsp (NSTACKWORDS)*4           }
+
+	
+.set	FUNCTION_NAME.nstackwords,NSTACKWORDS
+.globl	FUNCTION_NAME.nstackwords
+.set	FUNCTION_NAME.maxcores,1
+.globl	FUNCTION_NAME.maxcores
+.set	FUNCTION_NAME.maxtimers,0
+.globl	FUNCTION_NAME.maxtimers
+.set	FUNCTION_NAME.maxchanends,0
+.globl	FUNCTION_NAME.maxchanends
+.Ltmp0:
+	.size	FUNCTION_NAME, .Ltmp0-FUNCTION_NAME    
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/dct/vect_s32_flip.S b/lib_xcore_math/src/arch/vx4b/dct/vect_s32_flip.S
new file mode 100644
index 00000000..71d5eb32
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/dct/vect_s32_flip.S
@@ -0,0 +1,54 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined(__VX4B__)
+
+
+/*  
+
+void vect_s32_flip(
+    int32_t y[],
+    const int32_t x[],
+    const unsigned length);
+
+*/
+
+#define FUNCTION_NAME   vect_s32_flip
+#define NSTACKWORDS 0
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 4
+
+#define y       x10
+#define x       x11
+#define len     x12
+#define a       x13
+#define b       x28
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+{ addi x, x, -4                 ; addi y, y, -8                 }
+.L_loop_top:
+  { addi y, y, 4                 ; lw a,4                 ( x)}
+  { addi x, x, 4                 ; xm.ldw b, len               ( x)}
+    xm.stw a,len(y) /* XAT Warning: "Falling back on assumption: the int < 12 for the integer value of the item at position 2 in the instruction's operands in stwi a, y,len                       \nMessage: The offset can be encoded in s2rus immediate" */
+  { addi len, len, -2             ; sw b,4                 ( y)}
+  { nop                             ; xm.bt len, .L_loop_top         }
+.L_loop_bottom:
+  xm.retsp (NSTACKWORDS)*4  /* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS  \nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+	
+.set	FUNCTION_NAME.nstackwords,NSTACKWORDS
+.globl	FUNCTION_NAME.nstackwords
+.set	FUNCTION_NAME.maxcores,1
+.globl	FUNCTION_NAME.maxcores
+.set	FUNCTION_NAME.maxtimers,0
+.globl	FUNCTION_NAME.maxtimers
+.set	FUNCTION_NAME.maxchanends,0
+.globl	FUNCTION_NAME.maxchanends
+.Ltmp0:
+	.size	FUNCTION_NAME, .Ltmp0-FUNCTION_NAME    
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/fft/dif_fft.S b/lib_xcore_math/src/arch/vx4b/fft/dif_fft.S
new file mode 100644
index 00000000..43c4bcac
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/fft/dif_fft.S
@@ -0,0 +1,285 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+    void fft_dif_forward (
+        complex_s32_t * x, 
+        unsigned n, 
+        headroom_t* hr, 
+        exponent_t* exp);
+    
+    void fft_dif_inverse (
+        complex_s32_t* x, 
+        unsigned n, 
+        headroom_t* hr, 
+        exponent_t* exp);
+*/
+
+
+#define NSTACKWORDS (64)
+
+#define STACK_N         (8)
+#define STACK_EXP       (9)
+
+#define x_p 			x10 
+#define n 				x11 
+#define hr_p            x12
+#define exp_minus_one   hr_p
+#define twiddle_lut_p 	x13
+#define _32             x18
+#define j               x19
+#define k               x20
+#define a               x21
+#define b               x22
+#define exp_modifier    x23
+#define s               x24
+
+
+.text
+.globl	fft_dif_forward
+.type	fft_dif_forward,@function
+
+.p2align 2
+fft_dif_forward:
+
+	xm.entsp (NSTACKWORDS)*4
+	xm.stdsp  x18,x12,0*8
+	xm.stdsp  x19,x20,1*8
+	xm.stdsp  x21,x22,2*8
+	xm.stdsp  x23,x24,3*8
+
+    { nop                                           ;   sw a3, (STACK_EXP)*4                   (sp)}
+        la t3, xmath_dif_fft_lut_size
+    {   li s3, 32                              ;   lw s2,0                          ( t3)}
+		la t3, xmath_dif_fft_lut
+    {   add twiddle_lut_p, t3, s2              ;   xm.shli s2, n, 3                            }
+    {   add twiddle_lut_p, twiddle_lut_p, s3    ; nop                                           }
+    {   sub twiddle_lut_p, twiddle_lut_p, s2    ; nop                                           }
+
+dif_fft_impl_start:
+	{   li s, 31                               ;   lw t3,0                        ( hr_p)}
+	{   sub s, s, t3                           ;   srli j, n, 2                             }
+	
+	la t3, fft_hr_lut	
+	
+	{   li _32, 32                              ;   xm.ldw t3,s(t3)}
+	{   mv exp_modifier, t3                     ;   xm.vsetc t3}
+
+	{   srli s, n, 3                             ;   sw n, (STACK_N)*4                      (sp)}
+	{   mv t3, x_p;                             ;   xm.brff s, dif_fft_last_two_rounds_4_point   } 
+
+	    mul b, n, _32   
+	{   srli b, b, 3                             ;   mv a, _32                              } //astew: `shl b, n, 2`
+	{   srli n, n, 4                             ; nop                                           } 
+	
+	la t3, fft_hr_lut	
+		
+	
+	{   mv s, t3                              ;   sub k, b, _32                           }
+
+dif_fft_round_loop:
+	dif_fft_outer_loop:
+		{   add t3, x_p, k                         ;   mv j, a                                }
+	 	{   add twiddle_lut_p, twiddle_lut_p, _32   ;   xm.vldc twiddle_lut_p}
+
+		dif_fft_inner_loop:
+			{   add t3, t3, b                         ;   xm.vldr t3}
+			{   sub t3, t3, b                         ;   xm.vladsb t3}
+			{   add t3, t3, b                         ;   xm.vstr t3}
+			{   sub j, j, _32                           ;   xm.vcmr0                                    }
+			{ nop                                           ;   xm.vcmi0                                    }
+			{ nop                                           ;   xm.vstr t3}
+			{   add t3, t3, b                         ;   xm.bt j, dif_fft_inner_loop                }
+
+		{   sub k, k, _32                         ;   xm.bt k, dif_fft_outer_loop                }
+
+	{   srli b, b, 1                              ;   xm.vgetc t3}
+	{   xm.shli a, a, 1                              ;   xm.zexti t3, 5                             }
+	{   sub k, b, _32                             ;   xm.ldw t3, t3                         (s)}
+	{   add exp_modifier, exp_modifier, t3        ;   xm.vsetc t3}
+
+	{   srli n, n, 1                              ;   xm.bt n, dif_fft_round_loop                }
+	
+dif_fft_last_two_rounds:
+	{   mv t3, x_p                                ;   lw n, (STACK_N)*4                      (sp)}
+	{   srli j, n, 2                              ; nop                                           }	
+
+dif_fft_last_two_rounds_loop:
+	{ nop                                         ;   xm.vldr t3}
+	{   addi j, j, -1                             ;   xm.vftff                                   }
+	{   add t3, t3, _32                           ;   xm.vstr t3}
+
+	dif_fft_last_two_rounds_4_point:
+	{ nop                                         ;   xm.vldr t3}
+	{   addi j, j, -1                             ;   xm.vftff                                   }
+	{   add t3, t3, _32                           ;   xm.vstr t3}
+	{ nop                                         ;   xm.bt j, dif_fft_last_two_rounds_loop      }
+
+dif_fft_done:
+	
+	//update the hr
+	{   li s, 31                                   ;   xm.vgetc t3}
+	{   xm.zexti t3, 5                             ;   nop                                                                     }
+	{   sub s, s, t3	                           ;   nop                                           }
+	xm.lddsp  x18,x12,0*8
+	{ nop                                          ;   sw s,0                          ( hr_p)}
+
+	//update the exponent
+	{ nop                                          ;   lw t3, (STACK_EXP)*4 (sp)}
+	{ nop                                          ;   lw s,0                           ( t3)}
+	
+	srai exp_modifier, exp_modifier, 16
+	{   add s, s, exp_modifier                     ;   nop                                           }
+	{ nop                                          ;   sw s,0                           ( t3)}
+
+
+	
+	xm.lddsp  x19,x20,1*8
+	xm.lddsp  x21,x22,2*8
+	xm.lddsp  x23,x24,3*8
+
+	xm.retsp (NSTACKWORDS)*4
+
+
+	.set	fft_dif_forward.nstackwords,NSTACKWORDS
+	.globl	fft_dif_forward.nstackwords
+	.set	fft_dif_forward.maxcores,1
+	.globl	fft_dif_forward.maxcores
+	.set	fft_dif_forward.maxtimers,0
+	.globl	fft_dif_forward.maxtimers
+	.set	fft_dif_forward.maxchanends,0
+	.globl	fft_dif_forward.maxchanends
+.L_fft_dif_forward:
+	.size	fft_dif_forward, .L_fft_dif_forward-fft_dif_forward
+
+
+
+
+
+
+
+
+
+
+	.text
+	.globl	        fft_dif_inverse
+	.type	        fft_dif_inverse, @function
+
+.p2align 2
+fft_dif_inverse:
+	xm.entsp (NSTACKWORDS)*4
+	xm.stdsp  x18,x12,0*8
+	xm.stdsp  x19,x20,1*8
+	xm.stdsp  x21,x22,2*8
+	xm.stdsp  x23,x24,3*8
+    
+    { nop                                           ;   sw a3, (STACK_EXP)*4                   (sp)}
+		la t3, xmath_dif_fft_lut_size
+    {   li s3, 32                              ;   lw s2,0                          ( t3)}
+		la t3, xmath_dif_fft_lut
+    {   add twiddle_lut_p, t3, s2              ;   xm.shli s2, n, 3                            }
+    {   add twiddle_lut_p, twiddle_lut_p, s3    ; nop                                           }
+    {   sub twiddle_lut_p, twiddle_lut_p, s2    ; nop                                           }
+
+dif_ifft_impl_start:
+	{   li s, 31                               ;   lw t3,0                        ( hr_p)}
+	{   sub s, s, t3                           ;   srli j, n, 2                             }
+	
+	la t3, fft_hr_lut	
+	
+	{   li _32, 32                             ;   xm.ldw t3,s( t3)}
+	{   mv exp_modifier, t3                   ;   xm.vsetc t3}
+
+	{   srli s, n, 3                             ;   sw n, (STACK_N)*4                      (sp)}
+	{   mv t3, x_p;                           ;   xm.brff s, dif_ifft_last_two_rounds_4_point  } /* Translation error on this line: unexpected token at position 45. */ 
+
+	mul b, n, _32                                                                         /* Translation error on this line: unexpected token at position 89. */ 
+	{   srli b, b, 3                             ;   mv a, _32                              }
+	{   sub k, b, _32                           ;   srli n, n, 4                             }
+	
+	la t3, fft_hr_lut	
+
+	{   mv s, t3                              ;   lw exp_minus_one,0               ( t3)}
+
+dif_ifft_round_loop:
+	dif_ifft_outer_loop:
+		{   add t3, x_p, k                         ;   mv j, a                                }
+	 	{   add twiddle_lut_p, twiddle_lut_p, _32   ;   xm.vldc twiddle_lut_p}
+
+		dif_ifft_inner_loop:
+			{   add t3, t3, b                         ;   xm.vldr t3}
+			{   sub t3, t3, b                         ;   xm.vladsb t3}
+			{   add t3, t3, b                         ;   xm.vstr t3 }
+			{   sub j, j, _32                           ;   xm.vcmcr0                                   }
+			{ nop                                           ;   xm.vcmci0                                   }
+			{ nop                                           ;   xm.vstr t3 }
+			{   add t3, t3, b                         ;   xm.bt j, dif_ifft_inner_loop               }
+
+		{   sub k, k, _32                           ;   xm.bt k, dif_ifft_outer_loop               }
+
+	{   add exp_modifier, exp_modifier, exp_minus_one      ;nop                                 } /* Translation error on this line: unexpected token at position 89. */ 
+	{   srli b, b, 1                             ;   xm.vgetc t3}
+	{   xm.shli a, a, 1                             ;   xm.zexti t3, 5                             }
+	{   sub k, b, _32                           ;   xm.ldw t3, t3                         (s)}
+	{   add exp_modifier, exp_modifier, t3     ;   xm.vsetc t3}
+
+	{   srli n, n, 1                             ;   xm.bt n, dif_ifft_round_loop               }
+	
+dif_ifft_last_two_rounds:
+	{   nop;nop /*TODO make this an align*/                                                    } /* Translation error on this line: unexpected token at position 89. */ 
+	{   mv t3, x_p                            ;   lw n, (STACK_N)*4                      (sp)}
+	{   srli j, n, 2                             ; nop                                           }	
+
+dif_ifft_last_two_rounds_loop:
+	{ nop                                           ;   xm.vldr t3}
+	{   addi j, j, -1                             ;   xm.vftfb                                   }
+	{   add t3, t3, _32                       ;   xm.vstr t3}
+
+	dif_ifft_last_two_rounds_4_point:
+	{ nop                                           ;   xm.vldr t3}
+	{   addi j, j, -1                             ;   xm.vftfb                                   }
+	{   add t3, t3, _32                       ;   xm.vstr t3}
+	{ nop                                           ;   xm.bt j, dif_ifft_last_two_rounds_loop     }
+
+dif_ifft_done:
+	//update the hr
+	{   li s, 31                               ;   xm.vgetc t3}
+	{   xm.zexti t3, 5                             ; nop                                           }
+    {   sub s, s, t3                           ; nop                                           }	
+       xm.lddsp x18,x12,0*8                                                             /* Translation error on this line: unexpected token at position 92. */ 
+    { nop                                           ;   sw s,0                          ( hr_p)}
+
+	//update the exponent
+	{ nop                                           ;   lw t3, (STACK_EXP)*4                  (sp)}
+	{ nop                                           ;   lw s,0                           ( t3)}
+	srai exp_modifier, exp_modifier, 16
+	//{   ashr exp_modifier, exp_modifier, 16                                                 } /* Translation error on this line: unexpected token at position 89. */ 
+	addi exp_modifier, exp_modifier, -2                                           /* Translation error on this line: unexpected token at position 89. */ 
+	{   add s, s, exp_modifier                  ; nop                                           }
+	{ nop                                           ;   sw s,0                           ( t3)}
+
+	//restore the regs
+	xm.lddsp  x19,x20,1*8
+	xm.lddsp  x21,x22,2*8
+	xm.lddsp  x23,x24,3*8
+    
+	 xm.retsp (NSTACKWORDS)*4
+
+	.set	fft_dif_inverse.nstackwords,NSTACKWORDS
+	.globl	fft_dif_inverse.nstackwords
+	.set	fft_dif_inverse.maxcores,1
+	.globl	fft_dif_inverse.maxcores
+	.set	fft_dif_inverse.maxtimers,0
+	.globl	fft_dif_inverse.maxtimers
+	.set	fft_dif_inverse.maxchanends,0
+	.globl	fft_dif_inverse.maxchanends
+.L_fft_dif_inverse:
+	.size	fft_dif_inverse, .L_fft_dif_inverse-fft_dif_inverse
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/fft/dit_fft.S b/lib_xcore_math/src/arch/vx4b/fft/dit_fft.S
new file mode 100644
index 00000000..b1aacd9c
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/fft/dit_fft.S
@@ -0,0 +1,316 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+    void fft_dit_forward (
+        complex_s32_t * x, 
+        unsigned n, 
+        headroom_t* hr, 
+        exponent_t* exp);
+    
+    void fft_dit_inverse (
+        complex_s32_t* x, 
+        unsigned n, 
+        headroom_t* hr, 
+        exponent_t* exp);
+*/
+
+#define NSTACKWORDS (32)
+
+#define STACK_EXP       (8)
+
+#define x_p 			x10  //astew: Value is constant. Could be thrown on stack to free up a register.
+#define n 				x11 
+#define hr_p            x12  //astew: register currently only used at very beginning and end.
+#define twiddle_lut_p 	x13
+// #define M               x18
+#define _32             x18
+
+#define j               x19
+#define k               x20
+
+#define a               x21
+#define b               x22
+
+#define exp_modifier    x23
+
+#define s               x24
+// #define t               x28
+
+.text
+.globl	fft_dit_forward
+.type	fft_dit_forward,@function
+
+.p2align 2
+fft_dit_forward:
+
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  hr_p,s2,0
+        xm.stdsp  s4,s3,8
+        xm.stdsp  s6,s5,16
+        xm.stdsp  s8,s7,24
+    
+lui t3, %hi(xmath_dit_fft_lut)
+        addi t3,t3, %lo(xmath_dit_fft_lut)
+    {   mv twiddle_lut_p, t3                  ;   sw a3, (STACK_EXP)*4                   (sp)}
+
+    {   li exp_modifier, 0                     ;   lw t3,0                        ( hr_p)}
+    {   addi t3, t3, -1                         ;   xm.brff t3, dit_fft_impl_0_bits_hr          }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    {   addi t3, t3, -1                         ;   xm.brff t3, dit_fft_impl_1_bits_hr          }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    {   addi t3, t3, -1                         ;   xm.brff t3, dit_fft_impl_2_bits_hr          }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    {   addi t3, t3, -1                         ;   xm.brff t3, dit_fft_impl_3_bits_hr          }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.bu dit_fft_impl_4_bits_hr               }
+
+#define VEC_SHR     0x80
+#define VEC_SHL     0x40
+#define VEC_SH0     0x00
+
+dit_fft_impl_0_bits_hr:
+       li x28, VEC_SHR                          /* Translation error on this line: unexpected token at position 48. */ //VEC_SHR
+    {   addi exp_modifier, exp_modifier, 1       ;   xm.bu dit_fft_impl_start                   }
+
+dit_fft_impl_1_bits_hr:
+       li x28, VEC_SHR                         /* Translation error on this line: unexpected token at position 48. */ //VEC_SHR
+    {   addi exp_modifier, exp_modifier, 1       ;   xm.bu dit_fft_impl_start                   }
+
+dit_fft_impl_2_bits_hr:
+       li x28, VEC_SHR                          /* Translation error on this line: unexpected token at position 48. */ //VEC_SHR
+    {   addi exp_modifier, exp_modifier, 1       ;   xm.bu dit_fft_impl_start                   }
+
+dit_fft_impl_3_bits_hr:
+      li x28, VEC_SH0                          /* Translation error on this line: unexpected token at position 48. */ //VEC_SH0
+    {   addi exp_modifier, exp_modifier, 0       ;   xm.bu dit_fft_impl_start                   }
+
+dit_fft_impl_4_bits_hr:
+     li x28, VEC_SHL                          /* Translation error on this line: unexpected token at position 48. */ //VEC_SHL
+    {   addi exp_modifier, exp_modifier, -1       ;   xm.bu dit_fft_impl_start                   }
+
+
+dit_fft_impl_start:
+    // Iterate the dit_fft_first_two_rounds_loop loop  n/4 times (via j) because vD holds 4 complex elements
+    {   srli j, n, 2                             ;   xm.vsetc t3}
+    // have x28 point at the beginning of the data vector
+    {   mv t3, x_p                            ;   li _32, 32                             }
+
+
+    dit_fft_first_two_rounds_loop:
+        // Load 4 complex elements from the data vector (already have indexes bit-reversed)
+        { nop                                           ;   xm.vldd t3}   
+        // Do FFT thing and decrement loop counter
+        {   addi j, j, -1                             ;   xm.vfttf                                   }   
+        // Write back to data vector, and move to point at next 4 elements
+        {   add t3, t3, _32                       ;   xm.vstd t3}   
+        // Loop if there's more. Set s to n/4
+        {   srli s, n, 2                             ;   xm.bt j, dit_fft_first_two_rounds_loop     }
+
+    // s = (n/4)-1;  if n == 4, skip the main loop.
+    {   addi s, s, -1                             ; nop                                           }
+    { nop                                           ;   xm.brff s, dit_fft_done                      }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    // b = 32       ; b will shift left in each iteration of the `dit_fft_round_loop` loop
+    // a = n / 8    ; a will shift right in each iteration of the `dit_fft_round_loop` loop
+    // n = n / 16   ; after this we'll do log2(n)+1 executions of `dit_fft_round_loop`
+    {   mv b, _32                              ; nop                                           } // <-- astew: seems unnecessary. Can probably drop an indstruction here.
+    {   srli n, n, 4                             ;   srli a, n, 3                             }
+
+    dit_fft_round_loop:
+       la x28, fft_hr_lut	
+        {   mv s, t3                              ;   xm.vgetc t3}
+        { nop                                           ;   xm.zexti t3, 5                             }
+        {   sub k, b, _32                          ;   xm.ldw t3, t3                         (s)}
+            srai s, t3, 16
+        {   add exp_modifier, exp_modifier, s       ;   xm.vsetc t3}
+
+        dit_fft_outer_loop:
+            // j is our inner loop iterator variable
+            // set s to point k bytes into the data buffer
+            {   mv j, a                                ;   add s, x_p, k                           }
+            {   add twiddle_lut_p, twiddle_lut_p, _32   ;   xm.vldc twiddle_lut_p}
+            {   add t3, s, b                           ; nop                                           } ////this might be able to go
+
+            dit_fft_inner_loop:
+                { nop                                           ;   xm.vldd t3}
+                { nop                                           ;   xm.vcmr0                                    }
+                { nop                                           ;   xm.vcmi0                                    }
+                {   addi j, j, -1                             ;   xm.vladsb s}
+                {   add s, s, b                             ;   xm.vstr s }
+                {   add s, s, b                             ;   xm.vstd t3}
+                {   add t3, s, b                           ;   xm.bt j, dit_fft_inner_loop                }
+
+            {   sub k, k, _32                           ;   xm.bt k, dit_fft_outer_loop                }
+
+        {   xm.shli b, b, 1                             ;   srli a, a, 1                             }
+        {   srli n, n, 1                             ;   xm.bt n, dit_fft_round_loop                }
+    
+dit_fft_done:
+    
+    //update the hr
+    {   xm.vgetc t3;   li s, 31                               }
+        xm.zexti x28, 5                              /* Translation error on this line: unexpected token at position 48. */ 
+        sub s, s, t3
+        xm.lddsp  hr_p,s2,0
+        sw s,0( hr_p)
+
+    //update the exponent
+    { nop                                           ;   lw t3, (STACK_EXP)*4                  (sp)}
+    { nop                                           ;   lw s,0                           ( t3)}
+    {   add s, s, exp_modifier                  ; nop                                           }
+    { nop                                           ;   sw s,0                           ( t3)}
+
+        //restore the regs
+        xm.lddsp  s4,s3,8
+        xm.lddsp  s6,s5,16
+        xm.lddsp  s8,s7,24
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+    
+    .set	    fft_dit_forward.nstackwords,NSTACKWORDS
+    .globl	    fft_dit_forward.nstackwords
+    .set	    fft_dit_forward.maxcores,1
+    .globl	    fft_dit_forward.maxcores
+    .set	    fft_dit_forward.maxtimers,0
+    .globl	    fft_dit_forward.maxtimers
+    .set	    fft_dit_forward.maxchanends,0
+    .globl	    fft_dit_forward.maxchanends
+
+.Ltmp0:
+    .size	fft_dit_forward, .Ltmp0-fft_dit_forward
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    .text
+    .globl	        fft_dit_inverse
+    .type	        fft_dit_inverse, @function
+
+.p2align 2
+fft_dit_inverse:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  hr_p,s2,0
+        xm.stdsp  s4,s3,8
+        xm.stdsp  s6,s5,16
+        xm.stdsp  s8,s7,24
+    
+lui t3, %hi(xmath_dit_fft_lut)
+        addi t3,t3, %lo(xmath_dit_fft_lut)
+    {   mv twiddle_lut_p, t3                  ;   sw a3, (STACK_EXP)*4                   (sp)}
+
+    {   li exp_modifier, 0                     ;   lw t3,0                        ( hr_p)}
+    {   addi t3, t3, -1                         ;   xm.brff t3, dit_ifft_impl_0_bits_hr         }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    {   addi t3, t3, -1                         ;   xm.brff t3, dit_ifft_impl_1_bits_hr         }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    {   addi t3, t3, -1                         ;   xm.brff t3, dit_ifft_impl_2_bits_hr         }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    {   addi t3, t3, -1                         ;   xm.brff t3, dit_ifft_impl_3_bits_hr         }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.bu dit_ifft_impl_4_bits_hr              }
+
+    dit_ifft_impl_0_bits_hr:
+          li x28, 0x80                             /* Translation error on this line: unexpected token at position 52. */ //VEC_SHR
+        {   addi exp_modifier, exp_modifier, 1       ;   xm.bu dit_ifft_impl_start                  }
+
+    dit_ifft_impl_1_bits_hr:
+           li x28, 0x80                             /* Translation error on this line: unexpected token at position 52. */ //VEC_SHR
+        {   addi exp_modifier, exp_modifier, 1       ;   xm.bu dit_ifft_impl_start                  }
+
+    dit_ifft_impl_2_bits_hr:
+           li x28, 0x80                             /* Translation error on this line: unexpected token at position 52. */ //VEC_SHR
+        {   addi exp_modifier, exp_modifier, 1       ;   xm.bu dit_ifft_impl_start                  }
+
+    dit_ifft_impl_3_bits_hr:
+          li x28, 0x00                            /* Translation error on this line: unexpected token at position 52. */ //VEC_SH0
+        {   addi exp_modifier, exp_modifier, 0       ;   xm.bu dit_ifft_impl_start                  }
+
+    dit_ifft_impl_4_bits_hr:
+          li x28, 0x40                             /* Translation error on this line: unexpected token at position 52. */ //VEC_SHL
+        {   addi exp_modifier, exp_modifier, -1       ;   xm.bu dit_ifft_impl_start                  }
+
+dit_ifft_impl_start:
+    {   srli j, n, 2                             ;   xm.vsetc t3}
+    {   mv t3, x_p                            ;   xm.ldcu _32, 8*4                            }
+
+
+dit_ifft_first_two_rounds_loop:
+    { nop                                           ;   xm.vldd t3}
+    {   addi j, j, -1                             ;   xm.vfttb                                   }
+    {   add t3, t3, _32                       ;   xm.vstd t3}
+    {   srli s, n, 2                             ;   xm.bt j, dit_ifft_first_two_rounds_loop    }
+
+    {   addi s, s, -1                             ; nop                                           }
+    {   addi exp_modifier, exp_modifier, -2       ;   xm.brff s, dit_ifft_done                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    {   mv a, n                                ;   mv b, _32                              }
+    {   srli a, a, 3                             ;   srli n, n, 4                             }
+
+dit_ifft_round_loop:
+    la x28, fft_hr_lut	
+    {   mv s, t3                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ;   addi exp_modifier, exp_modifier, -1       }
+    {   sub k, b, _32                           ;   xm.ldw t3, t3                         (s)}
+        srai s, t3, 16
+    {   add exp_modifier, exp_modifier, s       ;   xm.vsetc t3}
+
+    dit_ifft_outer_loop:
+        {   add s, x_p, k                           ;   mv j, a                                }
+         {   add twiddle_lut_p, twiddle_lut_p, _32   ;   xm.vldc twiddle_lut_p}
+        {   add t3, s, b                           ; nop                                           } ////this might be able to go
+
+        dit_ifft_inner_loop:
+            { nop                                           ;   xm.vldd t3}
+            { nop                                           ;   xm.vcmcr0                                   }
+            { nop                                           ;   xm.vcmci0                                   }
+            {   addi j, j, -1                             ;   xm.vladsb s}
+            {   add s, s, b                             ;   xm.vstr s }
+            {   add s, s, b                             ;   xm.vstd t3}
+            {   add t3, s, b                           ;   xm.bt j, dit_ifft_inner_loop               }
+
+        {   sub k, k, _32                           ;   xm.bt k, dit_ifft_outer_loop               }
+
+    {   srli a, a, 1                             ;   xm.shli b, b, 1                             }
+    {   srli n, n, 1                             ;   xm.bt n, dit_ifft_round_loop               }
+    
+dit_ifft_done:
+    
+    //update the hr
+    {   li s, 31                               ;   xm.vgetc t3}
+     xm.zexti x28, 5                             /* Translation error on this line: unexpected token at position 48. */ 
+        sub s, s, t3
+        xm.lddsp  hr_p,s2,0
+        sw s,0( hr_p)
+
+    //update the exponent
+    { nop                                           ;   lw t3, (STACK_EXP)*4                  (sp)}
+    { nop                                           ;   lw s,0                           ( t3)}
+    {   add s, s, exp_modifier                  ; nop                                           }
+    { nop                                           ;   sw s,0                           ( t3)}
+
+        //restore the regs
+        xm.lddsp  s4,s3,8
+        xm.lddsp  s6,s5,16
+        xm.lddsp  s8,s7,24
+        xm.retsp (NSTACKWORDS)*4   /* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS   \nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+    
+    .set	    fft_dit_inverse.nstackwords,NSTACKWORDS
+    .globl	    fft_dit_inverse.nstackwords
+    .set	    fft_dit_inverse.maxcores,1
+    .globl	    fft_dit_inverse.maxcores
+    .set	    fft_dit_inverse.maxtimers,0
+    .globl	    fft_dit_inverse.maxtimers
+    .set	    fft_dit_inverse.maxchanends,0
+    .globl	    fft_dit_inverse.maxchanends
+.Ltmp1:
+    .size	fft_dit_inverse, .Ltmp1-fft_dit_inverse
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/fft/fft_hr_lut.S b/lib_xcore_math/src/arch/vx4b/fft/fft_hr_lut.S
new file mode 100644
index 00000000..72237451
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/fft/fft_hr_lut.S
@@ -0,0 +1,24 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+//.section    .cp.rodata, "ac", @progbits
+.p2align  2
+// In the table below the LSByte indicates the shift behavior
+//  0x00 - no shift.  0x40 - left shift. 0x80 - right shift.
+
+#define NEG1_SHL    0xffff0040
+#define ZERO_SH0    0x00000000
+#define POS1_SHR    0x00010080
+
+.global fft_hr_lut
+.section .data.fft_hr_lut, "aw"
+fft_hr_lut:
+    .word NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL //  0 -  7
+    .word NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL //  8 - 15
+    .word NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL // 16 - 23
+    .word NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, ZERO_SH0, POS1_SHR, POS1_SHR, POS1_SHR // 24 - 31
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/fft/fft_index_bit_reversal.S b/lib_xcore_math/src/arch/vx4b/fft/fft_index_bit_reversal.S
new file mode 100644
index 00000000..27712a18
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/fft/fft_index_bit_reversal.S
@@ -0,0 +1,65 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+/*  
+
+void fft_index_bit_reversal(
+    complex_s32_t* a,
+    const unsigned length);
+*/
+
+#define FUNCTION_NAME   fft_index_bit_reversal
+#define NSTACKWORDS 8
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 4
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4
+    xm.stdsp  x20, x21, 0
+    xm.stdsp  x18, x19, 8
+    xm.clz a2, a1
+    {   addi a2, a2, 1                           ;   srli a1, a1, 1                           }
+    {   slli a1, a1, 1                           ;   xm.bu .L_loop                              }
+    
+.p2align 4
+.L_loop:   
+    {   xm.bitrev a3, a1                         ;   xm.shl t3, a1, a2                         }
+    {   xm.sltu t3, a3, t3                       ;   xm.shr a3, a3, a2                          }
+    {   addi t3, a1, -1                          ;   xm.brff t3, .L_dontswap                     }
+    xm.ldd x18, x19, a1(a0)                                                                  
+    xm.ldd x20, x21, a3(a0)                                                                  
+    xm.std x18, x19, a3(a0)                                                                  
+    xm.std x20, x21, a1(a0)                                                                
+.L_dontswap:
+    {   xm.bitrev a3, t3                         ;   xm.shl a1, t3, a2                         }
+    {   xm.sltu a1, a3, a1                       ;   xm.shr a3, a3, a2                          }
+    {   addi a1, t3, -1                          ;   xm.brff a1, .L_dontswap2                     }
+    xm.ldd x18, x19, t3(a0)                                                                 
+    xm.ldd x20, x21, a3(a0)                                                                 
+    xm.std x18, x19, a3(a0)                                                                  
+    xm.std x20, x21, t3(a0)                                                                 
+.L_dontswap2:
+    {   xm.bt a1, .L_loop    ; nop                                                                  }
+    
+    xm.lddsp  x20, x21, 0
+    xm.lddsp  x18, x19, 8
+    xm.retsp (NSTACKWORDS)*4
+
+	// RETURN_REG_HOLDER
+.set	FUNCTION_NAME.nstackwords,NSTACKWORDS
+.globl	FUNCTION_NAME.nstackwords
+.set	FUNCTION_NAME.maxcores,1
+.globl	FUNCTION_NAME.maxcores
+.set	FUNCTION_NAME.maxtimers,0
+.globl	FUNCTION_NAME.maxtimers
+.set	FUNCTION_NAME.maxchanends,0
+.globl	FUNCTION_NAME.maxchanends
+.Ltmp0:
+	.size	FUNCTION_NAME, .Ltmp0-FUNCTION_NAME    
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/fft/fft_mono_adjust.S b/lib_xcore_math/src/arch/vx4b/fft/fft_mono_adjust.S
new file mode 100644
index 00000000..e9c4f1c3
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/fft/fft_mono_adjust.S
@@ -0,0 +1,218 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+void fft_mono_adjust(
+    complex_s32_t* X,
+    const unsigned N,
+    const unsigned inverse);
+*/
+
+
+#define FUNCTION_NAME   fft_mono_adjust
+
+#define NSTACKVECTS     (4)
+#define NSTACKWORDS     (32 + 8*(NSTACKVECTS))
+
+#define STACK_VEC_TMP_A         (NSTACKWORDS-(8*2))
+#define STACK_VEC_TMP_B         (NSTACKWORDS-(8*3))
+#define STACK_VEC_TMP_B_CONJ    (NSTACKWORDS-(8*4))
+#define STACK_VEC_TMP           (NSTACKWORDS-(8*5))
+
+#define STACK_X0    (4)
+#define STACK_XQ    (5)
+#define STACK_X     (12)
+#define STACK_N     (13)
+#define STACK_W     (14)
+#define STACK_INV   (15)
+
+#define X           x10
+#define N           x11
+#define W           x12
+#define X_lo        x13        
+#define X_hi        x18  
+#define _32         x19
+#define i           x20
+#define pos_j_vect  x21      
+#define ones_vect   x22     
+#define conj_vect   x23     
+
+.text
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+//.call FUNCTION_NAME, vect_complex_s32_tail_reverse
+
+.p2align 4
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4
+    xm.stdsp  x18,x19,8
+    xm.stdsp  x20,x21,16
+    xm.stdsp  x22,x23,24
+    
+    {   li t3, 0                              ;   sw s8, 4                          (sp)}
+    {   addi s2, N, -8                            ;   sw a2, (STACK_INV)*4                   (sp)}
+    {   slli s2, s2, 3                           ;   xm.vsetc t3}
+
+    // W <-- &xmath_dit_fft_lut[N - 8]
+    // W <-- xmath_dit_fft_lut + ((N-8)<<3)
+lui t3, %hi(xmath_dit_fft_lut)
+        addi t3,t3, %lo(xmath_dit_fft_lut)
+    {   srli a3, N, 4                            ;   add W, t3, s2                          }
+    
+    {   srli N, N, 1                             ;   sw X, (STACK_X)*4                      (sp)}
+    // exception if N < 16. Don't bother using this with really short FFTs.
+    {   xm.assert a3                               ;   sw N, (STACK_N)*4                      (sp)}
+
+        sh2add X, N, X
+    {   srli N, N, 1                             ;   sw W, (STACK_W)*4                      (sp)}
+
+    call vect_complex_s32_tail_reverse
+    lw X, (STACK_X)*4(sp)/* Multiple XAT warnings: 'LDWSP outside of known frame - offset may need correction', "Falling back on assumption: the int < 64 for the integer value of the item at position 1 in the instruction's operands in ldwsp X, STACK_X\nMessage: The offset can be encoded in sru6 immediate" */
+    lw N, (STACK_N)*4(sp)/* Multiple XAT warnings: 'LDWSP outside of known frame - offset may need correction', "Falling back on assumption: the int < 64 for the integer value of the item at position 1 in the instruction's operands in ldwsp N, STACK_N\nMessage: The offset can be encoded in sru6 immediate" */
+    lw W, (STACK_W)*4(sp)/* Multiple XAT warnings: 'LDWSP outside of known frame - offset may need correction', "Falling back on assumption: the int < 64 for the integer value of the item at position 1 in the instruction's operands in ldwsp W, STACK_W\nMessage: The offset can be encoded in sru6 immediate" */
+
+
+.p2align 4
+.L_body:
+    // the elements at indexes 0 and N/4 will come out of the loop wrong, but we can just store
+    // X[0] and X[N/2] and fix them after the loop.
+    {   srli i, N, 1                             ; nop                                           }
+        xm.lddi  s5,s6, 0(X)
+        xm.ldd  s7,s8, i(X)
+        xm.stdsp  s5,s6,(STACK_X0)*8
+        xm.stdsp  s7,s8,(STACK_XQ)*8
+lui t3, %hi(vpu_vec_complex_pos_j)
+        addi t3,t3, %lo(vpu_vec_complex_pos_j)
+    {   mv pos_j_vect, t3                     ; nop                                           }
+lui t3, %hi(vpu_vec_complex_ones)
+        addi t3,t3, %lo(vpu_vec_complex_ones)
+    {   mv ones_vect, t3                      ; nop                                           }
+lui t3, %hi(vpu_vec_complex_conj_op)
+        addi t3,t3, %lo(vpu_vec_complex_conj_op)
+    {   mv conj_vect, t3                      ;   li _32, 32                             }
+
+        li t3, 0x0080
+    {   slli t3, N, 2                           ;   xm.vsetc t3}
+    {   add X_hi, X, t3                        ;   mv X_lo, X                             }
+    {   srli i, N, 3                             ;   lw t3, (STACK_INV)*4                  (sp)}
+    { nop                                           ;   xm.brff t3, .L_main_loop                    }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    {   mv X_hi, X_lo                          ;   mv X_lo, X_hi                          }
+
+.L_main_loop://I want this loop to have 1 mod 4 alignment to eliminate all FNOPs
+    {   addi i, i, -1                             ;   xm.vldd pos_j_vect}
+    {   sub W, W, _32                          ;   xm.vldc W}
+    { nop                                           ;   xm.vcmr0                                   }
+
+    { nop                                           ;   xm.vcmi0                                    }
+    {   addi t3,sp, (STACK_VEC_TMP_A)*4           ;   xm.vladsb ones_vect}
+    {   addi t3,sp, (STACK_VEC_TMP_B)*4           ;   xm.vstd t3}
+    { nop                                           ;   xm.vstr t3}
+
+    {   addi t3,sp, (STACK_VEC_TMP_B_CONJ)*4      ;   xm.vlmul0 conj_vect}
+    { nop                                           ;   xm.vstr t3}
+    { nop                                           ;   xm.vldc X_lo}
+    { nop                                           ;   xm.vcmr0                                   }
+
+    {   addi t3,sp, (STACK_VEC_TMP_B)*4           ;   xm.vcmi0                                    }
+    {   addi t3,sp, (STACK_VEC_TMP)*4             ;   xm.vldd t3}
+    { nop                                           ;   xm.vstr t3}
+    { nop                                           ;   xm.vldc X_hi}
+
+    { nop                                           ;   xm.vcmcr0                                   }
+    { nop                                           ;   xm.vcmci0                                    }
+    { nop                                           ;   xm.vladd t3}
+    {   addi t3,sp, (STACK_VEC_TMP_B_CONJ)*4      ;   xm.vldc X_lo}
+
+    { nop                                           ;   xm.vldd t3}
+    {   add X_lo, X_lo, _32                     ;   xm.vstr X_lo}
+    { nop                                           ;   xm.vcmcr0                                   }
+    {   addi t3,sp, (STACK_VEC_TMP)*4             ;   xm.vcmci0                                    }
+
+    {   addi t3,sp, (STACK_VEC_TMP_A)*4           ;   xm.vstr t3}
+    { nop                                           ;   xm.vldc t3}
+    { nop                                           ;   xm.vldd X_hi}
+    { nop                                           ;   xm.vcmcr0                                   }
+
+    {   addi t3,sp, (STACK_VEC_TMP)*4             ;   xm.vcmci0                                    }
+    { nop                                           ;   xm.vladd t3}
+    {   add X_hi, X_hi, _32                     ;   xm.vstr X_hi}
+    { nop                                           ;   xm.bt i, .L_main_loop                      }
+
+    // If we had a LUT which already holds A[k], B[k] and the complex conjugate of B[k], we can do 
+    // it in 23 instructions instead of 31  
+
+    // If it seems worthwhile, could create an alternate version of this function that does it faster,
+    // plus a function to initialize the needed table at start-up? It can be initialized based on the
+    // existing FFT table.
+
+    // {                                           ;   vldd table_A[0]                         }
+    // {   sub i, i, 1                             ;   vldc X_lo[0]                            }
+    // {                                           ;   vcmr                                    }
+    // {                                           ;   vcmi                                    }
+    // {                                           ;   vstr vec_tmp[0]                         }
+    // {                                           ;   vldd table_B[0]                         }
+    // {                                           ;   vldc X_hi[0]                            }
+    // {                                           ;   vcmcr                                   }
+    // {                                           ;   vcmci                                   }
+    // {                                           ;   vladd vec_tmp[0]                        }
+    // {                                           ;   vldd table_B_conj[0]                    }
+    // {                                           ;   vldc X_lo[0]                            }
+    // {   add X_lo, X_lo, _32                     ;   vstr X_lo[0]                            }
+    // {                                           ;   vcmcr                                   }
+    // {                                           ;   vcmci                                   }
+    // {                                           ;   vstr vec_tmp[0]                         }
+    // {   add table_A, table_A, _32               ;   vldc table_A[0]                         }
+    // {   add table_B, table_B, _32               ;   vldd X_hi[0]                            }
+    // {   add table_B_conj, table_B_conj, _32     ;   vcmcr                                   }
+    // {                                           ;   vcmci                                   }
+    // {                                           ;   vladd vec_tmp[0]                        }
+    // {   add X_hi, X_hi, _32                     ;   vstr X_hi[0]                            }
+    // {                                           ;   bt i, .L_something                      }
+
+        xm.lddsp  s5,s6,(STACK_X0)*8
+        xm.lddsp  s7,s8,(STACK_XQ)*8
+    { nop                                           ;   lw t3, (STACK_INV)*4                  (sp)}
+        sra s5, s5, t3
+        sra s6, s6, t3
+
+    {   add s5, s5, s6                          ;   sub s6, s5, s6                          }
+        xm.stdi  s5,s6, 0(X)
+    {   xm.neg s8, s8                            ;   srli i, N, 1                             }
+        xm.std  s7,s8, i(X)
+    
+
+//Finally, reverse the elements again...
+        sh2add X, N, X
+    {   srli N, N, 1                             ; nop                                           }
+
+    call vect_complex_s32_tail_reverse
+
+.L_finish:
+    { nop                                           ;   lw s8, 4                          (sp)}
+
+        xm.lddsp  x18,x19,8
+        xm.lddsp  x20,x21,16
+        xm.lddsp  x22,x23,24
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
+.set FUNCTION_NAME.nstackwords,((NSTACKWORDS) + vect_complex_s32_tail_reverse.nstackwords); /* Translation error on this line: unexpected token at position 90. */ 
+.global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 33. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+
+.L_function_end: 
+    .size FUNCTION_NAME, .L_function_end - FUNCTION_NAME
+
+
+
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/fft/fft_spectra_merge.S b/lib_xcore_math/src/arch/vx4b/fft/fft_spectra_merge.S
new file mode 100644
index 00000000..124656a3
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/fft/fft_spectra_merge.S
@@ -0,0 +1,158 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+headroom_t fft_spectra_merge(
+    complex_s32_t* X,
+    const unsigned N);
+*/
+
+
+
+#define FUNCTION_NAME   fft_spectra_merge
+#define NSTACKWORDS     (16)
+
+#define XS3_CONFIG_MIN_FFT_LEN (4)
+
+#define X       x10
+#define N       x11
+
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 4
+
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,8
+        xm.stdsp  s5,s4,16
+        xm.stdsp  s7,s6,24
+    {   li t3, 0                              ;   sw s8, 4                          (sp)}
+    {   srli t3, N, 3                           ;   xm.vsetc t3}
+#if (XS3_CONFIG_MIN_FFT_LEN <= 4)
+    { nop                                           ;   xm.brff t3, .L_fft_length_4                 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.bu .L_pre_boggle                        }
+#endif
+
+
+#if (CONFIG_MIN_FFT_LEN <= 4)
+.L_fft_length_4:
+
+    // If the FFT length is 4, just do the work here. This keeps the code below simpler.
+    { nop                                           ;   lw s2,4                            ( X)}
+    { nop                                           ;   lw s3,16                            ( X)}
+    { nop                                           ;   sw s2,16 /* X[2].re <- X[0].im */   ( X)}
+    { nop                                           ;   sw s3,4 /* X[0].im <- X[2].re */   ( X)}
+        xm.lddi  s2,s3, 8(X)
+        xm.lddi  s4,s5, 24(X)
+    {   sub s8, s2, s5                         ;   add t3, s3, s4                         }
+        xm.stdi  s8,t3, 8(X)
+    {   add s8, s2, s5                         ;   sub t3, s4, s3                         }
+        xm.stdi  s8,t3, 24(X)
+    { nop                                           ;   xm.vldd X}
+    { nop                                           ;   xm.vstd X}
+    { nop                                           ;   xm.vgetc t3}
+    {   mv s2, t3                             ;   xm.bu .L_finish2                            }
+.L_finish2:
+
+
+    {   li a0, 31                              ; nop                                           }
+    {   xm.zexti s2, 5                              ; nop                                           }
+    {   sub a0, a0, s2                          ;   lw s8, 4                          (sp)}
+
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s7,s6,24
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+#endif
+
+.L_pre_boggle:
+
+#define DC_re   x12
+#define DC_im   x13
+#define Ny_re   x18
+#define Ny_im   x19
+
+    // Pre-boggle the DC and Nyquist bins so we can do everything on the VPU
+    // Wait, is it faster to just compute the results and hold onto them...?
+
+    {   srli s6, N, 1                            ; nop                                           }
+        xm.lddi  DC_re,DC_im, 0(X)
+        xm.ldd  Ny_re,Ny_im, s6(X)
+        srai DC_re, DC_re, 1
+        srai DC_im, DC_im, 1
+        srai Ny_re, Ny_re, 1
+        srai Ny_im, Ny_im, 1
+    {   xm.add s7, DC_re, DC_im                    ;   xm.sub t3, Ny_re, Ny_im                  }
+        xm.stdi  s7,t3, 0(X)
+    {   xm.add s7, Ny_re, Ny_im                    ;   xm.sub t3, DC_im, DC_re                  }
+        xm.std  s7,t3, s6(X)
+
+
+
+#define X_lo    x12
+#define X_hi    x13
+#define i       x18
+#define _32     x19
+
+    // Now go through and compute the outputs
+
+   sh2add X_hi, N, X
+
+       li x28, 0                                                                           /* Translation error on this line: unexpected token at position 92. */ 
+    {   srli i, N, 3                             ;   xm.vsetc t3}
+lui t3, %hi(vpu_vec_complex_neg_j)
+        addi t3,t3, %lo(vpu_vec_complex_neg_j)
+    {   mv X_lo, X                             ;   xm.vldc t3}
+lui t3, %hi(vpu_vec_complex_conj_op)
+        addi t3,t3, %lo(vpu_vec_complex_conj_op)
+    {   li _32, 32                             ;   xm.bu .L_syzygy                            }
+
+.p2align 4
+.L_syzygy:
+        {   addi i, i, -1                             ;   xm.vldd X_hi}
+        { nop                                           ;   xm.vcmr0                               }
+        { nop                                           ;   xm.vcmi0                                }
+        { nop                                           ;   xm.vladsb X_lo}
+        {   add X_lo, X_lo, _32                     ;   xm.vstd X_lo}
+        { nop                                           ;   xm.vlmul0 t3}
+        {   add X_hi, X_hi, _32                     ;   xm.vstr X_hi}
+        { nop                                           ;   xm.bt i, .L_syzygy                     }
+
+    
+        sh2add X, N, X
+    {   srli N, N, 1                             ;   xm.vgetc t3}
+    {   mv s2, t3                             ; nop                                           }
+        call vect_complex_s32_tail_reverse
+
+
+.L_finish:
+
+
+    {   li a0, 31                              ; nop                                           }
+    {   xm.zexti s2, 5                              ; nop                                           }
+    {   sub a0, a0, s2                          ;   lw s8, 4                          (sp)}
+
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s7,s6,24
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS + vect_complex_s32_tail_reverse.nstackwords;      /* Translation error on this line: unexpected token at position 86. */ 
+                                                .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 81. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+.L_function_end: 
+    .size FUNCTION_NAME, .L_function_end - FUNCTION_NAME
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/fft/fft_spectra_split.S b/lib_xcore_math/src/arch/vx4b/fft/fft_spectra_split.S
new file mode 100644
index 00000000..d6300b4e
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/fft/fft_spectra_split.S
@@ -0,0 +1,150 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+headroom_t fft_spectra_split(
+    complex_s32_t* X,
+    const unsigned N);
+*/
+
+
+#define FUNCTION_NAME   fft_spectra_split
+#define NSTACKWORDS     (32)
+
+#define X       x10
+#define N       x11
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 4
+
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,8
+        xm.stdsp  s5,s4,16
+        xm.stdsp  s7,s6,24
+   {   li t3, 0                              ;   sw s8, 4                          (sp)}
+    {   srli s8, N, 3                           ;   xm.vsetc t3}
+
+  #if (XS3_CONFIG_MIN_FFT_LEN <= 4)
+    { nop                                           ;   xm.bt s8, .L_split_the_spectrum           }
+  #endif
+
+#if (CONFIG_MIN_FFT_LEN <= 4)
+.L_fft_length_4:
+
+    // If the FFT length is 4, just do the work here. This keeps the code below simpler.
+    {   xm.mkmski s8, 8                            ; nop                                           }
+    {   slli t3, s8, 16                        ;   lw s2,4                            ( X)}
+    {   add s8, s8, t3                       ;   lw s3,16                            ( X)}
+    {   li t3, 1                              ;   sw s2,16 /* X[2].re <- X[0].im */   ( X)}
+    {   xm.not s8, s8                            ;   sw s3,4 /* X[0].im <- X[2].re */   ( X)}
+        xm.vlashr X, t3
+        xm.vstrpv X, s8
+        xm.lddi  s2,s3, 8(X)
+        xm.lddi  s4,s5, 24(X)
+    {   add s8, s2, s4                         ;   sub t3, s3, s5                         }
+        xm.stdi  s8,t3, 8(X)
+    {   add s8, s3, s5                         ;   sub t3, s4, s2                         }
+        xm.stdi  s8,t3, 24(X)
+    { nop                                           ;   xm.vldd X}
+    { nop                                           ;   xm.vstd X}
+    { nop                                           ;   xm.bu .L_finish                            }
+
+#endif
+
+
+
+.L_split_the_spectrum:
+
+    // First, reverse the tail
+    {   mv s2, X                               ;   mv s3, N                               }
+        sh2add X, N, X
+    {   srli N, N, 1                             ; nop                                           }
+        call vect_complex_s32_tail_reverse
+    {   mv X, s2                               ;   mv N, s3                               }
+
+#define X_lo    X
+#define i       x12
+#define _32     x13
+#define X_hi    x18
+#define DC_im   x19
+#define DC_re   x20
+#define Ny_im   x21
+#define Ny_re   x22
+
+    // x = [DC.re - Ny.im, Ny.re + DC.im, DC.re + Ny.im, -Ny.re + DC.im]
+
+    // If I set [X[0].re, X[0].im, X[K].re, X[k].im] to the vector above, then I can just compute
+    // the results for bins 0 and K along with everything else. Then I'm guaranteed that the number
+    // of elements is a multiple of 4, which means this loop will have no tail, AND it will have
+    // captured the headroom of the vector (although it will be the lesser of the lower and upper
+    // halves)
+    {   li _32, 32                             ;   srli i, N, 1                             }
+        xm.lddi  DC_re,DC_im, 0(X)
+        xm.ldd  Ny_re,Ny_im, i(X)
+    {   sub s7, DC_re, Ny_im                   ;   add t3, DC_im, Ny_re                   }
+        xm.stdi  s7,t3, 0(X)
+    {   add s7, DC_re, Ny_im                    ;   sub t3, DC_im, Ny_re                   }
+        xm.std  s7,t3, i  (X)
+
+#undef DC_re
+#undef DC_im
+#undef Ny_re
+#undef Ny_im
+
+#define conj_vec    x19
+
+
+    sh2add X_hi, N, X_lo
+       li x28, 0x0080                                                                      /* Translation error on this line: unexpected token at position 92. */ 
+    {   srli i, i, 2                             ;   xm.vsetc t3}
+lui t3, %hi(vpu_vec_complex_neg_j)
+        addi t3,t3, %lo(vpu_vec_complex_neg_j)
+    { nop                                           ;   xm.vldc t3}
+lui t3, %hi(vpu_vec_complex_conj_op)
+        addi t3,t3, %lo(vpu_vec_complex_conj_op)
+    { nop                                           ;   xm.bu .L_syzygy                            }
+
+.p2align 4
+.L_syzygy:
+        {   addi i, i, -1                             ;   xm.vldr t3}
+        { nop                                           ;   xm.vlmul0 X_hi}
+        { nop                                           ;   xm.vladsb X_lo}
+        {   add X_lo, X_lo, _32                     ;   xm.vstr X_lo}
+        { nop                                           ;   xm.vcmr0                                   }
+        { nop                                           ;   xm.vcmi0                                    }
+        {   add X_hi, X_hi, _32                     ;   xm.vstr X_hi}
+        { nop                                           ;   xm.bt i, .L_syzygy                         }
+
+
+.L_finish:
+    {   li a0, 31                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ; nop                                           }
+    {   sub a0, a0, t3                         ;   lw s8, 4                          (sp)}
+
+     xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s7,s6,24
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS + vect_complex_s32_tail_reverse.nstackwords;      /* Translation error on this line: unexpected token at position 86. */ 
+                                                .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 81. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+.L_function_end: 
+    .size FUNCTION_NAME, .L_function_end - FUNCTION_NAME
+
+
+
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/fft/tail_reverse_complex_s32.S b/lib_xcore_math/src/arch/vx4b/fft/tail_reverse_complex_s32.S
new file mode 100644
index 00000000..e3215d12
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/fft/tail_reverse_complex_s32.S
@@ -0,0 +1,106 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+    void vect_complex_s32_tail_reverse(
+        complex_s32_t* X,
+        const unsigned N);
+*/
+
+#include "../asm_helper.h"
+
+#define NSTACKWORDS     (32 + 0)
+
+#define FUNCTION_NAME   vect_complex_s32_tail_reverse
+
+#define X       x10
+#define N       x11
+#define X_A     x13
+#define X_C     x18
+#define mask_A  x19
+#define mask_C  x20
+#define i       x21
+#define zero    x22
+#define _16     x23
+#define X_lo    x28
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 4
+
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,8
+        xm.stdsp  s5,s4,16
+        xm.stdsp  s7,s6,24
+    {   li t3, 0                              ;   srli s7, N, 2                            }
+    {   srli t3, N, 2                           ;   xm.vsetc t3}
+    {   srli t3, N, 3                           ;   xm.brff t3, .L_finish                       }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.bt t3, .L_big_enough                   }
+
+        // N = 4, just reverse elements 1 and 3
+        xm.lddi  a3,s2, 8(X)
+        xm.lddi  s7,t3, 24(X)
+        xm.stdi  a3,s2, 24(X)
+        xm.stdi  s7,t3, 8(X)
+        tail .L_finish
+
+.L_big_enough:
+
+#define X_hi    X
+
+    la t3, vpu_vec_zero
+    {   srli i, N, 3                             ;   mv zero, t3                           }
+
+    {   xm.mkmski mask_A, 8                         ;   xm.vclrdr                                  }
+    {   addi X_lo, X, 8                          ;   slli mask_A, mask_A, 8                   }
+    {   li X_A, 32                             ;   slli mask_C, mask_A, 16                  }
+        sh2add X_hi, N, X
+        sh2add X_hi, N, X_hi
+    {   li _16, 16                             ;   sub X_hi, X_hi, X_A                     }
+
+.L_rev_loop:
+        {   add X_A, X_hi, _16                      ;   xm.vldc X_hi}
+        { xm.sub X_C, X_hi, _16                     ;   xm.vldr X_lo }
+        {   addi i, i, -1                           ;   xm.vlmaccr0 zero}
+        { nop                                       ;   xm.vlmaccr0 zero}
+        {   sub X_hi, X_C, _16                      ;   xm.vstr X_hi}
+            xm.vstrpv X_A, mask_A
+            xm.vstrpv X_C, mask_C
+
+
+        {   xm.add X_A, X_lo, _16      ;   xm.vstc X_lo                            }
+        {   xm.sub X_C, X_lo, _16     ;   xm.vldr X_lo                            }
+        { nop                                           ;   xm.vlmaccr0 zero}
+        { nop                                           ;   xm.vlmaccr0 zero}
+        {   add X_lo, X_A, _16                      ;   xm.vstr X_lo}
+            xm.vstrpv X_A, mask_A
+            xm.vstrpv X_C, mask_C
+        { nop                                           ;   xm.bt i, .L_rev_loop                       }
+
+.L_finish:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s7,s6,24
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;  .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;               .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;              .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;            .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+.L_func_end:
+    .size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_s32.S b/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_s32.S
new file mode 100644
index 00000000..fa638c1a
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_s32.S
@@ -0,0 +1,155 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*  
+
+typedef struct {
+    unsigned biquad_count;
+    int32_t state[2][9]; // state[j][k] is the value x_k[j], i.e.  x[n-j] of the kth biquad. x[j][8] are outputs of 8th biquad
+    int32_t coef[5][8];  // coefficients. coef[j][k] is for the kth biquad. j maps to b0,b1,b2,-a1,-a2.
+} filter_biquad_s32_t;
+
+int32_t filter_biquad_s32(
+    filter_biquad_s32_t* filter,
+    const int32_t new_sample);
+*/
+
+#define FUNCTION_NAME filter_biquad_s32
+
+#define NSTACKVECS      (0)
+#define NSTACKWORDS     (32+8*NSTACKVECS)
+
+#define FILT_N          0
+#define FILT_STATE      1
+#define FILT_COEF       19
+
+#define COEF_START      32
+#define STATE_START     10
+
+
+#define state       x10      // ![0x%08X]
+#define sample      x11      // ![%d]
+#define coef        x12      // ![0x%08X]
+#define tmp         x13      // ![%d]
+#define _32         x18      // ![%d]
+#define _36         x19      // ![%d]
+#define filter      x24     // ![0x%08X]
+    
+.text
+.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. */ 
+.type FUNCTION_NAME,@function
+.p2align 4
+
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,8
+    {   li t3, 0                              ;   sw s8, 4                          (sp)}
+    {   mv filter, a0                          ;   xm.vsetc t3}
+    {   xm.ldcu tmp, FILT_STATE + STATE_START       ; nop                                           }
+        sh2add state, tmp, filter          // state <-- &(filter->state[1][1])
+    {   xm.ldcu tmp, FILT_COEF + COEF_START         ;   xm.vclrdr                                  }
+        sh2add coef,  tmp , filter           // coef <-- &(filter->coef[4][0])
+
+    {   li _36, 36                             ;   li _32, 32                             }
+
+// Deal with the b2 and -a2 coefficients before b1 and -a1, so we can overwrite them easily.
+
+    {   sub state, state, _36                   ;   xm.vldc state}
+    {   sub coef, coef, _32                     ;   xm.vlmacc0 coef}
+    {   add state, state, _32                   ;   xm.vldc state}
+    {   sub coef, coef, _32                     ;   xm.vlmacc0 coef}
+    {   sub state, state, _36                   ;   xm.vldc state}
+    {   sub coef, coef, _32                     ;   xm.vlmacc0 coef}
+    { nop                                       ;   xm.vldc state}
+    {   sub coef, coef, _32                     ;   xm.vlmacc0 coef}
+
+    // Now acc[k] =  b1[k] * x[n-1][k] + b2[k] * x[n-2][k] - a1[k] * y[n-1][k] - a2[k] * y[n-2][k]
+    // state = &(filter->state[0][0])
+    // coef = &(filter->coef[0][0])
+
+#undef _36
+#define N       x19      // ![%d]
+
+    // Move filter->state[0][:] to filter->state[1][:]
+
+    {   add t3, state, s3                      ;   xm.vldc state}
+    {   add tmp, state, _32                     ;   lw N,(FILT_N)*4                   ( filter)}
+    {   add t3, t3, _32                       ;   xm.vstc t3}
+    {   slli N, N, 1                             ;   lw tmp,0                         ( tmp)}
+    {   li tmp, 6                             ;   sw tmp,0                         ( t3)}
+
+    // Place the newest input sample in state[0][0]
+    {   sub N, tmp, N                           ;   sw sample,0                    ( state)}
+
+    // Overwrite state[0][1:9] with 0's
+lui t3, %hi(vpu_vec_zero)
+        addi t3,t3, %lo(vpu_vec_zero)
+    {   addi t3, state, 4                       ;   xm.vldc t3}
+    { nop                                           ;   xm.vstc t3}
+
+    // vC[:] <-- coef[b0][:]
+    { nop                                           ;   xm.vldc coef}
+
+    // Every element in x28[0:8] except for x28[0] is zero, so a VLMACC shouldn't affect them.
+    // Subsequent VLMACCs will corrupt the accumulators, but The Mask will stop that from being a
+    // problem. Smokin'!
+
+    // Let's make this more clear. We still haven't MACCed in the terms corresponding to b0,
+    // but we can't do all of those simultaneously as we did with the others because the x[n-0]
+    // for one section IS the output of the previous section, which we haven't finished calculating
+    // yet. So we need to go up the chain of filter sections, computing the output of each to get
+    // the input to the next. Because we've set the state[0][1:0] to zeros, when we're working on
+    // the k'th filter section, MACCing against that will not affect accumulators > k. Then we write
+    // out the output of section k. We do the MACC again, **which will corrupt the accumulators 
+    // which are LESS THAN k.... but that's FINE because we're not going to write them out again.   
+    {   xm.mkmski tmp, 4                            ;   xm.vlmacc0 state}
+        xm.vstrpv t3, tmp
+        li N, 0
+    { nop                                           ;   xm.bru N /* Do N-1 remaining biquads */    }
+
+    {   slli tmp, tmp, 4                         ;   xm.vlmacc0 state}
+        xm.vstrpv t3, tmp
+    {   slli tmp, tmp, 4                         ;   xm.vlmacc0 state}
+        xm.vstrpv t3, tmp
+    {   slli tmp, tmp, 4                         ;   xm.vlmacc0 state}
+        xm.vstrpv t3, tmp
+    {   slli tmp, tmp, 4                         ;   xm.vlmacc0 state}
+        xm.vstrpv t3, tmp
+    {   slli tmp, tmp, 4                         ;   xm.vlmacc0 state}
+        xm.vstrpv t3, tmp
+    {   slli tmp, tmp, 4                         ;   xm.vlmacc0 state}
+        xm.vstrpv t3, tmp
+    {   slli tmp, tmp, 4                         ;   xm.vlmacc0 state}
+        xm.vstrpv t3, tmp
+   
+    // Final vstrpv should have written the output to filt->state[0][N]. filt->state should
+    // still be pointing at filt->state[0][0]
+
+    lw N,(FILT_N)*4                   ( filter)
+    xm.ldw a0,N                        ( state)
+
+
+.L_done:
+    { nop                                           ;   lw s8, 4                          (sp)}
+        xm.lddsp  s3,s2,8
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+.L_size_end:
+    .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_sat_s32.S b/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_sat_s32.S
new file mode 100644
index 00000000..4f175d01
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_sat_s32.S
@@ -0,0 +1,220 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*  
+
+typedef struct {
+    unsigned biquad_count;
+    int32_t state[2][9]; // state[j][k] is the value x_k[j], i.e.  x[n-j] of the kth biquad. x[j][8] are outputs of 8th biquad
+    int32_t coef[5][8];  // coefficients. coef[j][k] is for the kth biquad. j maps to b0,b1,b2,-a1,-a2.
+} filter_biquad_s32_t;
+
+int32_t filter_biquad_sat_s32(
+    filter_biquad_s32_t* filter,
+    const int32_t new_sample);
+*/
+
+#define FUNCTION_NAME filter_biquad_sat_s32
+
+#define NSTACKVECS      (2)
+#define NSTACKWORDS     (10+2+8*NSTACKVECS)
+
+#define STACK_TMP_VR    (NSTACKWORDS - 16-2)
+#define STACK_TMP_VD    (NSTACKWORDS -  8-2)
+
+#define FILT_N          0
+#define FILT_STATE      1
+#define FILT_COEF       19
+
+#define COEF_START      32
+#define STATE_START     10
+
+
+#define state       x10      // ![0x%08X]
+#define sample      x11      // ![%d]
+#define coef        x12      // ![0x%08X]
+#define tmp         x13      // ![%d]
+#define _32         x18      // ![%d]
+#define _36         x19      // ![%d]
+#define filter      x24     // ![0x%08X]
+    
+.text
+.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. */ 
+.type FUNCTION_NAME,@function
+.p2align 4
+
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,8
+    {   li t3, 0                              ;   sw s8, 4                          (sp)}
+    {   mv filter, a0                          ;   xm.vsetc t3}
+    {   xm.ldcu tmp, FILT_STATE + STATE_START       ; nop                                           }
+        sh2add state, tmp, filter          // state <-- &(filter->state[1][1])
+    {   xm.ldcu tmp, FILT_COEF + COEF_START         ;   xm.vclrdr                                  }
+        sh2add coef, tmp, filter            // coef <-- &(filter->coef[4][0])
+
+    {   li _36, 36                             ;   li _32, 32                             }
+
+// Deal with the b2 and -a2 coefficients before b1 and -a1, so we can overwrite them easily.
+
+    {   sub state, state, _36                   ;   xm.vldc state}
+    {   sub coef, coef, _32                     ;   xm.vlmacc0 coef}
+    {   add state, state, _32                   ;   xm.vldc state}
+    {   sub coef, coef, _32                     ;   xm.vlmacc0 coef}
+    {   sub state, state, _36                   ;   xm.vldc state}
+    {   sub coef, coef, _32                     ;   xm.vlmacc0 coef}
+    {   nop                                     ;   xm.vldc state}
+    {   sub coef, coef, _32                     ;   xm.vlmacc0 coef}
+
+    // Now acc[k] =  b1[k] * x[n-1][k] + b2[k] * x[n-2][k] - a1[k] * y[n-1][k] - a2[k] * y[n-2][k]
+    // state = &(filter->state[0][0])
+    // coef = &(filter->coef[0][0])
+
+#undef _36
+#define N       x19      // ![%d]
+
+    // Move filter->state[0][:] to filter->state[1][:]
+
+    {   add t3, state, s3                      ;   xm.vldc state}
+    {   add t3, t3, _32                        ;   xm.vstc t3}
+    {   add tmp, state, _32                    ;   lw s2,(FILT_N)*4                 ( filter)}
+    {   li s3, 6                               ;   lw tmp,0                         ( tmp)}
+    {   li tmp, 6*8                            ;   sw tmp,0                         ( t3)}
+        mul N, s2, s3
+
+    // Place the newest input sample in state[0][0]
+    {   sub N, tmp, N                           ;   sw sample,0                    ( state)}
+
+#undef sample
+#define zeros x11
+
+    // Overwrite state[0][1:9] with 0's
+    lui t3, %hi(vpu_vec_zero)
+        addi t3,t3, %lo(vpu_vec_zero)
+    {   addi zeros, t3, 0                       ;   li _32, 32}
+    {   addi t3, state, 4                       ;   xm.vldc t3}
+    {   nop                                     ;   xm.vstc t3}
+
+    // vC[:] <-- coef[b0][:]
+    {  nop                                      ;   xm.vldc coef}
+
+#undef coef
+#define state_p1 x12
+
+    {   addi state_p1, t3, 0                    ;   addi t3,sp, (STACK_TMP_VR)*4              }
+
+    // Every element in x28[0:8] except for x28[0] is zero, so a VLMACC shouldn't affect them.
+    // Subsequent VLMACCs will corrupt the accumulators, but The Mask will stop that from being a
+    // problem. Smokin'!
+
+    // Let's make this more clear. We still haven't MACCed in the terms corresponding to b0,
+    // but we can't do all of those simultaneously as we did with the others because the x[n-0]
+    // for one section IS the output of the previous section, which we haven't finished calculating
+    // yet. So we need to go up the chain of filter sections, computing the output of each to get
+    // the input to the next. Because we've set the state[0][1:0] to zeros, when we're working on
+    // the k'th filter section, MACCing against that will not affect accumulators > k. Then we write
+    // out the output of section k. We do the MACC again, **which will corrupt the accumulators 
+    // which are LESS THAN k.... but that's FINE because we're not going to write them out again.
+
+    // instead of using _32 could just use that register as a second pointer for the vD temp stack,
+    // wouldn't need to recalculate the stack pointer every time, doesn't            _       _
+    // matter here as we're not using most of the resourse line instructions anyway   \(`~`)/
+
+    {   xm.mkmski tmp, 4                            ;   xm.vlmacc0 state}
+    {   add t3, t3, _32                             ;   xm.vstr t3}
+    {   nop                                         ;   xm.vstd t3}
+        xm.vlsat zeros
+        xm.vstrpv state_p1, tmp
+li N, (0)
+    { nop                                           ;   xm.bru N /* Do N-1 remaining biquads */    }
+
+    {   sub t3, t3, _32                             ;   xm.vldd t3}
+    {   nop                                         ;   xm.vldr t3}
+    {   slli tmp, tmp, 4                            ;   xm.vlmacc0 state}
+    {   add t3, t3, _32                             ;   xm.vstr t3}
+    {   nop                                         ;   xm.vstd t3}
+        xm.vlsat zeros
+        xm.vstrpv state_p1, tmp
+
+
+    {   sub t3, t3, _32                             ;   xm.vldd t3}
+    {   nop                                         ;   xm.vldr t3}
+    {   slli tmp, tmp, 4                            ;   xm.vlmacc0 state}
+    {   add t3, t3, _32                             ;   xm.vstr t3}
+    {   nop                                         ;   xm.vstd t3}
+        xm.vlsat zeros
+        xm.vstrpv state_p1, tmp
+
+
+    {   sub t3, t3, _32                             ;   xm.vldd t3}
+    {   nop                                         ;   xm.vldr t3}
+    {   slli tmp, tmp, 4                            ;   xm.vlmacc0 state}
+    {   add t3, t3, _32                             ;   xm.vstr t3}
+    {   nop                                         ;   xm.vstd t3}
+        xm.vlsat zeros
+        xm.vstrpv state_p1, tmp
+
+
+    {   sub t3, t3, _32                             ;   xm.vldd t3}
+    {   nop                                         ;   xm.vldr t3}
+    {   slli tmp, tmp, 4                            ;   xm.vlmacc0 state}
+    {   add t3, t3, _32                             ;   xm.vstr t3}
+    {   nop                                         ;   xm.vstd t3}
+        xm.vlsat zeros
+        xm.vstrpv state_p1, tmp
+
+
+    {   sub t3, t3, _32                             ;   xm.vldd t3}
+    {   nop                                         ;   xm.vldr t3}
+    {   slli tmp, tmp, 4                            ;   xm.vlmacc0 state}
+    {   add t3, t3, _32                             ;   xm.vstr t3}
+    {   nop                                         ;   xm.vstd t3}
+        xm.vlsat zeros
+        xm.vstrpv state_p1, tmp
+
+
+    {   sub t3, t3, _32                             ;   xm.vldd t3}
+    {   nop                                         ;   xm.vldr t3}
+    {   slli tmp, tmp, 4                            ;   xm.vlmacc0 state}
+    {   add t3, t3, _32                             ;   xm.vstr t3}
+    {   nop                                         ;   xm.vstd t3}
+        xm.vlsat zeros
+        xm.vstrpv state_p1, tmp
+
+
+    {   sub t3, t3, _32                             ;   xm.vldd t3}
+    {   nop                                         ;   xm.vldr t3}
+    {   slli tmp, tmp, 4                            ;   xm.vlmacc0 state}
+    {   add t3, t3, _32                             ;   xm.vstr t3}
+    {   nop                                         ;   xm.vstd t3}
+        xm.vlsat zeros
+        xm.vstrpv state_p1, tmp
+
+    // Final vstrpv should have written the output to filt->state[0][N]. filt->state should
+    // still be pointing at filt->state[0][0]
+
+    { nop                                           ;   lw N,(FILT_N)*4                   ( filter)}
+    xm.ldw a0,N                        ( state)
+    { nop                                           ;   lw s8, 4                          (sp)}
+        xm.lddsp  s3,s2,8
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+.L_size_end:
+    .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s16.S b/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s16.S
new file mode 100644
index 00000000..cf0ae599
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s16.S
@@ -0,0 +1,118 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*  
+
+typedef struct {
+    unsigned num_taps;
+    right_shift_t shift;
+    int16_t* coef;
+    int16_t* state;
+} filter_fir_s16_t;
+
+int16_t filter_fir_s16(
+    filter_fir_s16_t* filter,
+    const int16_t new_sample);
+*/
+
+#define FUNCTION_NAME filter_fir_s16
+
+#define NSTACKVECS      (2)
+#define NSTACKWORDS     (12+8*NSTACKVECS)
+
+#define FILT_N          0
+#define FILT_SHIFT      1
+#define FILT_COEF       2
+#define FILT_STATE      3
+
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8)
+#define STACK_VEC_VR    (NSTACKWORDS-16)
+
+#define STACK_FILTER    (8)
+
+#define buff        x10
+#define length      x11
+#define sample      x12
+#define tmpA        x13
+#define _32         x18
+#define coef        x19
+#define filter      x24
+    
+.text
+.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. */ 
+.type FUNCTION_NAME,@function
+.p2align 4
+
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+
+        xm.stdsp  s3,s2,8
+        xm.stdsp  s5,s4,16
+        xm.stdsp  s7,s6,24
+    {   li _32, 32                             ;   sw s8, 4                          (sp)}
+    {   mv filter, a0                          ;   mv sample, a1                          }
+    { nop                                           ;   lw length,(FILT_N)*4              ( filter)}
+    { nop                                           ;   lw buff,(FILT_STATE)*4            ( filter)}
+        call filter_fir_s16_push_sample_up
+    { nop                                           ;   lw coef,(FILT_COEF)*4             ( filter)}
+    { nop                                           ;   lw buff,(FILT_STATE)*4            ( filter)}
+    { nop                                           ;   lw length,(FILT_N)*4              ( filter)}
+    {   slli t3, _32, 3                         ;   xm.vclrdr                                  }
+    {   addi t3,sp, (STACK_VEC_TMP)*4             ;   xm.vsetc t3}
+    {   mv tmpA, length                        ;   xm.vstd t3}
+    {   xm.zexti tmpA, 4                            ;   srli length, length, 4                   }
+    {   slli tmpA, tmpA, 1                       ;   xm.brff length, .L_loop_end                  }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    
+    .L_loop_top:
+        {   add buff, buff, _32                     ;   xm.vldc buff}
+        {   addi length, length, -1                   ;   xm.vlmaccr0 coef}
+        xm.vlmaccr1 coef
+        {   add coef, coef, _32                     ;   xm.bt length, .L_loop_top                  }
+    .L_loop_end:
+
+    {   addi a2,sp, (STACK_VEC_VR)*4               ;   xm.mkmsk tmpA, tmpA                            }
+    {   mv t3, buff                           ;   xm.vstr a2}
+    {   addi t3,sp, (STACK_VEC_TMP)*4             ;   xm.vldr t3}
+        xm.vstrpv t3, tmpA
+    {   li tmpA, 0                             ;   lw a2,(FILT_SHIFT)*4                  ( filter)}
+    {   addi t3,sp, (STACK_VEC_VR)*4              ;   xm.vldc t3}
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vlmaccr0 coef}
+    xm.vlmaccr1 coef
+    //{ nop                                           ;   xm.vadddr                                      }
+    xm.st16 x12, tmpA(x28)
+
+    xm.vlsat t3
+    { nop                                           ;   xm.vstr t3}
+    xm.ld16s x10, tmpA(x28) 
+.L_done:
+    { nop                                           ;   lw s8, 4                          (sp)}
+        xm.lddsp  s7,s6,24
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s3,s2,8
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS + filter_fir_s16_push_sample_up.nstackwords;      /* Translation error on this line: unexpected token at position 86. */ 
+.global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 33. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+.L_size_end:
+    .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s32.S b/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s32.S
new file mode 100644
index 00000000..c06f7123
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s32.S
@@ -0,0 +1,206 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*  
+
+typedef struct {
+    unsigned num_taps;
+    unsigned head;
+    right_shift_t shift;
+    int32_t* coef;
+    int32_t* state;
+} filter_fir_s32_t;
+
+int32_t filter_fir_s32(
+    filter_fir_s32_t* filter,
+    const int32_t new_sample);
+*/
+
+#define FUNCTION_NAME filter_fir_s32
+
+#define NSTACKVECS      (1)
+#define NSTACKWORDS     (12+8*NSTACKVECS)
+
+#define FILT_N          0
+#define FILT_HEAD       1
+#define FILT_SHIFT      2
+#define FILT_COEF       3
+#define FILT_STATE      4
+
+
+#define STACK_VEC_TMP   (NSTACKWORDS-12)
+
+
+#define filter      x23
+#define sample      x11
+#define tmp1        x18
+#define tmp2        x22    
+    
+.text
+.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. */ 
+.type FUNCTION_NAME,@function
+.p2align 4
+
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,8
+        xm.stdsp  s5,s4,16
+        xm.stdsp  s7,s6,24
+    {   li t3, 0                              ;   sw s8, 4                          (sp)}
+
+    // Set VPU mode to 32-bit
+    {   mv filter, a0                          ;   xm.vsetc t3}
+
+
+// The field filter->head points to where the newest sample will go, which is probably somewhere in the middle of the 
+// state vector. This effectively splits the work to be done into two pieces -- the stuff after filter->head, and the 
+// stuff before it. The stuff after filter->head I'm calling part A (corresponds to lowest coef[] indices). The stuff 
+// before it I'm calling part B. 
+
+// I'm just going to create two sets of registers, corresponding to each of the two parts. That's what this is.
+
+#define state_A     x10
+#define state_B     x19
+
+#define N_A         x12    
+#define N_B         x21
+
+#define coef_A      x11
+#define coef_B      x20
+
+    // Get the current head position, which is also the number of taps in part B
+    { nop                                           ;   lw N_B,(FILT_HEAD)*4              ( filter)}
+
+    // If N_B is currently zero, then the next head is the final index. Otherwise it's just
+    // the head decremented by 1.
+    {   addi t3, N_B, -1                         ;   lw N_A,(FILT_N)*4                 ( filter)}
+    { nop                                           ;   xm.bt N_B, .L_no_reset                     }
+    {   addi t3, N_A, -1                         ; nop                                           }
+
+    .L_no_reset:
+    { nop                                           ;   sw t3,(FILT_HEAD)*4              ( filter)}
+
+    // Store the newest sample in the state. And grab the rest of the state/coef/N values
+    { nop                                           ;   lw state_B,(FILT_STATE)*4         ( filter)}
+        sh2add state_A, N_B, state_B
+    {   sub N_A, N_A, N_B                       ;   sw sample,0                  ( state_A)}
+    {   slli tmp1, N_A, 2                        ;   lw coef_A,(FILT_COEF)*4           ( filter)}
+        sh2add coef_B, N_A, coef_A
+#undef sample
+
+    // Each part has its own tail. We'll handle both of those first (by masking the state with zeros), then we'll do the 
+    // bulk of the work after
+
+    {   addi s8,sp, (STACK_VEC_TMP)*4             ;   xm.vclrdr                                  }
+    {   mv t3, state_A                        ;   xm.vstd s8}
+    {   xm.zexti tmp1, 5                            ;   xm.vldr t3}
+    {   xm.mkmsk t3, tmp1                         ;   srli N_A, N_A, 3                         }
+        xm.vstrpv s8, t3
+    {   mv t3, state_B                        ;   xm.vldc s8}
+    {   slli tmp2, N_B, 2                        ;   xm.vldr t3}
+    {   xm.zexti tmp2, 5                            ;   xm.vstd s8}
+    {   xm.mkmsk t3, tmp2                         ;   srli N_B, N_B, 3                         }
+        xm.vstrpv s8, t3
+    {   add state_A, state_A, tmp1              ;   xm.vclrdr                                  }
+    {   add coef_A, coef_A, tmp1                ;   xm.vlmaccr0 coef_A}
+    {   add state_B, state_B, tmp2              ;   xm.vldc s8}
+    {   add coef_B, coef_B, tmp2                ;   xm.vlmaccr0 coef_B}
+
+// Now, go back through and do full vectors.
+
+#undef tmp2
+#define _32     x22
+
+    tail .L_part_A_start
+    .p2align 4
+    .L_part_A_start:
+        {   li _32, 32                             ;   xm.brff N_A, .L_part_A_end                   }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        .L_part_A_loop_top:
+            {   add state_A, state_A, _32               ;   xm.vldc state_A}
+            {   addi N_A, N_A, -1                         ;   xm.vlmaccr0 coef_A}
+            {   add coef_A, coef_A, _32                 ;   xm.bt N_A, .L_part_A_loop_top              }
+    .L_part_A_end:
+#undef state_A
+#undef N_A
+#undef coef_A
+
+    .L_part_B_start:
+        {   li _32, 32                             ;   xm.brff N_B, .L_part_B_end                   }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        .L_part_B_loop_top:
+            {   add state_B, state_B, _32               ;   xm.vldc state_B}
+            {   addi N_B, N_B, -1                         ;   xm.vlmaccr0 coef_B}
+            {   add coef_B, coef_B, _32                 ;   xm.bt N_B, .L_part_B_loop_top              }
+    .L_part_B_end:
+    
+#undef state_B
+#undef N_B
+#undef coef_B
+
+// Now combine the 40-bit accumulators, assumes that x24 points to the stack.
+// (the logic for this is a too complicated to explain here)
+lui t3, %hi(vpu_vec_0x40000000)
+        addi t3,t3, %lo(vpu_vec_0x40000000)
+    { nop                                           ;   lw a2,(FILT_SHIFT)*4              ( filter)}
+    {   addi s2, a2, -1                           ;   xm.vldc t3}
+    {   li s3, 1                               ;   xm.vstr s8}
+lui t3, %hi(vpu_vec_0x80000000)
+        addi t3,t3, %lo(vpu_vec_0x80000000)
+    {   xm.shl s2, s3, s2                          ;   xm.vlmacc0 t3}
+lui t3, %hi(vpu_vec_zero)
+        addi t3,t3, %lo(vpu_vec_zero)
+    {   li t3, 0                              ;   xm.vldr t3}
+    {   xm.slt a3, t3, a2                         ;   xm.vlmaccr0 s8}
+
+    { nop ; xm.vstd x24}
+    { xm.neg x20, x12 ; nop}
+//{   neg s4, a2                              ;   vstd s8}"
+    {   addi s4, s4, 1                           ;   xm.vlmaccr0 s8}
+    { nop                                           ;   xm.vstr s8}
+
+// x11 and x10 will contain a 64-bit result. Left or right-shift that as appropriate.
+
+        xm.lddi  a1,a0, 0(s8)
+    {   addi a1, a1, 8                           ;   xm.brff a3, .L_left_shift                    }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_right_shift:
+        // (from the block above):  x19 = 1, x18 = 1<<(x12 - 1)
+        // adding x18*x19 (=x18) to x11:x10 effectively rounds it when we extract it.
+        xm.maccs a1, a0, s2, s3
+        xm.lsats a1, a0, a2
+        xm.lextract a0, a1, a0, a2, 32
+    { nop                                           ;   xm.bu .L_done                              }
+
+    .L_left_shift:
+        // (from the block above):  x19 = 1, x20 = -x12 + 1, x28 = 0
+        // If we're left-shifting (or zero-shifting), we still need to saturate to q31.
+        // lsats has a bug which doesn't allow to use it with 0, so we'll have to 
+        // add 1 to our shift, left-shift, saturate and extract with 1, no need to round here.
+    {   xm.shl a1, a1, s4                          ; nop                                           }
+        xm.linsert a1, t3, a0, s4, 32
+        xm.lsats a1, t3, s3
+        xm.lextract a0, a1, t3, s3, 32
+
+.L_done:
+        xm.lddsp  s7,s6,24
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s3,s2,8
+    { nop                                           ;   lw s8, 4                          (sp)}
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+.L_size_end:
+    .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/filter/push_sample_down_s16.S b/lib_xcore_math/src/arch/vx4b/filter/push_sample_down_s16.S
new file mode 100644
index 00000000..96235de8
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/filter/push_sample_down_s16.S
@@ -0,0 +1,127 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*  
+
+Push a sample into the buffer, moving everything 1 index up.
+
+void filter_fir_s16_push_sample_down(
+    int16_t* buffer,
+    const unsigned length,
+    const int16_t new_value);
+*/
+
+#define FUNCTION_NAME filter_fir_s16_push_sample_down
+
+#define NSTACKVECS      (1)
+#define NSTACKWORDS     (12+8*NSTACKVECS)
+
+
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8)
+
+
+#define buff        x10
+#define length      x11
+#define value       x12
+#define _60         x13
+#define mask        x18
+#define tail_start  x19
+#define buff_end    x20
+#define buffD       x21
+#define tmp         x24
+    
+.text
+.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. */ 
+.type FUNCTION_NAME,@function
+.p2align 4
+
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,8
+        xm.stdsp  s5,s4,16
+        xm.stdsp  s7,s6,24
+    {   li a3, 32                              ;   sw s8, 4                          (sp)}
+
+    {   slli t3, a3, 3                          ;   li mask, 28 /*28 samples at a time*/   }
+    {   xm.mkmsk mask, mask                        ;   xm.vsetc t3}
+
+    // We're going to be moving 28 samples per loop iteration. The last address at which we 
+    // can move 28 samples is  56 bytes before the end of the buffer. The end of the buffer is
+    // at  buff + 2*length. 
+    
+    {   slli tail_start, length, 1               ;   li t3, 56                             }
+    {   add buff_end, buff, tail_start          ;   slli mask, mask, 4                       }
+    {   sub tail_start, buff_end, t3           ;   addi _60, t3, 4                         }
+
+    {   mv t3, buff                           ;   xm.sltu tmp, tail_start, buff               }
+    {   li tmp, 28                             ;   xm.bt tmp, .L_loop_end                     }
+    {   add buffD, buff, tmp                    ;   xm.bu .L_loop_top                          }
+
+    .p2align 4 // Does this loop have an FNOP after the first iteration? It all fits in the instruction buffer..
+    .L_loop_top:
+        {   addi buff, t3, -4                        ;   xm.vldr t3}
+        {   add t3, buff, _60                      ;   xm.vldd buffD}
+        {   addi buffD, buffD, -4                     ;   xm.vlmaccr0 buff}
+        xm.vlmaccr1 buff
+        {   xm.sltu tmp, tail_start, t3                ;   xm.vstd buffD}
+            xm.vstrpv buff , mask
+        {   add buffD, buffD, _60                   ;   xm.bt tmp, .L_loop_end   } 
+        {nop ;   xm.bu .L_loop_top }             /* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    .L_loop_end:
+
+#undef _60
+
+    // x28 holds the address of the next sample to be moved.
+    {   sub length, buff_end, t3               ;   li tmp, 29                         }
+    {   xm.sltu tmp, length, tmp                    ;   li a3, 28                          }
+    { nop                                           ;   xm.bt tmp, .L_skippp                   }
+    {   addi buff, t3, -4                        ;   xm.vldr t3}
+    {   nop                        ;   xm.vlmaccr0 t3}
+    xm.vlmaccr1 t3
+    {   add t3, t3, a3                        ;   nop}
+        xm.vstrpv buff, mask
+
+    
+    .L_skippp:
+    {   sub length, buff_end, t3               ; nop                                       }
+    {   li a3, 0                               ;   xm.vldr t3}
+    {   xm.mkmsk tmp, length                       ;   addi buff, t3, -4                    }
+    {   nop                   ;   xm.vlmaccr0 t3}   
+     xm.vlmaccr1 t3            
+    {   add t3, t3, length                    ;   nop}
+    {   addi t3, t3, -2                         ;   slli tmp, tmp, 4                     }
+        xm.vstrpv buff, tmp
+
+        xm.st16 value,  a3(t3)
+        //xm.st16 value,  t3(a3)
+
+.L_done:
+        xm.lddsp  s7,s6,24
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s3,s2,8
+    { nop                                           ;   lw s8, 4                          (sp)}
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+.L_size_end:
+    .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/filter/push_sample_up_s16.S b/lib_xcore_math/src/arch/vx4b/filter/push_sample_up_s16.S
new file mode 100644
index 00000000..c7073b05
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/filter/push_sample_up_s16.S
@@ -0,0 +1,152 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*  
+
+Push a sample into the buffer, moving everything 1 index up.
+
+void filter_fir_s16_push_sample_up(
+    int16_t* buffer,
+    const unsigned length,
+    const int16_t new_value);
+*/
+
+#define FUNCTION_NAME filter_fir_s16_push_sample_up
+
+#define NSTACKVECS      (1)
+#define NSTACKWORDS     (12+8*NSTACKVECS)
+
+
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8)
+
+
+#define buff_start  x10
+#define length      x11
+#define value       x12
+#define tmpB        x13
+#define mask        x18
+#define buffR       x19
+#define tmpC        x20
+#define buffD       x21
+#define tmp         x24
+    
+.text
+.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. */ 
+.type FUNCTION_NAME,@function
+.p2align 4
+
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,8
+        xm.stdsp  s5,s4,16
+        xm.stdsp  s7,s6,24
+    {   li tmpB, 32                            ;   sw s8, 4                          (sp)}
+
+    {   slli t3, tmpB, 3                        ;   xm.mkmski mask, 32                          }
+    {   mv tmp, length                         ;   xm.vsetc t3}
+
+// If the number of samples is odd, pretend it was one larger. If it's even, move the
+// final sample without the VPU.
+
+    xm.zexti tmp, 1
+    xm.eq buffR, length, 1 
+    {   add length, length, tmp                 ;   xm.bt buffR, .L_write_new_sample           }
+    {   addi tmp, length, -2                      ;   xm.bt tmp, .L_odd_samps                    }
+.L_even_samps:
+   // xm.ld16s buffD, buff_start(tmp)
+    xm.ld16s buffD, tmp(buff_start)
+    addi tmp, tmp, 1   
+    //xm.st16 buffD, buff_start(tmp) 
+    xm.st16 buffD, tmp(buff_start)
+.L_odd_samps:
+
+    {   slli mask, mask, 4                       ;   slli length, length, 1                   }
+
+// buffR <-- first byte after buff[]
+// mask <-- 0xFFFFFFF0
+    {   add buffR, buff_start, length           ; nop                                           }
+
+// Move buffD and buffR to point to:
+    {   sub buffR, buffR, tmpB                  ;   li tmpB, 28                            }
+    {   sub buffD, buffR, tmpB                  ;   srli mask, mask, 2                       }
+
+// If (buffD < buff_start) then skip the loop.
+    {   mv t3, buffR                          ;   xm.sltu tmp, buffD, buff_start              }
+    {   li tmpB, 56                            ;   xm.bt tmp, .L_loop_end                     }
+    { nop                                           ;   xm.bu .L_loop_top                          }
+
+// Do the loop. Align to 16 bytes so that we hopefully don't have FNOPs after the first
+// iteration.
+    .p2align 4
+    .L_loop_top:
+        {   mv buffR, buffD                        ;   xm.vldr t3}
+        {   sub buffD, buffD, tmpB                  ;   xm.vldd buffD}
+        {   xm.sltu tmp, buffD, buff_start              ;   xm.vlmaccr0 t3}
+        xm.vlmaccr1 t3
+        xm.vstrpv t3, mask
+        { nop                                           ;   xm.vstd buffR}
+      //  {   sub t3, t3, tmpB                      ;   xm.brff tmp, .L_loop_top                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+         {   sub t3, t3, tmpB                      ;   xm.bt tmp, .L_loop_end        }
+         {xm.bu .L_loop_top ;nop}
+    .L_loop_end:
+
+
+    // If (x28 < buff_start ) we CANNOT do another vector (just vR[]) using the same
+    // mask. Otherwise, we can.
+
+    {   xm.sltu tmp, t3, buff_start                ; nop                                           }
+    {   mv buffR, t3                          ;   xm.bt tmp, .L_skippp                       }
+    {   li tmpB, 28                            ;   xm.vldr t3}
+    {   sub t3, t3, tmpB                     ;   xm.vlmaccr0 buffR}
+    xm.vlmaccr1 buffR
+    xm.vstrpv buffR, mask
+
+.L_skippp:
+    // Now we have less than 1 vector (14 samples) to shift. They'll be at the end of
+    // the vector when we load x28. Everything after buff_start.
+
+    {   sub length, buff_start, t3             ;   xm.mkmski tmpC, 2                           }
+    {   xm.mkmski mask, 32                          ;   xm.bitrev tmpC, tmpC                       }
+    
+    { xm.shl mask, mask, length; xm.vldr t3}
+
+    xm.andnot mask, tmpC 
+    {nop; xm.vlmaccr0 x28                          }
+    xm.vlmaccr1 x28   
+
+    xm.vstrpv t3, mask
+
+.L_write_new_sample:
+    {   li tmpC, 0                             ; nop                                           }
+   // xm.st16 value, buff_start(tmpC)
+     xm.st16 value, tmpC(buff_start)
+.L_done:
+        xm.lddsp  s7,s6,24
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s3,s2,8
+    { nop                                           ;   lw s8, 4                          (sp)}
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+.L_size_end:
+    .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/filter/vect_s32_convolve_valid.S b/lib_xcore_math/src/arch/vx4b/filter/vect_s32_convolve_valid.S
new file mode 100644
index 00000000..49ee937e
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/filter/vect_s32_convolve_valid.S
@@ -0,0 +1,130 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+
+headroom_t vect_s32_convolve_valid(
+    int32_t signal_out[],
+    const int32_t signal_in[],
+    const int32_t filter_q30[],
+    const unsigned signal_in_length,
+    const unsigned filter_taps);
+    
+*/
+
+// #include "../asm_helper.h"
+
+#define NSTACKVECTS     (2)
+#define NSTACKWORDS     (16 + 8*NSTACKVECTS+4)
+
+#define FUNCTION_NAME   vect_s32_convolve_valid
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+
+
+#define sig_out     x10
+#define sig_in      x11
+#define filter      x12
+#define len         x13
+
+#define tmpA        x18
+#define _32         x19
+#define vec_tmp     x20
+#define tmpB        x21
+
+
+#define P           filter    // P = (filter_taps >> 1)
+
+
+
+.text; .issue_mode dual /* Translation error on this line: unexpected token at position 5. */ 
+.p2align 2
+
+
+FUNCTION_NAME:
+
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    xm.stdsp  s7,s6,24
+
+  ////// Set mode to 32-bit
+  { li t3, 0                                ; sw s8, 4                            (sp)}
+  { addi vec_tmp,sp, (STACK_VEC_TMP)*4           ; xm.vsetc t3}
+  
+  ////// Move the filter coefficients into vC[]
+  mv tmpB, a4
+  { mv t3, filter                           ; nop}
+  { slli tmpA, tmpB, 2                         ; xm.vclrdr                                    }
+  { xm.mkmsk tmpA, tmpA                          ; xm.vstd vec_tmp}
+  { srli P, tmpB, 1                            ; xm.vldr t3}
+    xm.vstrpv vec_tmp, tmpA
+  { sub len, len, P                          ; xm.vldc vec_tmp}
+  { sub len, len, P                          ; li _32, 32                               }
+  
+  // Number of output elements is  sig_in_length - (2 * (filter_taps >> 1)) = sig_in_length - 2*P
+
+  { srli t3, len, 3                           ; add sig_in, sig_in, _32                   }
+  { addi sig_in, sig_in, -4                     ; xm.brff t3, .L_loop_bot                       }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+  .L_loop_top:
+    { addi len, len, -8                           ; xm.vclrdr                                    }
+    { addi t3, sig_in, -4                        ; xm.vlmaccr0 sig_in}
+    { addi t3, t3, -4                           ; xm.vlmaccr0 t3}
+    { addi t3, t3, -4                           ; xm.vlmaccr0 t3}
+    { addi t3, t3, -4                           ; xm.vlmaccr0 t3}
+    { addi t3, t3, -4                           ; xm.vlmaccr0 t3}
+    { addi t3, t3, -4                           ; xm.vlmaccr0 t3}
+    { addi t3, t3, -4                           ; xm.vlmaccr0 t3}
+    { srli t3, len, 3                           ; xm.vlmaccr0 t3}
+    { add sig_in, sig_in, _32                   ; xm.vstr sig_out}
+    { add sig_out, sig_out, _32                 ; xm.bt t3, .L_loop_top                       }
+  .L_loop_bot:
+
+// If there is a tail, then len will be non-zero.
+// In that case, there are len elements left to VLMACCR, but sig_in[] currently points to the last
+// element of the group, assuming a full 8 elements are to be output. But of course the tail must,
+// by definition, be fewer than 8 elements.  So sig_in[] needs to be offset:
+//  sig_in <-- sig_in - 4*(8 - len) = sig_in - 32 + 4*len
+
+  { slli len, len, 2                           ; xm.brff len, .L_finish                         }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+  { sub sig_in, sig_in, _32                   ; xm.vclrdr                                    }
+  { xm.mkmsk tmpA, len                           ; add sig_in, sig_in, len                   }
+
+  .L_tail_loop:
+    { addi len, len, -4                         ; xm.vlmaccr0 sig_in}
+    { addi sig_in, sig_in, -4                   ; xm.bt len, .L_tail_loop                      }
+  .L_tail_loop_bot:
+
+    xm.vstrpv sig_out, tmpA
+  { nop                                           ; xm.vstr vec_tmp}
+
+.L_finish:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s7,s6,24
+    {   li a0, 31                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ;   lw s8, 4                          (sp)}
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       } 
+    
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/matrix/mat_mul_s8_x_s8_yield_s32.S b/lib_xcore_math/src/arch/vx4b/matrix/mat_mul_s8_x_s8_yield_s32.S
new file mode 100644
index 00000000..01a4cc37
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/matrix/mat_mul_s8_x_s8_yield_s32.S
@@ -0,0 +1,128 @@
+// Copyright 2021-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+
+  M_rows MUST be a multiple of 16, and N_cols MUST be a multiple of 32
+
+void mat_mul_s8_x_s8_yield_s32 (
+    split_acc_s32_t accumulators[],
+    const int8_t matrix[],
+    const int8_t input_vect[],
+    const unsigned M_rows,
+    const unsigned N_cols);
+*/
+
+
+#include "../asm_helper.h"
+
+#define NSTACKWORDS     (12+8+4)
+
+#define FUNCTION_NAME   mat_mul_s8_x_s8_yield_s32
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+
+#define STACK_M_ROWS      (9)
+#define STACK_INPUT_VECT  (9)
+
+#define accs            x10 
+#define matrix          x11 
+#define vector          x12
+#define rows_left       x13
+#define cols_left       x18
+#define _32             x19
+#define N_cols          x20
+#define mat_stride_B    x21
+#define mat_stride_C    x22
+
+#define K               x24
+
+.text
+.p2align 2
+
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    li t3, 0x200
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    xm.stdsp  s7,s6,24
+  { li _32, 32                               ; sw s8, 4                            (sp)}
+  { li s8, 15                               ; xm.vsetc t3}
+    add rows_left, rows_left, s8
+    mv N_cols, a4
+  { srli rows_left, rows_left, 4               ; nop}
+  { slli mat_stride_B, N_cols, 4               ; mv K, N_cols                             }
+  { sub mat_stride_C, mat_stride_B, N_cols    ; li s7, 31                                }
+  { add mat_stride_B, mat_stride_C, _32       ; sw vector, (STACK_INPUT_VECT)*4          (sp)}
+
+  { xm.zexti K, 5                                 ; add s7, N_cols, s7                        }
+  { sub K, _32, K                             ; srli s7, s7, 5                             }
+  { xm.zexti K, 5                                 ; nop                                           }
+  { add matrix, matrix, K                     ; sub mat_stride_C, mat_stride_C, K         }
+  mv a4, N_cols
+  { slli K, s7, 5                              ; nop}
+  
+  { add matrix, matrix, mat_stride_C          ; xm.bu .L_output_group_top                    }
+
+  .p2align 4
+  .L_output_group_top:
+    { add t3, accs, _32                        ; xm.vldd accs}
+    { mv cols_left, K                          ; xm.vldr t3}
+    { addi rows_left, rows_left, -1               ; lw vector, (STACK_INPUT_VECT)*4          (sp)}
+    
+    .L_input_group_top:
+      { add vector, vector, _32                   ; xm.vldc vector}
+      { sub matrix, matrix, N_cols                ; xm.vlmaccr0 matrix}
+      { sub matrix, matrix, N_cols                ; xm.vlmaccr0 matrix}
+      { sub matrix, matrix, N_cols                ; xm.vlmaccr0 matrix}
+      { sub matrix, matrix, N_cols                ; xm.vlmaccr0 matrix}
+      { sub matrix, matrix, N_cols                ; xm.vlmaccr0 matrix}
+      { sub matrix, matrix, N_cols                ; xm.vlmaccr0 matrix}
+      { sub matrix, matrix, N_cols                ; xm.vlmaccr0 matrix}
+      { sub matrix, matrix, N_cols                ; xm.vlmaccr0 matrix}
+      { sub matrix, matrix, N_cols                ; xm.vlmaccr0 matrix}
+      { sub matrix, matrix, N_cols                ; xm.vlmaccr0 matrix}
+      { sub matrix, matrix, N_cols                ; xm.vlmaccr0 matrix}
+      { sub matrix, matrix, N_cols                ; xm.vlmaccr0 matrix}
+      { sub matrix, matrix, N_cols                ; xm.vlmaccr0 matrix}
+      { sub matrix, matrix, N_cols                ; xm.vlmaccr0 matrix}
+      { sub matrix, matrix, N_cols                ; xm.vlmaccr0 matrix}
+      { sub cols_left, cols_left, _32             ; xm.vlmaccr0 matrix}
+      { add matrix, matrix, mat_stride_B          ; xm.bt cols_left, .L_input_group_top          }
+    .L_input_group_bottom:
+
+    { add accs, accs, _32                       ; xm.vstd accs}
+    { add accs, accs, _32                       ; xm.vstr accs}
+    { add matrix, matrix, mat_stride_C          ; xm.bt rows_left, .L_output_group_top         }
+  .L_output_group_bottom:
+    
+.L_finish:
+      xm.lddsp  s3,s2,8
+      xm.lddsp  s5,s4,16
+      xm.lddsp  s7,s6,24
+  { nop                                           ; lw s8, 4                          (sp)}
+  { nop                                           ; xm.retsp (NSTACKWORDS)*4                       } 
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/misc/chunk_float_s32_log.S b/lib_xcore_math/src/arch/vx4b/misc/chunk_float_s32_log.S
new file mode 100644
index 00000000..2e4a617c
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/misc/chunk_float_s32_log.S
@@ -0,0 +1,184 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+
+Condition:  0 < ldexp(b[k], -30) < 2
+
+
+void chunk_float_s32_log(
+    q8_24 a[],
+    const float_s32_t b[]);
+*/
+
+
+#define NSTACKWORDS     (8+48+4)
+
+#define FUNCTION_NAME   chunk_float_s32_log
+
+#define SP_VEC_X1    ((NSTACKWORDS) - 8 -4)
+#define SP_VEC_X2    ((NSTACKWORDS) - 16-4)
+#define SP_VEC_X3    ((NSTACKWORDS) - 24-4)
+#define SP_VEC_X4    ((NSTACKWORDS) - 32-4)
+#define SP_VEC_X5    ((NSTACKWORDS) - 40-4)
+#define SP_VEC_X6    ((NSTACKWORDS) - 48-4)
+
+
+.text
+.p2align 2
+
+.L_ps_coef1: .word -0x800000, -0x800000, -0x800000, -0x800000, -0x800000, -0x800000, -0x800000, -0x800000 /* Translation error on this line: unexpected token at position 13. */ 
+.L_ps_coef2: .word  0x555555,  0x555555,  0x555555,  0x555555,  0x555555,  0x555555,  0x555555,  0x555555 /* Translation error on this line: unexpected token at position 13. */ 
+.L_ps_coef3: .word -0x400000, -0x400000, -0x400000, -0x400000, -0x400000, -0x400000, -0x400000, -0x400000 /* Translation error on this line: unexpected token at position 13. */ 
+.L_ps_coef4: .word  0x333333,  0x333333,  0x333333,  0x333333,  0x333333,  0x333333,  0x333333,  0x333333 /* Translation error on this line: unexpected token at position 13. */ 
+.L_ps_coef5: .word -0x2aaaab, -0x2aaaab, -0x2aaaab, -0x2aaaab, -0x2aaaab, -0x2aaaab, -0x2aaaab, -0x2aaaab /* Translation error on this line: unexpected token at position 13. */ 
+
+.L_ln_2: .word 0x2c5c85fe,0x2c5c85fe,0x2c5c85fe,0x2c5c85fe,0x2c5c85fe,0x2c5c85fe,0x2c5c85fe,0x2c5c85fe /* Translation error on this line: unexpected token at position 9. */ 
+
+
+#define a           x10 
+#define b           x11
+#define mantA       x28
+#define expA        x12
+#define mantB       x13
+#define expB        x18
+#define tmpA        x19
+#define tmpB        x20
+#define vec_x       x21
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+  xm.stdsp  s5,s4,8
+
+{ addi vec_x,sp, (SP_VEC_X1)*4   ; nop                             }
+
+  xm.lddi  mantA,expA, 0(b)
+  xm.lddi  mantB,expB, 8(b)
+{ xm.cls tmpA, mantA             ; nop                     }
+{ nop                            ; xm.cls tmpB, mantB      }
+{ sub expA, expA, tmpA        ; sub expB, expB, tmpB       }
+{ xm.shl mantA, mantA, tmpA      ; xm.shl mantB, mantB, tmpB      }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantA, mantA, tmpA      \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantB, mantB, tmpB      \nMessage: The shift amount is not 32" */
+  xm.stdi  expA,expB, 0(a)
+  xm.stdi  mantA,mantB, 0(vec_x)
+
+  xm.lddi  mantA,expA, 16(b)
+  xm.lddi  mantB,expB, 24(b)
+
+{ xm.cls tmpA, mantA             ; nop                     }
+{ nop                            ; xm.cls tmpB, mantB      }
+{ sub expA, expA, tmpA           ; sub expB, expB, tmpB    }
+{ xm.shl mantA, mantA, tmpA      ; xm.shl mantB, mantB, tmpB      }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantA, mantA, tmpA      \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantB, mantB, tmpB      \nMessage: The shift amount is not 32" */
+  xm.stdi  expA,expB, 8(a)
+  xm.stdi  mantA,mantB, 8(vec_x)
+
+  xm.lddi  mantA,expA, 32(b)
+  xm.lddi  mantB,expB, 40(b)
+{ xm.cls tmpA, mantA             ; nop                     }
+{ nop                            ; xm.cls tmpB, mantB      }
+{ sub expA, expA, tmpA        ; sub expB, expB, tmpB       }
+{ xm.shl mantA, mantA, tmpA      ; xm.shl mantB, mantB, tmpB      }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantA, mantA, tmpA      \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantB, mantB, tmpB      \nMessage: The shift amount is not 32" */
+  xm.stdi  expA,expB, 16(a)
+  xm.stdi  mantA,mantB, 16(vec_x)
+
+  xm.lddi  mantA,expA, 48(b)
+  xm.lddi  mantB,expB, 56(b)
+{ xm.cls tmpA, mantA             ; nop                     }
+{ nop                            ; xm.cls tmpB, mantB      }
+{ sub expA, expA, tmpA        ; sub expB, expB, tmpB       }
+{ xm.shl mantA, mantA, tmpA      ; xm.shl mantB, mantB, tmpB      }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantA, mantA, tmpA      \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantB, mantB, tmpB      \nMessage: The shift amount is not 32" */
+  xm.stdi  expA,expB, 24(a)
+  xm.stdi  mantA,mantB, 24(vec_x)
+
+{ li t3, 0                  ; nop                             }
+{ li tmpA, 24                ; xm.vsetc t3}
+
+  la t3, vpu_vec_0x20000000
+{ nop                         ; xm.vclrdr                      }
+{ xm.neg tmpA, tmpA           ; nop                            }
+  xm.vlashr a, tmpA
+{ xm.ldap t3, .L_ln_2           ; xm.vladd t3}
+{ nop                             ; xm.vlmul0 t3}
+
+{ li t3, 0                  ; xm.vstr a}
+  xm.vlashr vec_x, t3
+  la t3, vpu_vec_0x00000002
+xm.vlsat t3
+  la t3, vpu_vec_neg_0x40000000
+{ addi tmpB,sp, (SP_VEC_X1)*4    ; xm.vladd t3}
+
+#undef mantA
+#undef expA
+#undef mantB
+#undef expB
+
+{ addi vec_x,sp, (SP_VEC_X2)*4   ; xm.vstr vec_x}
+{ nop                             ; xm.vlmul0 tmpB} // (x-1.0)^2
+{ addi vec_x,sp, (SP_VEC_X3)*4   ; xm.vstr vec_x}
+{ nop                             ; xm.vlmul0 tmpB} // (x-1.0)^3
+{ addi vec_x,sp, (SP_VEC_X4)*4   ; xm.vstr vec_x}
+{ nop                             ; xm.vlmul0 tmpB} // (x-1.0)^4
+{ addi vec_x,sp, (SP_VEC_X5)*4   ; xm.vstr vec_x}
+{ li tmpA, 6                 ; xm.vlmul0 tmpB} // (x-1.0)^5
+{ addi vec_x,sp, (SP_VEC_X6)*4   ; xm.vstr vec_x}
+{ xm.ldap t3, .L_ps_coef5       ; xm.vlmul0 tmpB} // (x-1.0)^6
+{ addi tmpB,sp, (SP_VEC_X1)*4    ; xm.vstr vec_x}
+
+  xm.vlashr tmpB, tmpA                                         // vR[] = coef[0] * x
+{ xm.ldap t3, .L_ps_coef4       ; xm.vldc t3} // vC[] = coef[5]
+{ addi vec_x,sp, (SP_VEC_X5)*4   ; xm.vlmacc0 vec_x} // vR[] += coef[5] * x^6
+{ xm.ldap t3, .L_ps_coef3       ; xm.vldc t3} // vC[] = coef[4]
+{ addi vec_x,sp, (SP_VEC_X4)*4   ; xm.vlmacc0 vec_x} // vR[] += coef[4] * x^5
+{ xm.ldap t3, .L_ps_coef2       ; xm.vldc t3} // vC[] = coef[3]
+{ addi vec_x,sp, (SP_VEC_X3)*4   ; xm.vlmacc0 vec_x} // vR[] += coef[3] * x^4
+{ xm.ldap t3, .L_ps_coef1       ; xm.vldc t3} // vC[] = coef[2]
+{ addi vec_x,sp, (SP_VEC_X2)*4   ; xm.vlmacc0 vec_x} // vR[] += coef[2] * x^3
+{ nop                             ; xm.vldc t3} // vC[] = coef[1]
+{ addi vec_x,sp, (SP_VEC_X1)*4   ; xm.vlmacc0 vec_x} // vR[] += coef[1] * x^2
+
+{ nop                             ; xm.vladd a}
+{ li tmpA, 1                 ; xm.vstr a}
+
+// Any inputs that were 0 should become INT32_MIN
+la t3, vpu_vec_0x7FFFFFFF
+  xm.vlashr t3, tmpA
+{ nop                             ; xm.vladd vec_x} 
+{ nop                             ; xm.vdepth1                     } 
+{ nop                             ; xm.vstr vec_x}
+{ nop                             ; lw tmpA,0          ( vec_x)}
+{ mv tmpB, tmpA              ; nop                             }
+  xm.zip tmpB, tmpA, 0
+  mv tmpB, tmpA
+  xm.zip tmpB, tmpA, 0
+
+  la t3, vpu_vec_0x80000000
+{ nop                             ; xm.vldr t3}
+  xm.vstrpv a, tmpA
+
+.L_finish:
+  xm.lddsp  s3,s2,0
+  xm.lddsp  s5,s4,8
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/misc/util.S b/lib_xcore_math/src/arch/vx4b/misc/util.S
new file mode 100644
index 00000000..a4dbe488
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/misc/util.S
@@ -0,0 +1,103 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+.text
+
+.align 4; /* Translation error on this line: unexpected token at position 8. */ 
+/*  
+void f32_unpack(
+    int32_t* mantissa,
+    exponent_t* exp,
+    float input);
+*/
+#define FUNCTION_NAME   f32_unpack
+#define NSTACKWORDS     (0)
+
+#define mant_out    x10
+#define exp_out     x11
+#define input       x12
+#define sign        x13
+#define exp         x28
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.fsexp sign, exp, input
+    xm.fmant input, input
+
+#undef input
+#define mant        x12
+
+    // interesting way of subtracting 23 without using registers
+    addi exp, exp, -24
+{   addi exp, exp, 1             ;   xm.brff sign, .L_xuf_lblA        }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+{   xm.neg mant, mant              ; nop                               }
+    .L_xuf_lblA:
+{ nop                               ;   sw mant,0       ( mant_out)}
+{ nop                               ;   sw exp,0         ( exp_out)}
+    xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+.L_func_end_unpack:
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME,.L_func_end_unpack - FUNCTION_NAME
+
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+#undef mant_out
+#undef exp_out
+#undef mant
+#undef exp
+#undef sign
+
+
+
+/*  
+float s32_to_f32(
+    const int32_t mantissa,
+    const exponent_t exp);
+*/
+#define FUNCTION_NAME   s32_to_f32
+#define NSTACKWORDS     (0)
+
+#define mant        x10
+#define exp         x11
+#define sign        x12
+#define zero        x13
+#define tmp         x28
+
+.align 4; /* Translation error on this line: unexpected token at position 8. */ 
+FUNCTION_NAME:
+  
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    srai sign, mant, 31
+  { li tmp, 23                 ; li zero, 0                 }  
+  { add exp, exp, tmp           ; xm.brff sign, .L_pack_not_neg    }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    xm.neg mant, mant
+  .L_pack_not_neg:
+    xm.fmake mant, sign, exp, zero, mant
+  { nop                             ; xm.retsp (NSTACKWORDS)*4           }
+
+.L_func_end_pack:
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME,.L_func_end_pack - FUNCTION_NAME
+
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/misc/vect_copy.S b/lib_xcore_math/src/arch/vx4b/misc/vect_copy.S
new file mode 100644
index 00000000..ecb29559
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/misc/vect_copy.S
@@ -0,0 +1,60 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+headroom_t vect_s32_copy(
+    int32_t a[],
+    const int32_t b[],
+    unsigned length);
+*/
+
+#define NSTACKWORDS     (0)
+#define FUNCTION_NAME   vect_s32_copy
+
+#define a           x10
+#define b           x11
+#define len         x12
+#define tmp         x13
+
+.text
+.p2align 2
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  { li t3, 0                                ; srli tmp, len, 3                       }
+  { xm.zexti tmp, 1                               ; srli len, tmp, 1                       }
+  { add len, len, tmp                         ; xm.vsetc t3}
+  { li t3, 32                               ; xm.bt tmp, .L_loop_mid                   }
+  { nop                                           ; xm.bu .L_loop_top                        }
+  
+  .p2align 4
+  .L_loop_top:
+    { add b, b, t3                             ; xm.vldd b}
+    { add a, a, t3                             ; xm.vstd a}
+    .L_loop_mid:
+    { add b, b, t3                             ; xm.vldd b}
+    { addi len, len, -1                           ; xm.vstd a}
+    { add a, a, t3                             ; xm.bt len, .L_loop_top                   }
+  .L_loop_bot:
+
+.L_finish:
+  { li a0, 31                                ; xm.vgetc t3}
+  { xm.zexti t3, 5                               ; nop                                       }
+  { sub a0, a0, t3                           ; xm.retsp (NSTACKWORDS)*4                     }
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;  .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;               .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;              .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;            .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/misc/vect_float_s32_ln_prepare.S b/lib_xcore_math/src/arch/vx4b/misc/vect_float_s32_ln_prepare.S
new file mode 100644
index 00000000..20d881c8
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/misc/vect_float_s32_ln_prepare.S
@@ -0,0 +1,122 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+
+void vect_float_s32_ln_prepare(
+    q2_30 a[8],
+    q8_24 exp_mod[8],
+    const float_s32_t b[]);
+*/
+
+
+#define NSTACKWORDS     (8)
+
+#define FUNCTION_NAME   vect_float_s32_ln_prepare
+
+
+#define a           x10 
+#define exp_mod     x11 
+#define b           x12
+#define mantA       x13
+#define expA        x18
+#define mantB       x19
+#define expB        x20
+#define tmpA        x21
+#define tmpB        x22
+#define tmpC        x23
+
+.text
+.p2align 2
+
+
+
+.L_32_Q24:
+.word 0x20000000,0x20000000,0x20000000,0x20000000,0x20000000,0x20000000,0x20000000,0x20000000
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+  xm.stdsp  s5,s4,8
+  xm.stdsp  s7,s6,16
+
+{ li t3, 0                  ; li tmpC, 24                }
+{ li t3, 1                  ; xm.vsetc t3}
+
+  xm.lddi  mantA,expA, 0(b)
+  xm.lddi  mantB,expB, 8(b)
+{ xm.cls tmpA, mantA             ; nop                               }
+{ nop                                  ; xm.cls tmpB, mantB             }
+{ sub expA, expA, tmpA        ; sub expB, expB, tmpB        }
+{ addi tmpA, tmpA, -1           ; addi tmpB, tmpB, -1           }
+{ xm.shl mantA, mantA, tmpA      ; xm.shl mantB, mantB, tmpB      }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantA, mantA, tmpA      \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantB, mantB, tmpB      \nMessage: The shift amount is not 32" */
+  xm.stdi  expA,expB, 0(exp_mod)
+  xm.stdi  mantA,mantB, 0(a)
+
+  xm.lddi  mantA,expA, 16(b)
+  xm.lddi  mantB,expB, 24(b)
+{ xm.cls tmpA, mantA             ; nop                               }
+{ nop                                  ; xm.cls tmpB, mantB             }
+{ sub expA, expA, tmpA        ; sub expB, expB, tmpB        }
+{ addi tmpA, tmpA, -1           ; addi tmpB, tmpB, -1           }
+{ xm.shl mantA, mantA, tmpA      ; xm.shl mantB, mantB, tmpB      }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantA, mantA, tmpA      \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantB, mantB, tmpB      \nMessage: The shift amount is not 32" */
+  xm.stdi  expA,expB, 8(exp_mod)
+  xm.stdi  mantA,mantB, 8(a)
+
+  xm.lddi  mantA,expA, 32(b)
+  xm.lddi  mantB,expB, 40(b)
+{ xm.cls tmpA, mantA             ; nop                               }
+{ nop                                  ; xm.cls tmpB, mantB             }
+{ sub expA, expA, tmpA        ; sub expB, expB, tmpB        }
+{ addi tmpA, tmpA, -1           ; addi tmpB, tmpB, -1           }
+{ xm.shl mantA, mantA, tmpA      ; xm.shl mantB, mantB, tmpB      }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantA, mantA, tmpA      \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantB, mantB, tmpB      \nMessage: The shift amount is not 32" */
+  xm.stdi  expA,expB, 16(exp_mod)
+  xm.stdi  mantA,mantB, 16(a)
+
+  xm.lddi  mantA,expA, 48(b)
+  xm.lddi  mantB,expB, 56(b)
+{ xm.cls tmpA, mantA             ; nop                               }
+{ nop                                  ; xm.cls tmpB, mantB             }
+{ sub expA, expA, tmpA        ; sub expB, expB, tmpB        }
+{ addi tmpA, tmpA, -1           ; addi tmpB, tmpB, -1           }
+{ xm.shl mantA, mantA, tmpA      ; xm.shl mantB, mantB, tmpB      }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantA, mantA, tmpA      \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mantB, mantB, tmpB      \nMessage: The shift amount is not 32" */
+  xm.stdi  expA,expB, 24(exp_mod)
+  xm.stdi  mantA,mantB, 24(a)
+
+  xm.vlashr a, t3
+{ xm.neg tmpC, tmpC              ; nop}
+{nop ; xm.vstr a}
+
+lui t3, %hi(.L_32_Q24)
+  addi t3,t3, %lo(.L_32_Q24)
+  xm.vlashr exp_mod, tmpC
+{ nop                             ; xm.vladd t3}
+{ nop                             ; xm.vstr exp_mod}
+
+
+.L_finish:
+  xm.lddsp  s3,s2,0
+  xm.lddsp  s5,s4,8
+  xm.lddsp  s7,s6,16
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/misc/xs3_memcpy.S b/lib_xcore_math/src/arch/vx4b/misc/xs3_memcpy.S
new file mode 100644
index 00000000..7530adb5
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/misc/xs3_memcpy.S
@@ -0,0 +1,53 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+void xs3_memcpy(
+    void* dst,
+    const void* src,
+    unsigned bytes);
+*/
+
+#define NSTACKWORDS     (0)
+#define FUNCTION_NAME   xs3_memcpy
+
+#define a           x10
+#define b           x11
+#define len         x12
+#define tmp         x13
+
+.text
+.p2align 2
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  { srli tmp, len, 5               ; xm.zexti len, 5                   }
+  { li t3, 32                   ; xm.brff tmp, .L_loop_bot           } /* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+  .L_loop_top:
+    { add b, b, t3                 ; xm.vldd b}
+    { addi tmp, tmp, -1               ; xm.vstd a}
+    { add a, a, t3                 ; xm.bt tmp, .L_loop_top           }
+  .L_loop_bot:
+  { xm.mkmsk len, len                ; xm.brff len, .L_finish             }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+  { mv t3, b                    ; nop                               }
+  { nop                               ; xm.vldr t3}
+    xm.vstrpv a, len
+.L_finish:
+    xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;  .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;               .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;              .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;            .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/scalar/f32_log2.S b/lib_xcore_math/src/arch/vx4b/scalar/f32_log2.S
new file mode 100644
index 00000000..e325fc35
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/scalar/f32_log2.S
@@ -0,0 +1,69 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+.text
+.align 4; /* Translation error on this line: unexpected token at position 8. */ 
+
+/*  
+float f32_log2(
+    const float x);
+*/
+#define FUNCTION_NAME   f32_log2
+#define NSTACKWORDS     (4)
+
+#define x     x10
+#define tmp   x11
+#define exp   x12
+#define _0    x13
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4
+{ mv a1, a0                  ; addi a0,sp, 4              }
+  la t3, f32_normA
+  jalr t3
+
+{ li tmp, 1                  ; li t3, 23                 }
+{ li _0, 0                   ; lw exp, 4              (sp)}
+  xm.fmake tmp, _0, t3, _0, tmp
+  xm.fsub x, x, tmp
+  xm.fmake exp, _0, t3, _0, exp
+la t3, log2_ps       
+{ nop                             ; sw exp, 4              (sp)}
+{ mv a1, t3                 ; li a2, 11                  }
+la t3, f32_power_series
+  jalr t3
+{ nop                             ; lw exp, 4              (sp)}
+  xm.fadd x, x, exp
+  xm.retsp (NSTACKWORDS)*4
+.L_func_end_unpack:
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+
+
+.weak FUNCTION_NAME.callees
+.add_to_set FUNCTION_NAME.callees,f32_normA.nstackwords
+.add_to_set FUNCTION_NAME.callees,f32_power_series.nstackwords
+.max_reduce FUNCTION_NAME.callee_maxstackwords,FUNCTION_NAME.callees,0
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS+FUNCTION_NAME.callee_maxstackwords
+.global FUNCTION_NAME.nstackwords
+
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME,.L_func_end_unpack - FUNCTION_NAME
+
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/scalar/f32_norm.S b/lib_xcore_math/src/arch/vx4b/scalar/f32_norm.S
new file mode 100644
index 00000000..affeeb9d
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/scalar/f32_norm.S
@@ -0,0 +1,48 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+.text
+.align 4; /* Translation error on this line: unexpected token at position 8. */ 
+
+/*  
+float f32_normA(
+    exponent_t* exp,
+    const float x);
+*/
+#define FUNCTION_NAME   f32_normA
+#define NSTACKWORDS     (0)
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.fsexp a2, a3, a1
+  xm.fmant a1, a1
+{ addi a3, a3, 1               ; li t3, 0                  }
+{ addi a0, t3, -1              ; sw a3,0               ( a0)}
+  xm.fmake a0, a2, a0, t3, a1
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+.L_func_end_unpack:
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME,.L_func_end_unpack - FUNCTION_NAME
+
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/scalar/f32_power_series.S b/lib_xcore_math/src/arch/vx4b/scalar/f32_power_series.S
new file mode 100644
index 00000000..8f44e955
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/scalar/f32_power_series.S
@@ -0,0 +1,133 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+
+float f32_power_series(
+    const float x,
+    const float coef[],
+    const unsigned terms_count);
+*/
+
+
+#define NSTACKWORDS     (4)
+
+#define FUNCTION_NAME   f32_power_series
+
+#define x       x10
+#define coef    x11
+#define count   x12
+#define acc     x13
+#define tmpA    x18
+#define pow     x19
+
+// these unroll settings seem to offer the best
+// tradeoff between code size and speed (3/8 should also work)
+#define UNROLL_LOG2   2
+#define UNROLL        4
+
+#define CAT_(A, B)    A##B
+#define CAT(A, B)     CAT_(A,B)
+
+#define FULL_LOOP_LBL CAT(.L_loop_, UNROLL)
+
+.text
+.p2align 2
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+
+{ li acc, 0                  ; mv pow, x                  }
+{ srli t3, count, UNROLL_LOG2 ; nop                             }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shri t3, count, UNROLL_LOG2 \nMessage: The shift amount is not 32" */
+
+  .L_loop_top:
+  { xm.subi t3, count, UNROLL      ; xm.bt t3, .L_loop_full        }
+    slli t3, t3, 2
+  { add coef, coef, t3         ; xm.bru count                   }
+    xm.assert count
+#if (UNROLL_LOG2 >= 1)
+    tail .L_loop_1
+#endif
+#if (UNROLL_LOG2 >= 2)
+    tail .L_loop_2
+    tail .L_loop_3
+#endif
+#if (UNROLL_LOG2 >= 3)
+    tail .L_loop_4
+    tail .L_loop_5
+    tail .L_loop_6
+    tail .L_loop_7
+#endif
+
+    .L_loop_full:
+#if (UNROLL_LOG2 >= 3)
+    .L_loop_8:
+    { addi count, count, -1         ; lw tmpA,(UNROLL-8)*4    ( coef)}
+      xm.fmacc acc, acc, pow, tmpA
+      xm.fmul pow, pow, x
+    .L_loop_7:
+    { addi count, count, -1         ; lw tmpA,(UNROLL-7)*4    ( coef)}
+      xm.fmacc acc, acc, pow, tmpA
+      xm.fmul pow, pow, x
+    .L_loop_6:
+    { addi count, count, -1         ; lw tmpA,(UNROLL-6)*4    ( coef)}
+      xm.fmacc acc, acc, pow, tmpA
+      xm.fmul pow, pow, x
+    .L_loop_5:
+    { addi count, count, -1         ; lw tmpA,(UNROLL-5)*4    ( coef)}
+      xm.fmacc acc, acc, pow, tmpA
+      xm.fmul pow, pow, x
+#endif
+#if (UNROLL_LOG2 >= 2)    
+    .L_loop_4:
+    { addi count, count, -1         ; lw tmpA,(UNROLL-4)*4    ( coef)}
+      xm.fmacc acc, acc, pow, tmpA
+      xm.fmul pow, pow, x
+    .L_loop_3:
+    { addi count, count, -1         ; lw tmpA,(UNROLL-3)*4    ( coef)}
+      xm.fmacc acc, acc, pow, tmpA
+      xm.fmul pow, pow, x
+#endif
+#if (UNROLL_LOG2 >= 1)
+    .L_loop_2:
+    { addi count, count, -1         ; lw tmpA,(UNROLL-2)*4    ( coef)}
+      xm.fmacc acc, acc, pow, tmpA
+      xm.fmul pow, pow, x
+#endif
+    .L_loop_1:
+    { addi count, count, -1         ; lw tmpA,(UNROLL-1)*4    ( coef)}
+      xm.fmacc acc, acc, pow, tmpA
+      xm.fmul pow, pow, x
+
+  li t3, UNROLL*4
+  add coef, coef, t3
+  { srli t3, count, UNROLL_LOG2   ; xm.bt count, .L_loop_top       }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shri t3, count, UNROLL_LOG2   \nMessage: The shift amount is not 32" */
+
+
+.L_finish:
+  mv a0, acc
+  xm.lddsp  s3,s2,0
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/scalar/f32_sin.S b/lib_xcore_math/src/arch/vx4b/scalar/f32_sin.S
new file mode 100644
index 00000000..6c5421ba
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/scalar/f32_sin.S
@@ -0,0 +1,147 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+/*  
+float f32_sin(
+    const float theta);
+*/
+
+#define FUNCTION_NAME   f32_sin
+#define NSTACKWORDS     (8)
+
+
+
+#define r           x10
+#define phi         x11
+#define out_mul     x12
+#define tmp         x13
+
+#define _0          x23
+#define _1          x24
+
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,8
+  xm.stdsp  s5,s4,16
+  xm.stdsp  s7,s6,0
+{ li _0, 0                   ; sw s8, 24              (sp)}
+  xm.flt t3, r, _0
+{ nop                             ; li _1, 1                   }
+{ li out_mul, 23             ; xm.bt t3, .L_neg              }
+
+// sin(-x) = -sin(x) -- sin() has odd symmetry, so let's only deal with positive angles
+.L_pos:
+  xm.fmake out_mul, _0, out_mul, _0, _1 // +1.0f
+  tail .L_qwer
+.L_neg:
+  xm.fmake out_mul, _1, out_mul, _0, _1 // -1.0f
+  xm.fmul r, r, out_mul
+
+.L_qwer:
+
+// Normalize our angle to be 0 <= r < 4.0f
+// because  sin(x) = sin(x + k*2*pi) for int k
+  lw t3, two_over_pi
+  xm.fmul r, r, t3
+
+{ li t3, 21                 ; nop                             }
+  xm.fmake tmp, _0, t3, _0, _1 // +0.25f
+  xm.fmul tmp, r, tmp
+  xm.fsexp s6, t3, tmp
+  xm.fmant tmp, tmp
+{ xm.neg s6, t3                 ; li s5, 23                  }
+{ add s6, s6, s5              ; addi t3, t3, 2             }
+  xm.shr tmp, tmp, s6
+  xm.shl tmp, tmp, s6
+  xm.fmake tmp, _0, t3, _0, tmp  // tmp <--  4.0*floor(r/4.0)
+  
+  xm.fsub r, r, tmp  // r <-- r - 4.0*floor(r/4.0)
+
+// sin(pi + x) = -sin(x)  if   pi < x <= 2*pi
+{ li t3, 24                 ; nop                             }
+  xm.fmake tmp, _0, t3, _0, _1 // +2.0f
+  xm.flt t3, r, tmp
+{ nop                             ; xm.bt t3, .L_wert             }
+  xm.fsub r, r, tmp  // r <-- r - 2
+  xm.fsub out_mul, _0, out_mul   // out_mul <-- -out_mul
+.L_wert:
+
+// sin(pi/2 + x) = sin(pi - x)
+  xm.fmul t3, out_mul, out_mul // x28 <-- out_mul^2 = 1.0 
+  xm.flt t3, r, t3     // x28 <-- r < 1.0
+{ nop                             ; xm.bt t3, .L_erty             }
+  xm.fsub r, tmp, r  // r <-- 2.0 - r
+.L_erty:
+
+
+// Now, we have an angle r in the first quadrant
+// r is a normalized angle where   0.0 <= r < 1.0
+
+// Now apply power series for sin()
+
+#define total         x18
+#define coefs         x19
+
+lui t3, %hi(sin_coef)
+  addi t3,t3, %lo(sin_coef)
+{ mv coefs, t3            ; lw tmp,0               ( t3)}
+  xm.fmul phi, r, r
+  xm.fmul total, r, tmp   
+
+
+#define PS_TERM(N)            \
+  xm.fmul r, r, phi;              \
+  xm.ldw tmp, (N*4)(coefs);           \
+  xm.fmacc total, total, r, tmp;
+
+PS_TERM(1)
+PS_TERM(2)
+PS_TERM(3)
+PS_TERM(4)
+PS_TERM(5)
+PS_TERM(6)
+PS_TERM(7)
+
+// Apply final output multiplier
+  xm.fmul a0, total, out_mul
+
+  xm.lddsp  s7,s6,0
+  xm.lddsp  s5,s4,16
+  xm.lddsp  s3,s2,8
+{ nop                             ; lw s8, 24              (sp)}
+{ nop                             ; xm.retsp (NSTACKWORDS)*4           }
+
+.L_func_end:
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME,.L_func_end - FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+
+
+
+
+#endif //defined(__VX4B__)
+
diff --git a/lib_xcore_math/src/arch/vx4b/scalar/float_s32.c b/lib_xcore_math/src/arch/vx4b/scalar/float_s32.c
new file mode 100644
index 00000000..2490f7a2
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/scalar/float_s32.c
@@ -0,0 +1,87 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined(__VX4B__)
+#include <stdint.h>
+#include <stdio.h>
+#include <math.h>
+
+#include "xmath/xmath.h"
+#include "vpu_helper.h"
+#include "xmath/xs3/vpu_scalar_ops.h"
+#include "vpu_const_vects.h"
+
+
+static inline
+int64_t maccs(int64_t acc, int32_t x, int32_t y)
+{
+  return acc + (((int64_t)x) * y);
+}
+
+static inline
+int32_t lextract(int64_t acc, unsigned pos)
+{
+  return (acc >> pos) & 0xFFFFFFFF;
+}
+
+float_s32_t float_s32_exp(
+    const float_s32_t b)
+{
+  float_s32_t res = {0,0};
+
+  const int32_t one    = 0x40000000;
+  const int32_t sqrt_2 = 0x5a82799a;
+  const int32_t log2_e = 0x5c551d95;
+  const int32_t ln_2   = 0x2c5c85fe;
+
+  headroom_t hr = HR_S32(b.mant);
+
+  int32_t tmp1 = vlashr32(b.mant, -(int)hr);
+  tmp1 = vlashr32(tmp1, 1);
+
+  res.exp = (b.exp - hr) + 1;
+
+  // compute y = x * log2(e)
+  int32_t y = lextract(maccs(0, log2_e, tmp1), 30);
+
+  if( res.exp >= 0 ){
+    res.mant = one;
+    res.exp = res.exp - 30;
+    return res;
+  }
+
+  int frac_bits = -res.exp;
+  
+  right_shift_t shr = -30 + frac_bits;
+
+  int32_t alpha = 0;
+  int32_t rho = 0;
+  int32_t beta = 0;
+  unsigned mask = 0;
+
+  if(frac_bits == 31){
+    alpha = (y < 0)? -1 : 0;
+    rho = y & 0x40000000;
+    beta = y & 0x3FFFFFFF;
+    beta = vlashr32(beta, shr);
+  } else if(frac_bits >= 32){
+    alpha = (y < 0)? -1 : 0;
+    rho = (y < 0)? 1 : 0;
+    beta = vlashr32(y, shr) + ((y < 0)? 0x20000000 : 0);
+  } else {
+    mask = (1 << (frac_bits-1)) - 1;
+    beta = y & mask;
+    tmp1 = y >> (frac_bits-1);
+    rho = tmp1 & 1;
+    alpha = tmp1 >> 1;
+    beta = vlashr32(beta, shr);
+  }
+
+  res.exp = alpha - 30;
+  int32_t two_to_rho = rho? sqrt_2 : one;
+  int32_t z = lextract(maccs(0, beta, ln_2), 30);
+  z = q30_exp_small(z);
+  res.mant = lextract(maccs(0, two_to_rho, z), 30);
+  return res;
+}
+
+#endif // defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/scalar/float_s32_exp.almost b/lib_xcore_math/src/arch/vx4b/scalar/float_s32_exp.almost
new file mode 100644
index 00000000..2897d8fb
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/scalar/float_s32_exp.almost
@@ -0,0 +1,153 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+.text
+.align 4; /* Translation error on this line: unexpected token at position 8. */ 
+
+/*  
+float_s32_t float_s32_exp(
+    const float_s32_t b);
+*/
+#define FUNCTION_NAME   float_s32_exp
+#define NSTACKWORDS     (8)
+
+
+#define STACK_RHO       (0)
+#define STACK_A         (1)
+
+
+
+#define b_0       x10
+#define b_1       x11
+
+#define tmp1    x12
+#define tmp2    x13
+#define tmp3    x18
+#define consts  x19
+
+.L_consts: 
+.L_none:   .word 0x00000000 
+.L_one:    .word 0x40000000 
+.L_sqrt_2: .word 0x5a82799a 
+.L_log2_e: .word 0x5c551d95 
+.L_ln_2:   .word 0x2c5c85fe 
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4
+  xm.stdsp  s3,s2,8
+  xm.stdsp  s5,s4,16
+la t3, .L_consts
+// Load input, reformat to have 1 bit of headroom and store in
+// output.
+
+{ mv consts, t3; nop}
+{ xm.cls t3, b_0               ; nop}
+
+{ addi t3, t3, -1             ; nop                             }
+{ xm.shl tmp1, tmp1, t3         ; addi t3, t3, -1             }
+  srai tmp1, tmp1, 1
+{ sub tmp2, b_1, t3              ; mv s4, tmp1}//a[0]
+{ nop                             ; mv s5, tmp2} //a[1]
+
+#undef b
+#define y     x11
+
+//// Compute y = x * log2(e)
+{ mv y, tmp1                 ; nop                             }
+{ li tmp1, 0                 ; lw t3,12          ( consts)}
+  xm.maccs tmp1, y, t3, y /// astew: Is this correct... isn't this doing  acc = y + log2(e) * y ??
+{ li t3, 30                 ; nop                             }
+  xm.lextract y, tmp1, y, t3, 32
+
+//// Deal with fractional bit count
+{ xm.clz tmp3, tmp2               ;  nop}
+{nop; xm.neg t3, tmp2                            }
+{  nop              ; xm.brff tmp3, .L_neg_exp          }
+
+//// If the exponent is non-negative, then the best estimate we can give is 2^(y<<exp)
+{ li tmp1, 30                ; lw y,4            ( consts)}
+{ sub tmp2, tmp2, tmp1        ; sw y,0                 ( a)}
+{ nop                             ; sw tmp2,4              ( a)}
+{ nop                             ; xm.bu .L_finish                }
+
+.L_neg_exp:
+
+  //// alpha = floor(y)
+  sra tmp1, y, t3
+
+  //// output exponent = alpha - 30
+{ li tmp2, 30                ; nop                             }
+{ sub tmp2, tmp1, tmp2        ; xm.zext y, t3                             }
+{ nop                 ; xm.stwi tmp2, 4(a)              }
+
+  // Put it in Q30 format
+{ li tmp2, 30                ; nop                             }
+{ xm.neg tmp2, tmp2              ; nop                             }
+{ add tmp2, tmp2, t3         ; sw a, (STACK_A)*4          (sp)}
+  {xm.shr y, y, tmp2 ; nop} /* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in ashri y, y, tmp2\nMessage: Shift amount may need adjustment but I don't know its value" */
+
+  //// If y is negative, we need to add 1 to it
+  srai tmp1, y, 31
+{ xm.neg tmp1, tmp1              ; nop                             }
+xm.ldw tmp1, tmp1      ( consts)
+{ add y, y, tmp1              ; addi consts, consts, 4       }
+
+
+  //// Get 2^rho
+{ li tmp1, 29                ; li tmp3, 29                }
+{ xm.shr tmp2, y, tmp1           ; xm.zext y, tmp3                }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shri tmp2, y, tmp1           \nMessage: The shift amount is not 32" */
+{ addi consts, consts, -4       ; xm.ldw t3,tmp2       ( consts)}
+{ li tmp1, 0                 ; sw t3, (STACK_RHO)*4      (sp)}
+
+  //// Need to compute z = beta * ln(2)
+
+{ li tmp3, 30                ; lw t3,16          ( consts)}
+  xm.lmul tmp1, y, y, t3, tmp1, tmp1
+  xm.lextract y, tmp1, y, tmp3, 32
+
+  //// Now we need to actually compute the power series
+  //   on y, given that it is a Q30
+lui t3, %hi(q30_exp_small)
+  addi t3,t3, %lo(q30_exp_small)
+{ mv a0, y                   ; jalr t3                     }
+
+  //// Now we just need to multiply our rho and beta factors
+{ li a2, 0                   ; lw a1, (STACK_RHO)*4       (sp)}
+  xm.lmul a1, a0, a1, a0, a2, a2
+  xm.lextract a0, a1, a0, tmp3, 32
+
+  //// That's our result.
+{ nop                             ; lw a1, (STACK_A)*4         (sp)}
+{ nop                             ; sw a0,0               ( a1)}
+
+.L_finish:
+  xm.lddsp  s3,s2,8
+  xm.lddsp  s5,s4,16
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+.L_func_end_unpack:
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords, NSTACKWORDS + q30_exp_small.nstackwords; /* Translation error on this line: unexpected token at position 71. */ 
+.global FUNCTION_NAME.nstackwords
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME,.L_func_end_unpack - FUNCTION_NAME
+
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/scalar/q24_logistic_fast.S b/lib_xcore_math/src/arch/vx4b/scalar/q24_logistic_fast.S
new file mode 100644
index 00000000..c69848e7
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/scalar/q24_logistic_fast.S
@@ -0,0 +1,80 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+.text
+.align 4; /* Translation error on this line: unexpected token at position 8. */ 
+
+/*  
+q8_24 q24_logistic_fast(
+    const q8_24 x);
+*/
+#define FUNCTION_NAME   q24_logistic_fast
+#define NSTACKWORDS     (8+8)
+
+#define STACK_VEC   (NSTACKWORDS-10)
+
+.L_log_slope: 
+  .word  1015490930, 640498971, 297985800, 120120271, 46079377, 17219453, 6371555, 3717288
+.L_log_offset: 
+  .word 8388608, 9853420, 12529304, 14613666, 15770555, 16334225, 16588473, 16661050
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+  xm.stdsp  s5,s4,8
+
+{ xm.clz t3, a0                 ; xm.not a1, a0                  }
+{ srli a2, a1, 24              ; li a3, 0                   }
+{ xm.mkmski s2, 24                ; xm.brff t3, .L_neg_input        }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+.L_pos_input:
+  { srli a1, a0, 24              ; nop                             }
+  { srli t3, a1, 3              ; nop                             }
+  { xm.ldap t3, .L_log_slope      ; xm.brff t3, .L_not_big          }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+  .L_big:
+    { mv a0, s2                  ; xm.bu .L_finish                }
+  .L_not_big:
+  { mv a2, a0                  ; xm.ldw a3, a1             (t3)}
+  { xm.ldap t3, .L_log_offset     ; nop                             }
+  { li a1, 0                   ; xm.ldw a0, a1             (t3)}
+  xm.maccu a0, a1, a3, a2
+  tail .L_finish
+.L_neg_input:
+  { srli t3, a2, 3              ; nop                             }
+  { xm.ldap t3, .L_log_slope      ; xm.brff t3, .L_not_small        }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+  .L_small:
+    { li a0, 0                   ; xm.bu .L_finish                }
+  .L_not_small:
+  { xm.ldap t3, .L_log_offset     ; xm.ldw a3, a2             (t3)}
+  { li a0, 0                   ; xm.ldw a2, a2             (t3)}
+    xm.maccu a2, a0, a3, a1
+  { sub a0, s2, a2              ; nop                             }
+
+.L_finish:
+  xm.lddsp  s3,s2,0
+  xm.lddsp  s5,s4,8
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+.L_func_end_unpack:
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME,.L_func_end_unpack - FUNCTION_NAME
+
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/scalar/q30_exp_small.S b/lib_xcore_math/src/arch/vx4b/scalar/q30_exp_small.S
new file mode 100644
index 00000000..71140fef
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/scalar/q30_exp_small.S
@@ -0,0 +1,115 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+.text
+.align 4; /* Translation error on this line: unexpected token at position 8. */ 
+
+/*  
+q2_30 q30_exp_small(
+    const q2_30 x);
+*/
+#define FUNCTION_NAME   q30_exp_small
+#define NSTACKWORDS     (8)
+
+#define x       x10
+#define coef    x11
+#define acc_hi  x12
+#define acc_lo  x13
+#define tmp     x18
+#define pow     x19
+#define _30     x20
+#define _0      x21
+#define sign    x24  // 1 means positive, 0 negative
+
+.L_ps_pos_q30: 
+  .word  0x40000000,  0x20000000,  0x0AAAAAAB, 0x02AAAAAB,  0x00888889, 0x0016C16C,  0x00034034, 0x00006807,  0x00000B8F
+.L_ps_neg_q30: 
+  .word -0x40000000,  0x20000000, -0x0AAAAAAB, 0x02AAAAAB, -0x00888889, 0x0016C16C, -0x00034034, 0x00006807, -0x00000B8F
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+  xm.stdsp  s5,s4,8
+
+{ xm.ldap t3, .L_ps_pos_q30     ; sw s8, 16              (sp)}
+{ mv coef, t3               ; li _30, 30                 }
+{ li _0, 0                   ; li acc_hi, 0               }
+{ xm.slt sign, _0, x             ; nop                             } 
+{ li acc_lo, 0               ; xm.bt sign, .L_pos_x           }
+
+.L_neg_x:
+lui t3, %hi(.L_ps_neg_q30)
+  addi t3,t3, %lo(.L_ps_neg_q30)
+{ xm.neg x, x                    ; mv coef, t3               }
+.L_pos_x:
+
+{ nop                             ; lw tmp,0            ( coef)}
+
+  xm.maccs acc_hi, acc_lo, tmp, tmp
+  xm.maccs acc_hi, acc_lo, tmp, x
+  xm.lmul t3, pow, x, x, _0, _0
+  xm.lextract pow, t3, pow, _30, 32
+{ nop                             ; lw tmp,4            ( coef)}
+  xm.maccs acc_hi, acc_lo, tmp, pow 
+  xm.lmul t3, pow, pow, x, _0, _0
+  xm.lextract pow, t3, pow, _30, 32
+{ nop                             ; lw tmp,8            ( coef)}
+  xm.maccs acc_hi, acc_lo, tmp, pow 
+  xm.lmul t3, pow, pow, x, _0, _0
+  xm.lextract pow, t3, pow, _30, 32
+{ nop                             ; lw tmp,12            ( coef)}
+  xm.maccs acc_hi, acc_lo, tmp, pow 
+  xm.lmul t3, pow, pow, x, _0, _0
+  xm.lextract pow, t3, pow, _30, 32
+{ nop                             ; lw tmp,16            ( coef)}
+  xm.maccs acc_hi, acc_lo, tmp, pow 
+  xm.lmul t3, pow, pow, x, _0, _0
+  xm.lextract pow, t3, pow, _30, 32
+{ nop                             ; lw tmp,20            ( coef)}
+  xm.maccs acc_hi, acc_lo, tmp, pow 
+  xm.lmul t3, pow, pow, x, _0, _0
+  xm.lextract pow, t3, pow, _30, 32
+{ nop                             ; lw tmp,24            ( coef)}
+  xm.maccs acc_hi, acc_lo, tmp, pow 
+  xm.lmul t3, pow, pow, x, _0, _0
+  xm.lextract pow, t3, pow, _30, 32
+{ nop                             ; lw tmp,28            ( coef)}
+  xm.maccs acc_hi, acc_lo, tmp, pow 
+  xm.lmul t3, pow, pow, x, _0, _0
+  xm.lextract pow, t3, pow, _30, 32
+{ nop                             ; lw tmp,32            ( coef)}
+  xm.maccs acc_hi, acc_lo, tmp, pow 
+  
+  // result
+  xm.lextract a0, acc_hi, acc_lo, _30, 32
+
+.L_finish:
+  lw s8, 16(sp)
+  xm.lddsp  s3,s2,0
+  xm.lddsp  s5,s4,8
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+.L_func_end_unpack:
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME,.L_func_end_unpack - FUNCTION_NAME
+
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/scalar/q30_odd_powers.S b/lib_xcore_math/src/arch/vx4b/scalar/q30_odd_powers.S
new file mode 100644
index 00000000..d1b05268
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/scalar/q30_odd_powers.S
@@ -0,0 +1,74 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+.text
+.align 4; /* Translation error on this line: unexpected token at position 8. */ 
+
+/*  
+void s32_odd_powers(
+    int32_t a[],
+    const int32_t b,
+    const unsigned count,
+    const right_shift_t shr);
+*/
+#define FUNCTION_NAME   s32_odd_powers
+#define NSTACKWORDS     (4)
+
+#define a         x10
+#define b         x11
+#define len       x12
+#define shr       x13
+
+#define acc_lo    x18
+#define acc_hi    x19
+
+#define b_sqr     x28
+
+
+
+FUNCTION_NAME:
+
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+
+{ nop                             ; addi len, len, -1             }
+{ li acc_lo, 0               ; li acc_hi, 0               }
+  xm.maccs acc_hi, acc_lo, b, b
+  xm.lextract b_sqr, acc_hi, acc_lo, shr, 32
+
+.L_loop_top:
+  { addi len, len, -1             ; sw b,0                 ( a)}
+  { li acc_lo, 0               ; li acc_hi, 0               }
+    xm.maccs acc_hi, acc_lo, b, b_sqr
+    xm.lextract b, acc_hi, acc_lo, shr, 32
+  { addi a, a, 4                 ; xm.bt len, .L_loop_top         }
+
+  sw b,0( a)
+
+  xm.lddsp  s3,s2,0
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+.L_func_end_unpack:
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME,.L_func_end_unpack - FUNCTION_NAME
+
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/scalar/q30_powers.S b/lib_xcore_math/src/arch/vx4b/scalar/q30_powers.S
new file mode 100644
index 00000000..278a954e
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/scalar/q30_powers.S
@@ -0,0 +1,110 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+.text
+.align 4;
+
+/*  
+void q30_powers(
+    q2_30 a[],
+    const q2_30 b,
+    const unsigned length);
+*/
+#define FUNCTION_NAME   q30_powers
+#define NSTACKWORDS     (8+(8*2))
+
+#define ST_VEC_TMP2     (NSTACKWORDS-8-2)
+#define ST_VEC_TMP      (NSTACKWORDS-16-2)
+
+#define a             x10
+#define b             x11
+#define len           x12
+#define tmpA          x13
+#define tmpB          x18
+#define pow           x19
+#define _30           x20
+#define vec_tmp       x21
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,8
+  xm.stdsp  s5,s4,0
+  xm.stdsp  s7,s6,16 
+
+{ addi vec_tmp,sp, (ST_VEC_TMP)*4;  li t3, 0                 }
+{ li t3, 0                  ; nop                             }
+{ li _30, 30                 ; xm.vsetc t3}
+  lw pow, vpu_vec_0x40000000
+  xm.stdi  pow,b, 0  (vec_tmp)// b^0, b^1
+
+{ li tmpA, 0                 ; li tmpB, 0                 }
+  xm.maccs tmpA, tmpB, b, b
+  xm.lextract pow, tmpA, tmpB, _30, 32  
+
+{ li tmpA, 0                 ; li tmpB, 0                 }
+  xm.maccs tmpA, tmpB, b, pow
+  xm.lextract t3, tmpA, tmpB, _30, 32
+  xm.stdi  pow,t3, 8 (vec_tmp)// b^2, b^3
+  
+{ li tmpA, 0                 ; li tmpB, 0                 }
+  xm.maccs tmpA, tmpB, pow, pow
+  xm.lextract pow, tmpA, tmpB, _30, 32
+
+  xm.stdi  pow,pow, 32 (vec_tmp)// eight b^4's
+  xm.stdi  pow,pow, 40(vec_tmp)
+  xm.stdi  pow,pow, 48(vec_tmp)
+  xm.stdi  pow,pow, 56(vec_tmp)
+
+{ li tmpB, 32                ; xm.vclrdr                      }
+{ addi t3,sp, (ST_VEC_TMP2)*4   ; xm.vladd vec_tmp}
+{ xm.mkmski tmpA, 16              ; xm.vlmul0 t3}
+
+  addi t3, vec_tmp, 16
+  xm.vstrpv t3, tmpA
+
+{ srli tmpA, len, 3            ; addi t3,sp, (ST_VEC_TMP)*4    }
+{ xm.zexti len, 3                 ; xm.vldr t3}
+{ addi t3,sp, (ST_VEC_TMP2)*4   ; nop                             }
+{ slli len, len, 2             ; xm.brff tmpA, .L_loop_bot        }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+  .L_loop_top:
+  { addi tmpA, tmpA, -1           ; xm.vstr a}
+  { add a, a, tmpB              ; xm.vlmul0 t3}
+  { nop                             ; xm.vlmul0 t3}
+  { nop                             ; xm.bt tmpA, .L_loop_top        }
+  .L_loop_bot:
+
+{ xm.mkmsk len, len              ; xm.brff len, .L_finish           }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+  xm.vstrpv a, len
+  
+.L_finish:
+  xm.lddsp  s3,s2,8
+  xm.lddsp  s5,s4,0
+  xm.lddsp  s7,s6,16 
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+.L_func_end_unpack:
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME,.L_func_end_unpack - FUNCTION_NAME
+
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/scalar/radians_to_sbrads.S b/lib_xcore_math/src/arch/vx4b/scalar/radians_to_sbrads.S
new file mode 100644
index 00000000..5d82e572
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/scalar/radians_to_sbrads.S
@@ -0,0 +1,95 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+C_API
+sbrad_t radians_to_sbrads(
+    const radian_q24_t theta);
+*/
+
+#define FUNCTION_NAME   radians_to_sbrads
+#define NSTACKWORDS     (4)
+
+#define theta       x10
+#define tmp         x11
+#define accA        x12
+#define accB        x13
+
+
+.text
+.p2align 4
+
+
+/*
+
+  The implementation of this function relies on controlled integer
+  overflows to handle the symmetries of of the argument to sin().
+
+  The MACCS instruction multiplies theta by a (Q31) scalar to change
+  the period from 2.0*pi to 4.0.
+
+  The LEXTRACT instruction chooses to extract so that the bit 
+  corresponding to one period (of 4.0) is just above the MSb, so that
+  extra periods are ignored. This number should be thought of as an 
+  unsigned 32-bit integer.
+
+  theta is then in a UQ30 format, where the output is specified to be
+  in Q31 format.
+
+  A left-shift takes care of subtracting 2.0 (if needed) and converting
+  to Q31.
+
+  The only thing that remains is deciding in which cases the result will
+  be multiplied by -1, which correspond to the top 2 bits of theta.
+
+*/
+.L_vals:
+  .word 0x517cc1b7
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+{ li tmp, 25                 ; xm.ldap t3, .L_vals           }
+{ li accA, 0                 ; li accB, 0                 }
+{ nop                             ; lw t3,0             ( t3)}
+  xm.maccs accA, accB, t3, theta
+  xm.lextract theta, accA, accB, tmp, 32
+{ li t3, 30                 ; nop                             }
+{ xm.shr tmp, theta, t3         ; slli theta, theta, 1         }
+{xm.shli tmp, tmp, 2; nop}
+{ nop                             ; xm.bru tmp                     }
+{ mv a0, theta                   ; nop           }
+{xm.retsp (NSTACKWORDS)*4  ; nop           }
+{ xm.neg a0, theta               ; nop           }
+{xm.retsp (NSTACKWORDS)*4  ; nop           }
+{ xm.neg a0, theta               ; nop           }
+{xm.retsp (NSTACKWORDS)*4  ; nop           }
+{ mv a0, theta                   ; nop           }
+{xm.retsp (NSTACKWORDS)*4  ; nop           }
+.L_func_end:
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME,.L_func_end - FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+
+
+
+
+#endif //defined(__VX4B__)
+
diff --git a/lib_xcore_math/src/arch/vx4b/scalar/sbrad_sin.S b/lib_xcore_math/src/arch/vx4b/scalar/sbrad_sin.S
new file mode 100644
index 00000000..a29ec27f
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/scalar/sbrad_sin.S
@@ -0,0 +1,118 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+
+/*  
+Takes a normalized angle between 0.0 and 1.0 in Q31 format
+and returns sin of that angle in Q30.
+
+int32_t sbrad_sin(
+    const sbrad_t theta);
+*/
+
+#define FUNCTION_NAME   sbrad_sin
+#define NSTACKWORDS     (16)
+
+#define VEC_R           (NSTACKWORDS - 12)
+
+#define a         x10
+#define r         x11
+#define out_mul   x12
+#define vec_r     x13
+#define tmpA      x18
+#define tmpB      x19
+#define _31       x20
+
+.text
+.p2align 4
+
+.L_vec_b:
+  .word 0x6487ed51, -0x52aef399, 0x519af19d, -0x4cb4b33a
+  .word 0x541e0d21, -0x78c1d3f8, 0x7a3d0d34, -0x5beb6e7d
+.L_vec_s_hat:
+  .word 1, 2, 5, 9, 14, 20, 26, 32
+.L_weights:
+  .word 0x40000000, 0x40000000, 0x40000000, 0x40000000
+  .word 0x40000000, 0x40000000, 0x00000000, 0x00000000
+  
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,8
+  xm.stdsp  s5,s4,0
+{ li t3, 0                       ; addi vec_r,sp, (VEC_R)*4        }
+{ xm.slt out_mul, a, t3          ; xm.vsetc t3                     }
+{ li _31, 31                     ; xm.brff out_mul, .L_hgfd        }
+{ xm.neg a, a                    ; nop                             }
+.L_hgfd:
+
+  xm.lmul tmpA, tmpB, a, a, t3, t3
+  xm.lextract r, tmpA, tmpB, _31, 32
+  xm.lmul tmpA, tmpB, a, r, t3, t3
+  xm.lextract tmpA, tmpA, tmpB, _31, 32
+  xm.stdi  a,tmpA, 0 (vec_r)// a, a^3
+
+#undef a  // no longer needed
+#define tmpC     x10
+
+  xm.lmul tmpA, tmpB, tmpA, r, t3, t3
+  xm.lextract tmpB, tmpA, tmpB, _31, 32
+  xm.lmul tmpA, tmpC, tmpB, r, t3, t3
+  xm.lextract tmpA, tmpA, tmpC, _31, 32
+  xm.stdi  tmpB,tmpA, 8 (vec_r)// a^5, a^7
+
+  xm.lmul tmpA, tmpB, tmpA, r, t3, t3
+  xm.lextract tmpB, tmpA, tmpB, _31, 32
+  // stw tmpB, vec_r[4] // if we only wanted 5 terms
+  xm.lmul tmpA, tmpC, tmpB, r, t3, t3
+  xm.lextract tmpA, tmpA, tmpC, _31, 32
+  xm.stdi  tmpB,tmpA, 16 (vec_r)// a^9, 1^11
+
+// Now that we've filled in vec_R[], we just need to do the VPU stuff.
+{ xm.ldap t3, .L_vec_b            ; xm.vclrdr                      }
+{ xm.ldap t3, .L_vec_s_hat        ; xm.vldc t3}
+{ nop                             ; xm.vlmacc0 vec_r}
+ xm.vlsat t3
+{ mv t3, vec_r                    ; nop}
+{ nop                             ; xm.vstr t3}
+{ xm.ldap t3, .L_weights          ; xm.vclrdr                      }
+{ nop                             ; xm.vldc t3}
+{ mv t3, vec_r                    ; xm.vlmaccr0 vec_r}
+{ nop                             ; xm.vstr t3}
+{ nop                             ; lw a0,0            ( vec_r)}
+  xm.lddsp  s3,s2,8
+  xm.lddsp  s5,s4,0
+{ nop                             ; xm.bt out_mul, .L_gpgp         }
+{ nop                             ; xm.retsp (NSTACKWORDS)*4           }
+.L_gpgp:
+{ xm.neg a0, a0                   ; nop}
+{ xm.retsp (NSTACKWORDS*4)        ; nop}   
+
+.L_func_end:
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME,.L_func_end - FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+
+
+
+
+#endif //defined(__VX4B__)
+
diff --git a/lib_xcore_math/src/arch/vx4b/scalar/sbrad_tan.S b/lib_xcore_math/src/arch/vx4b/scalar/sbrad_tan.S
new file mode 100644
index 00000000..0d2507e9
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/scalar/sbrad_tan.S
@@ -0,0 +1,150 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+
+/*  
+  Takes a normalized angle between -0.5 and 0.5 in Q31 format  (corresponds to -pi/4 to pi/4)
+  and returns tan() of that angle in Q30.
+
+  tangent of any angle outside [-pi/4, pi/4] can be derived from a value within that range based on 
+  the identities of the tangent function.
+
+  q2_30 sbrad_tan(
+      const sbrad_t theta);
+*/
+
+#define FUNCTION_NAME   sbrad_tan
+#define NSTACKWORDS     (16)
+
+#define VEC_R           (NSTACKWORDS - 12)
+
+#define a         x10
+#define r         x11
+#define out_mul   x12
+#define vec_r     x13
+#define tmpA      x18
+#define tmpB      x19
+#define _30       x20
+
+.text
+.p2align 4
+
+.L_vec_b:
+.word 0x6487ED51, 0x52AEF398, 0x519AF19D, 0x517FFE6D
+.word 0x517D1CB8, 0x517CCBC9, 0x517CC2D5
+
+// The final element of vec_b[] is the tail term that includes the convergent sum
+// of the geometric series [1, alpha^2, alpha^4, alpha^6, ...]. In my notes I designated
+// this coefficient as  (4/3)*beta, where beta = 1.27323954.
+// When you normalize angles by multiplying by 2/pi and look at the power series about 0 of
+// the function tan(theta), the (constant) coefficients with each term converge asymptotically
+// (as term index increases) towards the value beta.
+// (4/3) is because R = alpha^2, where 0 <= alpha <= 0.5, so 0 <= R <= 0.25, and the
+// convergent geometric series is in R --> [1, R, R^2, R^3, ...], which converges to
+// (1/(1-R)). Given the bounds for R,    1 <= (1/(1-R)) <= (4/3).
+
+// Specifically, the final term is  (1/(1-R))*beta*(alpha^15), but we don't want to do a division,
+// so by just picking a value of R and always using that, we should significantly improve our
+// absolute error (compared to not including the final convergent sum term at all). We should prefer
+// larger values of R because the absolute error is greater there, but it looks like we get the
+// best results when we haven't gone quite all the way to (4/3).
+
+// I've experimentally found that the following seems to give the lowest absolute error in the test.
+.word 0x6b6cb9bd // Q30( beta * (4/3)^( 0.9605835543766578 ) )
+// .word 0x6ca65798 // Q30( beta * (4/3)^(1) )
+.L_vec_s_hat:
+  .word 1,3,5,7,9,11,13,15
+.L_weights:
+  .word 0x40000000, 0x40000000, 0x40000000, 0x40000000
+  .word 0x40000000, 0x40000000, 0x40000000, 0x40000000
+  
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4
+  xm.stdsp  s3,s2,8
+  xm.stdsp  s5,s4,0
+{ li t3, 0                       ; addi vec_r,sp, (VEC_R)*4        }
+{ xm.slt out_mul, a, t3          ; xm.vsetc t3                     } // Result gets multiplied by -1 if
+{ li _30, 30                     ; xm.brff out_mul, .L_hgfd        } 
+{ xm.neg a, a                    ; nop                             }
+.L_hgfd:
+
+  xm.lmul tmpA, tmpB, a, a, t3, t3
+  xm.lextract r, tmpA, tmpB, _30, 32  // extract theta^2
+  xm.lmul tmpA, tmpB, a, r, t3, t3 // theta * theta^2
+  xm.lextract tmpA, tmpA, tmpB, _30, 32 
+  xm.stdi  a,tmpA, 0   (vec_r)// theta, theta^3
+
+#undef a  // no longer needed
+#define tmpC     x10
+
+  xm.lmul tmpA, tmpB, tmpA, r, t3, t3
+  xm.lextract tmpB, tmpA, tmpB, _30, 32 // theta^5
+  xm.lmul tmpA, tmpC, tmpB, r, t3, t3
+  xm.lextract tmpA, tmpA, tmpC, _30, 32 // theta^7
+  xm.stdi  tmpB,tmpA, 8 (vec_r)// theta^5, theta^7
+ 
+  xm.lmul tmpA, tmpB, tmpA, r, t3, t3
+  xm.lextract tmpB, tmpA, tmpB, _30, 32 // theta^9
+  // stw tmpB, vec_r[4] // if we only wanted 5 terms
+  xm.lmul tmpA, tmpC, tmpB, r, t3, t3
+  xm.lextract tmpA, tmpA, tmpC, _30, 32 // theta^11
+  xm.stdi  tmpB,tmpA, 16 (vec_r)// theta^9, theta^11
+
+  xm.lmul tmpA, tmpB, tmpA, r, t3, t3
+  xm.lextract tmpB, tmpA, tmpB, _30, 32 // theta^13
+  xm.lmul tmpA, tmpC, tmpB, r, t3, t3
+  xm.lextract tmpA, tmpA, tmpC, _30, 32 // theta^15
+  xm.stdi  tmpB,tmpA, 24 (vec_r)// theta^13, theta^15
+
+// Now that we've filled in vec_R[], we just need to do the VPU stuff.
+// Note: All coefficients are positive and so are all elements or vec_r[],
+//       and we know they can't add to more than 1.0
+
+{ xm.ldap t3, .L_vec_b            ; xm.vclrdr                      }
+{ xm.ldap t3, .L_vec_s_hat        ; xm.vldc t3} // vC[] <-- P.S. coefficients
+{ nop                             ; xm.vlmacc0 vec_r} // inner product with power vect
+xm.vlsat t3
+{ mv t3, vec_r                    ; nop } // ensure they're all in the same q-format
+{ nop                             ; xm.vstr t3} 
+{ xm.ldap t3, .L_weights          ; xm.vclrdr                      }
+{ nop                             ; xm.vldc t3} 
+{ mv t3, vec_r                    ; xm.vlmaccr0 vec_r} // add them together
+{ nop                             ; xm.vstr t3}
+{ nop                             ; lw a0,0            ( vec_r)}
+  xm.lddsp  s3,s2,8 
+  xm.lddsp  s5,s4,0 
+{ nop                             ; xm.bt out_mul, .L_gpgp         }
+{ nop                             ; xm.retsp (NSTACKWORDS)*4           }
+.L_gpgp:
+{ xm.neg a0, a0                   ; nop}
+{ xm.retsp (NSTACKWORDS*4)        ; nop}      
+
+.L_func_end:
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME,.L_func_end - FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+
+
+
+
+#endif //defined(__VX4B__)
+
diff --git a/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s16.S b/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s16.S
new file mode 100644
index 00000000..03683e88
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s16.S
@@ -0,0 +1,511 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+
+#define CAT_(A, B)    A##B
+#define CAT(A, B)     CAT_(A,B)
+
+#define FUNC_START  \                
+    .text ; \
+    .globl FUNCTION_NAME ; \
+    .type FUNCTION_NAME,@function ; \
+    .p2align 4
+
+
+#define FUNC_END                                \
+    .set FUNCTION_NAME.nstackwords,NSTACKWORDS;   \  
+    .global FUNCTION_NAME.nstackwords;      \
+    .set FUNCTION_NAME.maxcores,1;                 \ 
+    .global FUNCTION_NAME.maxcores;         \
+    .set FUNCTION_NAME.maxtimers,0;                \ 
+    .global FUNCTION_NAME.maxtimers;        \
+    .set FUNCTION_NAME.maxchanends,0;              \ 
+    .global FUNCTION_NAME.maxchanends;      \
+    CAT(.L_size_end_, FUNCTION_NAME):              \
+    .size FUNCTION_NAME, CAT(.L_size_end_, FUNCTION_NAME) - FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int16_t vladd16(
+    const int16_t x, 
+    const int16_t y);
+
+********************************************************/
+
+#define FUNCTION_NAME   vladd16
+#define NSTACKWORDS     (4)
+
+FUNC_START
+FUNCTION_NAME:
+{ nop                                           ;   xm.entsp (NSTACKWORDS)*4                   }
+    li t3, 0x100    
+{ nop                                           ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a0, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldr t3}
+{ nop                                           ;   sw a1, 0                           (sp)}
+{   xm.mkmski a1, 4                             ;   xm.vladd t3}
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{   xm.sext a0, 16                             ;   xm.retsp (NSTACKWORDS) *4                      }
+FUNC_END
+
+// //.cc_bottom FUNCTION_NAME.function; 
+// .set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; 
+// .set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores; 
+// .set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers; 
+// .set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends; 
+
+// CAT(.L_size_end_, FUNCTION_NAME): 
+//     .size FUNCTION_NAME, CAT(.L_size_end_, FUNCTION_NAME) - FUNCTION_NAME
+
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int16_t vlsub16(
+    const int16_t x, 
+    const int16_t y);
+
+********************************************************/
+
+#define FUNCTION_NAME   vlsub16
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{ nop                                           ;   xm.entsp (NSTACKWORDS)*4                   }
+    li t3, 0x100    
+{ nop                                           ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a1, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldr t3}
+{ nop                                           ;   sw a0, 0                           (sp)}
+{   xm.mkmski a1, 4                             ;   xm.vlsub t3}
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{   xm.sexti a0, 16                             ;   nop}
+{nop; xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int16_t vlashr16(
+    const int16_t x,
+    const right_shift_t shr);
+
+********************************************************/
+
+#define FUNCTION_NAME   vlashr16
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{ nop                                           ;   xm.entsp (NSTACKWORDS)*4                   }
+    li t3, 0x100    
+{   xm.mkmski a2, 4                             ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a0, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+    xm.vlashr t3, a1
+    xm.vstrpv t3, a2
+{ nop                                           ;   lw a0, 0                           (sp)}
+{   xm.sext a0, 16                             ;   xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int16_t vpos16(
+    const int16_t x);
+
+********************************************************/
+
+#define FUNCTION_NAME   vpos16
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{ nop                                           ;   xm.entsp (NSTACKWORDS)*4                   }
+    li t3, 0x100    
+{ nop                                           ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a0, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldr t3}
+{   xm.mkmski a1, 4                             ;   xm.vpos                                    }
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{   xm.sext a0, 16                             ;   nop}
+{nop; xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int16_t vsign16(
+    const int16_t x);
+
+********************************************************/
+
+#define FUNCTION_NAME   vsign16
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{ nop                                           ;   xm.entsp (NSTACKWORDS)*4                   }
+    li t3, 0x100    
+{ nop                                           ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a0, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldr t3}
+{   xm.mkmski a1, 4                             ;   xm.vsign                                   }
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{   xm.sext a0, 16                             ;  xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+unsigned vdepth1_16(
+    const int16_t x);
+
+********************************************************/
+
+#define FUNCTION_NAME   vdepth1_16
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{ nop                                           ;   xm.entsp (NSTACKWORDS)*4                   }
+    li t3, 0x100    
+{ nop                                           ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a0, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldr t3}
+{   xm.mkmski a1, 4                             ;   xm.vdepth1                                 }
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{   xm.zexti a0, 1                              ;   xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int8_t vdepth8_16(
+    const int16_t x);
+
+********************************************************/
+
+#define FUNCTION_NAME   vdepth8_16
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{ nop                                           ;   xm.entsp (NSTACKWORDS)*4                   }
+    li t3, 0x100    
+{ nop                                           ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a0, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldr t3}
+{   xm.mkmski a1, 4                             ;   xm.vdepth8                                 }
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{   xm.sext a0, 8                             ;   nop}
+{nop; xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int16_t vlmul16(
+    const int16_t x,
+    const int16_t y);
+
+********************************************************/
+
+#define FUNCTION_NAME   vlmful16
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{ nop                                           ;   xm.entsp (NSTACKWORDS)*4                   }
+    li t3, 0x100    
+{ nop                                           ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a0, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldr t3}
+{ nop                                           ;   sw a1, 0                           (sp)}
+{   xm.mkmski a1, 4                             ;   xm.vlmul0 t3}
+xm.vlmul1 t3
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{   xm.sext a0, 16                             ;  xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+vpu_int16_acc_t vlmacc16(
+    const vpu_int16_acc_t acc,
+    const int16_t x,
+    const int16_t y);
+
+********************************************************/
+
+#define FUNCTION_NAME   vlmacc16
+#define NSTACKWORDS     (8)
+FUNC_START
+FUNCTION_NAME:
+{ nop                                           ;   xm.entsp (NSTACKWORDS)*4                   }
+    li t3, 0x100    
+{ nop                                           ;   xm.vsetc t3}
+{   srli a3, a0, 16                          ;   xm.zexti a0, 16                             }
+{   addi t3,sp, 0                         ;   sw a3, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldd t3}
+{ nop                                           ;   sw a0, 0                           (sp)}
+{ nop                                           ;   xm.vldr t3}
+{ nop                                           ;   sw a1, 0                           (sp)}
+{ nop                                           ;   xm.vldc t3}
+{ nop                                           ;   sw a2, 0                           (sp)}
+{ nop                                           ;   xm.vlmacc0 t3}
+xm.vlmacc1 t3
+{ nop                                           ;   xm.vstd t3}
+{ nop                                           ;   lw a1, 0                           (sp)}
+{   slli a1, a1, 16                          ;   xm.vstr t3}
+{ nop                                           ;   lw a0, 0                           (sp)}
+{   xm.zexti a0, 16                             ; nop                                           }
+{   or a0, a0, a1                           ;   xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+vpu_int16_acc_t vlmaccr16(
+    const vpu_int16_acc_t acc,
+    const int16_t x[VPU_INT16_EPV],
+    const int16_t y[VPU_INT16_EPV]);
+
+********************************************************/
+
+#define FUNCTION_NAME   vlmaccr16
+#define NSTACKWORDS     (12)
+FUNC_START
+FUNCTION_NAME:
+{ nop                                           ;   xm.entsp (NSTACKWORDS)*4                   }
+{   xm.mkmski s8, 16                           ;   sw s8, 32                          (sp)}
+    li t3, 0x100    
+{   addi t3,sp, 0                         ;   xm.vsetc t3}
+{ nop                                           ;   xm.vclrdr                                  }
+{ nop                                           ;   xm.vstd t3}
+
+// The *last* accumulator is the one that will be added to.
+
+{   slli a3, a0, 16                          ;   xm.andnot a0, s8                          }
+{   addi t3,sp, 0                         ;   sw a0, 28                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldd t3}
+{ nop                                           ;   sw a3, 28                           (sp)}
+{   li a3, 15                              ;   xm.vldr t3}
+
+.L_vlmaccr16_loop_top1:
+xm.ld16s s8, a3(a1)                       
+        xm.st16 s8,  a3(t3)
+    {   addi a3, a3, -1                           ;   xm.bt a3, .L_vlmaccr16_loop_top1           }
+
+{   li a3, 15                              ;   xm.vldc t3}
+
+.L_vlmaccr16_loop_top2:
+xm.ld16s s8, a3(a2)                       
+        xm.st16 s8,  a3(t3)
+    {   addi a3, a3, -1                           ;   xm.bt a3, .L_vlmaccr16_loop_top2           }
+
+{ nop                                           ;   xm.vlmaccr0 t3}
+xm.vlmaccr1 t3
+{ nop                                           ;   xm.vstd t3}
+{ nop                                           ;   lw a1, 0                           (sp)}
+{ nop                                           ;   xm.vstr t3}
+{   slli a1, a1, 16                          ;   lw a0, 0                           (sp)}
+{   xm.zexti a0, 16                             ;   lw s8, 32                          (sp)}
+{   or a0, a0, a1                           ;   xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int16_t vlsat16(
+    const vpu_int16_acc_t acc,
+    const unsigned sat);
+
+********************************************************/
+
+#define FUNCTION_NAME   vlsat16
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{ nop                                           ;   xm.entsp (NSTACKWORDS)*4                   }
+    li t3, 0x100    
+{ nop                                           ;   xm.vsetc t3}
+{   srli a3, a0, 16                          ;   xm.zexti a0, 16                             }
+{   addi t3,sp, 0                         ;   sw a3, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldd t3}
+{ nop                                           ;   sw a0, 0                           (sp)}
+{ nop                                           ;   xm.vldr t3}
+{ nop                                           ;   sw a1, 0                           (sp)}
+{   xm.mkmski a1, 4                             ;   nop}
+xm.vlsat t3
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{   xm.sext a0, 16                             ;   nop}
+{nop; xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int16_t vadddr16(
+    const vpu_int16_acc_t acc[VPU_INT16_ACC_PERIOD]);
+
+********************************************************/
+
+#define FUNCTION_NAME   vadddr16
+#define NSTACKWORDS     (12 + 8*2)
+FUNC_START
+FUNCTION_NAME:
+{   li a1, 8                               ;   xm.entsp (NSTACKWORDS)*4                   }
+    xm.stdsp  s3,s2,8
+    li t3, 0x100    
+{   addi a2,sp, 16                          ;   xm.vsetc t3}
+{   addi a3,sp, 48                         ;   li t3, 0                              }
+
+.L_split_loop_top:
+    { nop                                           ;   lw s2,0                           ( a0)}
+    {   addi a0, a0, 8                           ;   lw s3,4                           ( a0)}
+xm.zip s2, s3, 4
+    {   addi a1, a1, -1                           ;   sw s2,0                           ( a3)}
+    {   addi a3, a3, 4                           ;   sw s3,0                           ( a2)}
+    {   addi a2, a2, 4                           ;   xm.bt a1, .L_split_loop_top                }
+
+{   xm.ldawsp a3, 12                         ;  nop}
+{nop; xm.ldawsp t3, 4                         }
+{ nop                                           ;   xm.vldd a3}
+{ nop                                           ;   xm.vldr t3}
+//{ nop                                           ;   xm.vadddr                                  }
+{ nop                                           ;   xm.vstd a2}
+{ nop                                           ;   lw s2,0                           ( a2)}
+{   slli s2, s2, 16                          ;   xm.vstr a2}
+{ nop                                           ;   lw a0,0                           ( a2)}
+{   or a0, a0, s2                           ; nop                                           }
+
+    xm.lddsp  s3,s2,8
+{ nop                                           ;   xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s32.S b/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s32.S
new file mode 100644
index 00000000..8d0f646b
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s32.S
@@ -0,0 +1,563 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+#define CAT_(A, B)    A##B
+#define CAT(A, B)     CAT_(A,B)
+
+#define FUNC_START  \                
+    .text ; \
+    .globl FUNCTION_NAME ; \
+    .type FUNCTION_NAME,@function ; \
+    .p2align 4
+
+
+#define FUNC_END                                \
+    .set FUNCTION_NAME.nstackwords,NSTACKWORDS;   \  
+    .global FUNCTION_NAME.nstackwords;      \
+    .set FUNCTION_NAME.maxcores,1;                 \ 
+    .global FUNCTION_NAME.maxcores;         \
+    .set FUNCTION_NAME.maxtimers,0;                \ 
+    .global FUNCTION_NAME.maxtimers;        \
+    .set FUNCTION_NAME.maxchanends,0;              \ 
+    .global FUNCTION_NAME.maxchanends;      \
+    CAT(.L_size_end_, FUNCTION_NAME):              \
+    .size FUNCTION_NAME, CAT(.L_size_end_, FUNCTION_NAME) - FUNCTION_NAME
+
+
+
+
+/* *****************************************************
+
+int32_t vladd32(
+    const int32_t x, 
+    const int32_t y);
+
+********************************************************/
+
+#define FUNCTION_NAME   vladd32
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{   li t3, 0                              ;   xm.entsp (NSTACKWORDS)*4                   }
+{ nop                                           ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a0, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldr t3}
+{ nop                                           ;   sw a1, 0                           (sp)}
+{   xm.mkmski a1, 4                             ;   xm.vladd t3}
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{ nop                                           ;   xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int32_t vlsub32(
+    const int32_t x, 
+    const int32_t y);
+
+********************************************************/
+
+#define FUNCTION_NAME   vlsub32
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{   li t3, 0                              ;   xm.entsp (NSTACKWORDS)*4                   }
+{ nop                                           ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a1, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldr t3}
+{ nop                                           ;   sw a0, 0                           (sp)}
+{   xm.mkmski a1, 4                             ;   xm.vlsub t3}
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{ nop                                           ;   xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int32_t vlashr32(
+    const int32_t x,
+    const right_shift_t shr);
+
+********************************************************/
+
+#define FUNCTION_NAME   vlashr32
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{   li t3, 0                              ;   xm.entsp (NSTACKWORDS)*4                   }
+{   xm.mkmski a2, 4                             ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a0, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+    xm.vlashr t3, a1
+    xm.vstrpv t3, a2
+{ nop                                           ;   lw a0, 0                           (sp)}
+{ nop                                           ;   xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int32_t vpos32(
+    const int32_t x);
+
+********************************************************/
+
+#define FUNCTION_NAME   vpos32
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{   li t3, 0                              ;   xm.entsp (NSTACKWORDS)*4                   }
+{ nop                                           ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a0, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldr t3}
+{   xm.mkmski a1, 4                             ;   xm.vpos                                    }
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{ nop                                           ;   xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int32_t vsign32(
+    const int32_t x);
+
+********************************************************/
+
+#define FUNCTION_NAME   vsign32
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{   li t3, 0                              ;   xm.entsp (NSTACKWORDS)*4                   }
+{ nop                                           ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a0, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldr t3}
+{   xm.mkmski a1, 4                             ;   xm.vsign                                   }
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{ nop                                           ;   xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+unsigned vdepth1_32(
+    const int32_t x);
+
+********************************************************/
+
+#define FUNCTION_NAME   vdepth1_32
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{   li t3, 0                              ;   xm.entsp (NSTACKWORDS)*4                   }
+{ nop                                           ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a0, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldr t3}
+{   xm.mkmski a1, 4                             ;   xm.vdepth1                                 }
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{   xm.zexti a0, 1                              ;   xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int8_t vdepth8_32(
+    const int32_t x);
+
+********************************************************/
+
+#define FUNCTION_NAME   vdepth8_32
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{   li t3, 0                              ;   xm.entsp (NSTACKWORDS)*4                   }
+{ nop                                           ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a0, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldr t3}
+{   xm.mkmski a1, 4                             ;   xm.vdepth8                                 }
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{   xm.sext a0, 8                              ;  xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int16_t vdepth16_32(
+    const int32_t x);
+
+********************************************************/
+
+#define FUNCTION_NAME   vdepth16_32
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{   li t3, 0                              ;   xm.entsp (NSTACKWORDS)*4                   }
+{ nop                                           ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a0, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldr t3}
+{   xm.mkmski a1, 4                             ;   xm.vdepth16                                }
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{   xm.sext a0, 16                             ;  xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int32_t vlmul32(
+    const int32_t x,
+    const int32_t y);
+
+********************************************************/
+
+#define FUNCTION_NAME   vlmul32
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{   li t3, 0                              ;   xm.entsp (NSTACKWORDS)*4                   }
+{ nop                                           ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a0, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldr t3}
+{ nop                                           ;   sw a1, 0                           (sp)}
+{   xm.mkmski a1, 4                             ;   xm.vlmul0 t3}
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{ nop                                           ;   xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+vpu_int32_acc_t vlmacc32(
+    const vpu_int32_acc_t acc,
+    const int32_t x,
+    const int32_t y);
+
+********************************************************/
+
+#define FUNCTION_NAME   vlmacc32
+#define NSTACKWORDS     (12)
+FUNC_START
+FUNCTION_NAME:
+{   li t3, 0                              ;   xm.entsp (NSTACKWORDS)*4                   }
+{   addi t3,sp, 0                         ;   xm.vsetc t3}
+{ nop                                           ;   sw a1, 0                           (sp)}
+{ nop                                           ;   xm.vldd t3}
+{ nop                                           ;   sw a0, 0                           (sp)}
+{ nop                                           ;   xm.vldr t3}
+{ nop                                           ;   sw a2, 0                           (sp)}
+{ nop                                           ;   xm.vldc t3}
+{ nop                                           ;   sw a3, 0                           (sp)}
+{ nop                                           ;   xm.vlmacc0 t3}
+{ nop                                           ;   xm.vstd t3}
+{ nop                                           ;   lw a1, 0                           (sp)}
+{ nop                                           ;   xm.vstr t3}
+{ nop                                           ;   lw a0, 0                           (sp)}
+{ nop                                           ;   xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+vpu_int32_acc_t vlmaccr32(
+    const vpu_int32_acc_t acc,
+    const int32_t x[VPU_INT32_EPV],
+    const int32_t y[VPU_INT32_EPV]);
+
+********************************************************/
+
+#define FUNCTION_NAME   vlmaccr32
+#define NSTACKWORDS     (8)
+FUNC_START
+FUNCTION_NAME:
+{   li t3, 0                              ;   xm.entsp (NSTACKWORDS)*4                   }
+{   addi t3,sp, 0                         ;   xm.vsetc t3}
+
+// The *last* accumulator is the one that will be added to.
+{ nop                                           ;   sw a1, 28                           (sp)}
+{ nop                                           ;   xm.vldd t3}
+{ nop                                           ;   sw a0, 28                           (sp)}
+{ nop                                           ;   xm.vldr t3}
+{ nop                                           ;   xm.vldc a2}
+{ nop                                           ;   xm.vlmaccr0 a3}
+{ nop                                           ;   xm.vstd t3}
+{ nop                                           ;   lw a1, 0                           (sp)}
+{ nop                                           ;   xm.vstr t3}
+{ nop                                           ;   lw a0, 0                           (sp)}
+{ nop                                           ;   xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int32_t vlsat32(
+    const vpu_int32_acc_t acc,
+    const unsigned sat);
+
+********************************************************/
+
+#define FUNCTION_NAME   vlsat32
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{   li t3, 0                              ;   xm.entsp (NSTACKWORDS)*4                   }
+{   addi t3,sp, 0                         ;   xm.vsetc t3}
+{ nop                                           ;   sw a1, 0                           (sp)}
+{ nop                                           ;   xm.vldd t3}
+{ nop                                           ;   sw a0, 0                           (sp)}
+{ nop                                           ;   xm.vldr t3}
+{ nop                                           ;   sw a2, 0                           (sp)}
+{   xm.mkmski a1, 4                             ;   nop}
+xm.vlsat t3
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{ nop                                           ;   xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int32_t vcmr32(
+    const complex_s32_t x,
+    const complex_s32_t y);
+
+********************************************************/
+
+#define FUNCTION_NAME   vcmr32
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{   li t3, 0                              ;   xm.entsp (NSTACKWORDS)*4                   }
+{   addi t3,sp, 0                         ;   xm.vsetc t3}
+{ nop                                           ;   xm.vldd a0}
+{ nop                                           ;   xm.vldc a1}
+{   xm.mkmski a1, 8                             ;   xm.vcmr0                                   }
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{ nop                                           ;   xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int32_t vcmi32(
+    const complex_s32_t x,
+    const complex_s32_t y);
+
+********************************************************/
+
+#define FUNCTION_NAME   vcmi32
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{   li t3, 0                              ;   xm.entsp (NSTACKWORDS)*4                   }
+{   addi t3,sp, 0                         ;   xm.vsetc t3}
+{ nop                                           ;   xm.vldd a0}
+{ nop                                           ;   xm.vldc a1}
+{   xm.mkmski a1, 8                             ;   xm.vcmi0                                    }
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 4                           (sp)}
+{ nop                                           ;   xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int32_t vcmcr32(
+    const complex_s32_t x,
+    const complex_s32_t y);
+
+********************************************************/
+
+#define FUNCTION_NAME   vcmcr32
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{   li t3, 0                              ;   xm.entsp (NSTACKWORDS)*4                   }
+{   addi t3,sp, 0                         ;   xm.vsetc t3}
+{ nop                                           ;   xm.vldd a0}
+{ nop                                           ;   xm.vldc a1}
+{   xm.mkmski a1, 8                             ;   xm.vcmcr0                                   }
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{ nop                                           ;   xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int32_t vcmci32(
+    const complex_s32_t x,
+    const complex_s32_t y);
+
+********************************************************/
+
+#define FUNCTION_NAME   vcmci32
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{   li t3, 0                              ;   xm.entsp (NSTACKWORDS)*4                   }
+{   addi t3,sp, 0                         ;   xm.vsetc t3}
+{ nop                                           ;   xm.vldd a0}
+{ nop                                           ;   xm.vldc a1}
+{   xm.mkmski a1, 8                             ;   xm.vcmci0                                    }
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 4                           (sp)}
+{ nop                                           ;   xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s8.S b/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s8.S
new file mode 100644
index 00000000..8f5f7fd1
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s8.S
@@ -0,0 +1,423 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+#define CAT_(A, B)    A##B
+#define CAT(A, B)     CAT_(A,B)
+
+#define FUNC_START  \                
+    .text ; \
+    .globl FUNCTION_NAME ; \
+    .type FUNCTION_NAME,@function ; \
+    .p2align 4
+
+
+#define FUNC_END                                \
+    .set FUNCTION_NAME.nstackwords,NSTACKWORDS;   \  
+    .global FUNCTION_NAME.nstackwords;      \
+    .set FUNCTION_NAME.maxcores,1;                 \ 
+    .global FUNCTION_NAME.maxcores;         \
+    .set FUNCTION_NAME.maxtimers,0;                \ 
+    .global FUNCTION_NAME.maxtimers;        \
+    .set FUNCTION_NAME.maxchanends,0;              \ 
+    .global FUNCTION_NAME.maxchanends;      \
+    CAT(.L_size_end_, FUNCTION_NAME):              \
+    .size FUNCTION_NAME, CAT(.L_size_end_, FUNCTION_NAME) - FUNCTION_NAME
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int8_t vladd8(
+    const int8_t x, 
+    const int8_t y);
+
+********************************************************/
+
+#define FUNCTION_NAME   vladd8
+#define NSTACKWORDS     (4)
+
+// .global FUNCTION_NAME; 
+// .type FUNCTION_NAME,@function;
+// .cc_top FUNCTION_NAME.function, FUNCTION_NAME
+
+FUNC_START
+FUNCTION_NAME:
+{ nop                                           ;   xm.entsp (NSTACKWORDS)*4                   }
+    li t3, 0x200    
+{ nop                                           ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a0, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldr t3}
+{ nop                                           ;   sw a1, 0                           (sp)}
+{   xm.mkmski a1, 4                             ;   xm.vladd t3}
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{   xm.sext a0, 8                              ;  xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+
+// //.cc_bottom FUNCTION_NAME.function; 
+// .set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; 
+// .set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores; 
+// .set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers; 
+// .set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends; 
+
+// CAT(.L_size_end_, FUNCTION_NAME): 
+//     .size FUNCTION_NAME, CAT(.L_size_end_, FUNCTION_NAME) - FUNCTION_NAME
+
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int8_t vlsub8(
+    const int8_t x, 
+    const int8_t y);
+
+********************************************************/
+
+#define FUNCTION_NAME   vlsub8
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{ nop                                           ;   xm.entsp (NSTACKWORDS)*4                   }
+    li t3, 0x200    
+{ nop                                           ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a1, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldr t3}
+{ nop                                           ;   sw a0, 0                           (sp)}
+{   xm.mkmski a1, 4                             ;   xm.vlsub t3}
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{   xm.sext a0, 16                             ;  xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int8_t vlashx22(
+    const int8_t x,
+    const right_shift_t shr);
+
+********************************************************/
+
+#define FUNCTION_NAME   vlashx22
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{ nop                                           ;   xm.entsp (NSTACKWORDS)*4                   }
+    li t3, 0x200    
+{   xm.mkmski a2, 4                             ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a0, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+    xm.vlashr t3, a1
+    xm.vstrpv t3, a2
+{ nop                                           ;   lw a0, 0                           (sp)}
+{   xm.sext a0, 8                              ;  xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int8_t vpos8(
+    const int8_t x);
+
+********************************************************/
+
+#define FUNCTION_NAME   vpos8
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{ nop                                           ;   xm.entsp (NSTACKWORDS)*4                   }
+    li t3, 0x200    
+{ nop                                           ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a0, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldr t3}
+{   xm.mkmski a1, 4                             ;   xm.vpos                                    }
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{   xm.sext a0, 8                              ;  xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int8_t vsign8(
+    const int8_t x);
+
+********************************************************/
+
+#define FUNCTION_NAME   vsign8
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{ nop                                           ;   xm.entsp (NSTACKWORDS)*4                   }
+    li t3, 0x200    
+{ nop                                           ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a0, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldr t3}
+{   xm.mkmski a1, 4                             ;   xm.vsign                                   }
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{   xm.sext a0, 16                             ;  xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+unsigned vdepth1_8(
+    const int8_t x);
+
+********************************************************/
+
+#define FUNCTION_NAME   vdepth1_8
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{ nop                                           ;   xm.entsp (NSTACKWORDS)*4                   }
+    li t3, 0x200    
+{ nop                                           ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a0, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldr t3}
+{   xm.mkmski a1, 4                             ;   xm.vdepth1                                 }
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{   xm.zexti a0, 1                              ;   xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int8_t vlmul8(
+    const int8_t x,
+    const int8_t y);
+
+********************************************************/
+
+#define FUNCTION_NAME   vlmul8
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{ nop                                           ;   xm.entsp (NSTACKWORDS)*4                   }
+    li t3, 0x200    
+{ nop                                           ;   xm.vsetc t3}
+{   addi t3,sp, 0                         ;   sw a0, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldr t3}
+{ nop                                           ;   sw a1, 0                           (sp)}
+{   xm.mkmski a1, 4                             ;   xm.vlmul0 t3}
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{   xm.sext a0, 8                              ;  xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+vpu_int8_acc_t vlmacc8(
+    const vpu_int8_acc_t acc,
+    const int8_t x,
+    const int8_t y);
+
+********************************************************/
+
+#define FUNCTION_NAME   vlmacc8
+#define NSTACKWORDS     (8)
+FUNC_START
+FUNCTION_NAME:
+{ nop                                           ;   xm.entsp (NSTACKWORDS)*4                   }
+    li t3, 0x200    
+{ nop                                           ;   xm.vsetc t3}
+{   srli a3, a0, 16                             ;   xm.zexti a0, 16                             }
+{   addi t3,sp, 0                               ;   sw a3, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldd t3}
+{ nop                                           ;   sw a0, 0                           (sp)}
+{ nop                                           ;   xm.vldr t3}
+{ nop                                           ;   sw a1, 0                           (sp)}
+{ nop                                           ;   xm.vldc t3}
+{ nop                                           ;   sw a2, 0                           (sp)}
+{ nop                                           ;   xm.vlmacc0 t3}
+{ nop                                           ;   xm.vstd t3}
+{ nop                                           ;   lw a1, 0                           (sp)}
+{   slli a1, a1, 16                             ;   xm.vstr t3}
+{ nop                                           ;   lw a0, 0                           (sp)}
+{   xm.zexti a0, 16                             ;   nop                                           }
+{   or a0, a0, a1                               ;   xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+vpu_int8_acc_t vlmaccx22(
+    const vpu_int8_acc_t acc,
+    const int8_t x[VPU_INT8_EPV],
+    const int8_t y[VPU_INT8_EPV]);
+
+********************************************************/
+
+#define FUNCTION_NAME   vlmaccx22
+#define NSTACKWORDS     (12)
+FUNC_START
+FUNCTION_NAME:
+{ nop                                           ;   xm.entsp (NSTACKWORDS)*4                   }
+{   xm.mkmski s8, 16                           ;   sw s8, 32                          (sp)}
+    li t3, 0x200    
+{   addi t3,sp, 0                         ;   xm.vsetc t3}
+{ nop                                           ;   xm.vclrdr                                  }
+{ nop                                           ;   xm.vstd t3}
+
+// The *last* accumulator is the one that will be added to.
+
+{   slli a3, a0, 16                          ;   xm.andnot a0, s8                          }
+{   addi t3,sp, 0                         ;   sw a0, 28                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldd t3}
+{ nop                                           ;   sw a3, 28                           (sp)}
+{   li a3, 31                              ;   xm.vldr t3}
+
+.L_vlmaccx22_loop_top1:
+    { nop                                           ;   xm.ld8u s8,  a3                        (a1)}
+        xm.st8 s8,  a3(t3)
+    {   addi a3, a3, -1                           ;   xm.bt a3, .L_vlmaccx22_loop_top1            }
+
+{   li a3, 31                              ;   xm.vldc t3}
+
+.L_vlmaccx22_loop_top2:
+    { nop                                           ;   xm.ld8u s8,  a3                        (a2)}
+        xm.st8 s8,  a3(t3)
+    {   addi a3, a3, -1                           ;   xm.bt a3, .L_vlmaccx22_loop_top2            }
+
+{ nop                                           ;   xm.vlmaccr0 t3}
+{ nop                                           ;   xm.vstd t3}
+{ nop                                           ;   lw a1, 0                           (sp)}
+{ nop                                           ;   xm.vstr t3}
+{   slli a1, a1, 16                          ;   lw a0, 0                           (sp)}
+{   xm.zexti a0, 16                             ;   lw s8, 32                          (sp)}
+{   or a0, a0, a1                           ;   xm.retsp (NSTACKWORDS)*4                       }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+/* *****************************************************
+
+int8_t vlsat8(
+    const vpu_int8_acc_t acc,
+    const unsigned sat);
+
+********************************************************/
+
+#define FUNCTION_NAME   vlsat8
+#define NSTACKWORDS     (4)
+FUNC_START
+FUNCTION_NAME:
+{ nop                                           ;   xm.entsp (NSTACKWORDS)*4                   }
+    li t3, 0x200    
+{ nop                                           ;   xm.vsetc t3}
+{   srli a3, a0, 16                          ;   xm.zexti a0, 16                             }
+{   addi t3,sp, 0                         ;   sw a3, 0                           (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+{ nop                                           ;   xm.vldd t3}
+{ nop                                           ;   sw a0, 0                           (sp)}
+{ nop                                           ;   xm.vldr t3}
+{ nop                                           ;   sw a1, 0                           (sp)}
+{   xm.mkmski a1, 4                             ;   nop}
+xm.vlsat t3
+    xm.vstrpv t3, a1
+{ nop                                           ;   lw a0, 0                           (sp)}
+{   xm.sext a0, 8                              ;   xm.retsp (NSTACKWORDS)*4                    }
+FUNC_END
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/scalar/sqrt_s32.S b/lib_xcore_math/src/arch/vx4b/scalar/sqrt_s32.S
new file mode 100644
index 00000000..4a39eb83
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/scalar/sqrt_s32.S
@@ -0,0 +1,129 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+/*  
+int32_t s32_sqrt( 
+    exponent_t* y_exp,
+    const int32_t X,
+    const exponent_t x_exp,
+    const unsigned depth);
+
+    @todo This can probably be sped up ~25% by using the VPU to compute 3 bits at a time. 
+          (The speedup would be more significant if there was a quick way to create an element mask (vdepth1 creates a
+           byte mask) and a way to load each vR[k] from a single register.
+*/
+
+#define FUNCTION_NAME   s32_sqrt
+#define NSTACKWORDS     (8)
+
+
+#define y_exp   x10
+#define X       x11
+#define x_exp   x12
+#define depth   x13
+#define tmp     x19
+
+
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    xm.stdsp  s7,s6,0
+
+
+{   xm.cls tmp, X                  ;  nop}
+ xm.stwsp s8, 24      
+{   addi tmp, tmp, -1             ;   li t3, 31                 }
+{   xm.shl X, X, tmp               ;   sub tmp, x_exp, tmp         }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli X, X, tmp               \nMessage: The shift amount is not 32" */
+{   sub tmp, tmp, t3           ;   sub x_exp, tmp, t3         }
+{   xm.zexti tmp, 1                 ; nop                               }
+lui t3, %hi(vpu_vec_0x80000000)
+addi t3,t3, %lo(vpu_vec_0x80000000)
+{ nop                               ;   lw t3,0             ( t3)}
+{ nop                               ;   xm.brff tmp, .L_is_even          }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        srai t3, t3, 1
+    {   addi x_exp, x_exp, 1         ; nop                               }
+
+.L_is_even:
+    srai x_exp, x_exp, 1
+{ nop                               ;   sw x_exp,0         ( y_exp)}
+
+#undef x_exp
+#undef y_exp
+
+#define targ_hi     x10
+#define targ_lo     x11
+#define result      x12
+#define guess       x18
+#define base        x20
+#define acc_hi      x21
+#define acc_lo      x22
+#define a_exp       x23
+
+
+{   mv tmp, t3                ;   mv t3, X                  }
+{   li targ_hi, 0              ;   li targ_lo, 0              }
+    xm.maccs targ_hi, targ_lo, tmp, t3
+
+#undef X
+
+// Subtract just one more from targ_hi:targ_lo, so that we're doing <= instead of just <
+{   li tmp, 1                  ;   xm.mkmski t3, 32               }
+    xm.maccs targ_hi, targ_lo, tmp, t3
+
+    li base, 0x40
+{   li result, 0               ;   slli base, base, 24          }
+
+// @todo can potentially save a little bit of time by doing a clz on targ_hi. Might be able to skip the first iteration
+
+.L_loop_top:
+    {   mv acc_hi, targ_hi         ;   mv acc_lo, targ_lo         }
+    {   add tmp, result, base       ;   addi depth, depth, -1         }
+        xm.maccs acc_hi, acc_lo, tmp, tmp
+    {   xm.clz acc_hi, acc_hi          ; nop                               }
+    {   srli base, base, 1           ;   xm.bt acc_hi, .L_too_large     }
+        {   mv result, tmp             ; nop                               }
+    .L_too_large:
+    { nop                               ;   xm.bt depth, .L_loop_top       }
+.L_loop_end:
+
+    xm.lddsp  s7,s6,0
+    xm.lddsp  s5,s4,16
+    xm.lddsp  s3,s2,8
+{ nop                               ;   lw s8, 24              (sp)}
+{   mv a0, result              ;   xm.retsp (NSTACKWORDS)*4           }
+
+.L_func_end:
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME,.L_func_end - FUNCTION_NAME
+
+
+
+
+
+
+
+
+
+
+
+
+
+#endif //defined(__VX4B__)
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_complex_scale.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_complex_scale.S
new file mode 100644
index 00000000..3adc356f
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_complex_scale.S
@@ -0,0 +1,205 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_complex_s16_scale(
+    int16_t* a_real,
+    int16_t* a_imag,
+    const int16_t* b_real,
+    const int16_t* b_imag,
+    const int16_t c_real,
+    const int16_t c_imag,
+    const unsigned length,
+    const right_shift_t sat);
+
+*/
+
+#define NSTACKVECS      (4)
+#define NSTACKWORDS     (8+8*(NSTACKVECS)+4)
+
+
+#define STACK_VEC_SAT       (NSTACKWORDS-8-4)
+#define STACK_VEC_C_REAL    (NSTACKWORDS-16-4)
+#define STACK_VEC_C_IMAG    (NSTACKWORDS-24-4)
+#define STACK_VEC_C_IMAG_N  (NSTACKWORDS-32-4)
+
+#define FUNCTION_NAME vect_complex_s16_scale
+    
+#define a_real      x10
+#define a_imag      x11
+#define b_real      x12
+#define b_imag      x13
+#define length      x18
+#define _32         x19
+#define bytemask    x20
+
+
+.text
+.p2align 2
+
+/*
+    We're doing this:
+
+    vR  <- -1
+    vR  <- -1 * b.imag
+    vC  <- -b.imag  
+    acc <- 0
+    acc <- vC * c.imag
+    vC  <- b.real
+    acc <- acc + vC * c.real
+    vR  <- acc >> sat
+    a.real <- vR
+
+    (vC still has b.real)
+    acc <- 0
+    acc <- vC * c.imag
+    vC  <- b.imag
+    acc <- acc + vC * c.real
+    vR  <- acc >> sat
+    a.imag <- vR
+*/
+
+
+FUNCTION_NAME:
+    
+
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in entsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+
+    addi t3,sp, (STACK_VEC_C_IMAG_N)*4/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', "Falling back on assumption: the int < 64 for the integer value of the item at position 1 in the instruction's operands in ldawsp t3, STACK_VEC_C_IMAG_N\nMessage: the word-scale offset fits in a 6b unsigned immediate" */
+    mv s4, a5
+    xm.neg s2, s4
+    xm.neg s3, s4
+xm.zip s3, s2, 4
+
+    xm.stdi  s2,s2, 0(t3)
+    xm.stdi  s2,s2, 8(t3)
+    xm.stdi  s2,s2, 16(t3)
+    xm.stdi  s2,s2, 24(t3)
+
+    addi t3,sp, (STACK_VEC_C_IMAG)*4/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', "Falling back on assumption: the int < 64 for the integer value of the item at position 1 in the instruction's operands in ldawsp t3, STACK_VEC_C_IMAG\nMessage: the word-scale offset fits in a 6b unsigned immediate" */
+    mv s2, s4
+
+    xm.zip s4, s2, 4
+
+    xm.stdi  s2,s2, 0(t3)
+    xm.stdi  s2,s2, 8(t3)
+    xm.stdi  s2,s2, 16(t3)
+    xm.stdi  s2,s2, 24(t3)
+    mv s2, a4
+    mv s3, s2
+xm.zip s3, s2, 4
+    
+    xm.stdi  s2,s2, 32(t3)
+    xm.stdi  s2,s2, 40(t3)
+    xm.stdi  s2,s2, 48(t3)
+    xm.stdi  s2,s2, 56(t3)
+
+    mv s2, a7
+    mv s3, s2
+xm.zip s3, s2, 4
+    
+    xm.stdi  s2,s2, 64(t3)
+    xm.stdi  s2,s2, 72(t3)
+    xm.stdi  s2,s2, 80(t3)
+    xm.stdi  s2,s2, 88(t3)
+
+
+.p2align 2
+    mv length, a6
+    {   li _32, 32                             ;   nop}/* XAT Warning: 'LDWSP has unknown offset - this may need correction' */
+#define vect_count  length
+    {   srli vect_count, length, 4               ;   slli bytemask, length, SIZEOF_LOG2_S16   }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli bytemask, length, SIZEOF_LOG2_S16   \nMessage: The shift amount is not 32" */
+    {   xm.zexti bytemask, 5                        ;   slli t3, _32, 3                         }
+    {   addi t3,sp, (STACK_VEC_C_IMAG_N)*4        ;   xm.vsetc t3}/* XAT Warning: 'LDAWSP has unknown offset - this may need correction' */
+    { nop                                           ;   xm.brff vect_count, .L_loop_bot              }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_loop_top:
+        { nop                                           ;   xm.vldc t3}
+        {   addi vect_count, vect_count, -1           ;   xm.vclrdr                                  }
+        {   addi t3,sp, (STACK_VEC_C_REAL)*4          ;   xm.vlmacc0 b_imag}
+         xm.vlmacc1 b_imag
+        { nop                                           ;   xm.vldc b_real}
+        { xm.vlmacc0 t3; nop}
+          xm.vlmacc1 t3
+        {   add b_real, b_real, _32                 ;   addi t3,sp, (STACK_VEC_SAT)*4}
+        xm.vlsat t3
+        {   add a_real, a_real, _32                 ;   xm.vstr a_real}
+        {   addi t3,sp, (STACK_VEC_C_IMAG)*4          ;   xm.vclrdr                                  }/* XAT Warning: 'LDAWSP has unknown offset - this may need correction' */
+        { nop                                           ;   xm.vlmacc0 t3}                        /* XAT Warning: 'LDAWSP has unknown offset - this may need correction' */
+        xm.vlmacc1 t3
+        {   addi t3,sp, (STACK_VEC_C_REAL)*4          ;   xm.vldc b_imag}/* XAT Warning: 'LDAWSP has unknown offset - this may need correction' */
+        {    nop            ;   xm.vlmacc0 t3}
+        xm.vlmacc1 t3
+        {   add b_imag, b_imag, _32                 ;   addi t3,sp, (STACK_VEC_SAT)*4}
+        xm.vlsat t3
+        {   add a_imag, a_imag, _32                 ;   xm.vstr a_imag}
+        {   addi t3,sp, (STACK_VEC_C_IMAG_N)*4        ;   xm.bt vect_count, .L_loop_top              }/* XAT Warning: 'LDAWSP has unknown offset - this may need correction' */
+    .L_loop_bot:
+
+    {   xm.mkmsk bytemask, bytemask                ;   xm.brff bytemask, .L_done                    }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.vldc t3}
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vstd t3}
+    {   addi t3,sp, (STACK_VEC_C_REAL)*4          ;   xm.vlmacc0 b_imag}
+    xm.vlmacc1 b_imag
+#define vec_tmp     b_real
+    {   addi vec_tmp,sp, (STACK_VEC_C_IMAG_N)*4    ;   xm.vldc b_real}/* XAT Warning: 'LDAWSP has unknown offset - this may need correction' */
+    {  nop              ;   xm.vlmacc0 t3}/* XAT Warning: 'LDAWSP has unknown offset - this may need correction' */
+    xm.vlmacc1 t3
+    addi t3,sp, (STACK_VEC_SAT)*4
+    xm.vlsat t3
+        xm.vstrpv vec_tmp, bytemask
+        xm.vstrpv a_real, bytemask        
+    { nop                                           ;   xm.vldd vec_tmp}
+    { nop                                           ;   xm.vstd vec_tmp}
+    {   addi t3,sp, (STACK_VEC_C_IMAG)*4          ;   xm.vclrdr                                  }/* XAT Warning: 'LDAWSP has unknown offset - this may need correction' */
+    { nop                                           ;   xm.vlmacc0 t3}                          /* XAT Warning: 'LDAWSP has unknown offset - this may need correction' */
+    xm.vlmacc1 t3
+    {   addi t3,sp, (STACK_VEC_C_REAL)*4          ;   xm.vldc b_imag}/* XAT Warning: 'LDAWSP has unknown offset - this may need correction' */
+    {   nop            ;   xm.vlmacc0 t3}
+    xm.vlmacc1 t3
+     {   addi t3,sp, (STACK_VEC_SAT)*4             ;  nop}
+    xm.vlsat t3
+        xm.vstrpv vec_tmp, bytemask
+        xm.vstrpv a_imag, bytemask
+    { nop                                           ;   xm.vldd vec_tmp}
+    { nop                                           ;   xm.vstd vec_tmp}
+
+
+
+.L_done:
+        xm.lddsp  s3,s2,8/* XAT Warning: "Not correcting LDDSP offset because it's not in the local frame range" */
+        xm.lddsp  s5,s4,16/* XAT Warning: "Not correcting LDDSP offset because it's not in the local frame range" */
+
+    {   li a0, 15                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ; nop                                           }
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conj_macc.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conj_macc.S
new file mode 100644
index 00000000..380ed7e5
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conj_macc.S
@@ -0,0 +1,219 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// XMOS Public License: Version 1
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_complex_s16_conj_macc(
+    int16_t* acc_real,
+    int16_t* acc_imag,
+    const int16_t* b_real,
+    const int16_t* b_imag,
+    const int16_t* c_real,
+    const int16_t* c_imag,
+    const unsigned length,
+    const right_shift_t acc_shr,
+    const right_shift_t sat);
+
+*/
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (4)
+#define NSTACKWORDS     (12+8*(NSTACKVECS)+4)
+
+
+#define STACK_SAT       (NSTACKWORDS)
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+#define STACK_VEC_SAT   (NSTACKWORDS-16-4)
+
+#define STACK_BYTEMASK  8
+
+#define FUNCTION_NAME vect_complex_s16_conj_macc
+    
+#define acc_re      x10
+#define acc_im      x11
+#define b_re        x12
+#define b_im        x13
+#define c_re        x18
+#define c_im        x19
+#define len         x20
+#define vec_tmp     x21
+#define vec_sat     x22
+#define bytemask    x23
+#define acc_shr     x23
+
+
+/*
+    We want:
+
+    C.re <--  b.real * c.real + b.imag * c.imag
+    C.im <--  b.imag * c.real - b.real * c.imag
+
+    We're doing this:
+
+    vR  <- -1
+    vR  <- -1 * b.real
+    vC  <- -b.real  
+    
+    tmp <- 0
+    tmp <- vC * c.imag
+    vC  <- b.imag
+    tmp <- tmp + vC * c.real
+    vR  <- tmp >> sat
+    vR  <- acc.imag + vR
+    acc.imag <- vR
+
+    (vC still has b.imag)
+    tmp <- 0
+    tmp <- vC * c.imag
+    vC  <- b.real
+    tmp <- tmp + vC * c.real
+    vR  <- tmp >> sat
+    vR  <- acc.real + vR
+    acc.real <- vR
+*/
+
+
+.L_neg_ones:
+    .short -0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    xm.stdsp  s7,s6,24
+
+    {   addi t3,sp, (STACK_VEC_SAT)*4             ;   lw s2, (STACK_SAT)*4                   (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */
+    {   slli s3, s2, 16                          ;   xm.zexti s2, 16                             }
+    {   or s2, s2, s3                           ;   sw s8, 4                          (sp)}
+        xm.stdi  s2,s2, 0(t3)
+        xm.stdi  s2,s2, 8(t3)
+        xm.stdi  s2,s2, 16(t3)
+        xm.stdi  s2,s2, 24(t3)
+    mv bytemask, a6
+    {   li t3, 32                               ;  nop}
+    mv c_re, a4
+    {   srli len, bytemask, 4                    ;  nop}
+    mv c_im, a5
+    {   xm.zexti bytemask, 4                        ;  nop}
+    {   slli bytemask, bytemask, SIZEOF_LOG2_S16 ;   slli t3, t3, 3                         }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli bytemask, bytemask, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */
+    { nop                                           ;   sw bytemask, (STACK_BYTEMASK)*4        (sp)}
+    mv acc_shr, a7
+    {   addi vec_tmp,sp, (STACK_VEC_TMP)*4         ;   xm.vsetc t3}
+
+        la t3, vpu_vec_0x8000
+    {   mv s8, t3                            ;   xm.brff len, .L_loop_bot                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_loop_top:
+        {   xm.mkmski t3, 32                           ; nop                                           }
+            xm.vlashr acc_re, acc_shr
+            xm.vstrpv acc_re, t3
+            xm.vlashr acc_im, acc_shr
+            xm.vstrpv acc_im, t3
+        {   mv t3, s8                            ; nop                                           }
+        {   addi len, len, -1                         ;   xm.vldr t3}
+        {   xm.mkmski t3, 32                           ;   xm.vlmul0 b_re}
+        xm.vlmul1 b_re
+            xm.vstrpv vec_tmp, t3
+        {   addi vec_sat,sp, (STACK_VEC_SAT)*4         ;   xm.vldc vec_tmp}
+        { nop                                           ;   xm.vclrdr                                  }
+        { nop                                           ;   xm.vlmacc0 c_im}
+        xm.vlmacc1 c_im
+        { nop                                           ;   xm.vldc b_im}
+        { nop                                           ;   xm.vlmacc0 c_re}
+        xm.vlmacc1 c_re
+
+        xm.vlsat vec_sat
+        { nop                                           ;   xm.vladd acc_im}
+        {   li t3, 32                             ;   xm.vstr acc_im}
+        {   add b_im, b_im, t3                     ;   xm.vclrdr                                  }
+        {   add acc_im, acc_im, t3                 ;   xm.vlmacc0 c_im}
+        xm.vlmacc1 c_im
+        {   add c_im, c_im, t3                     ;   xm.vldc b_re}
+        {   add b_re, b_re, t3                     ;   xm.vlmacc0 c_re}
+        xm.vlmacc1 c_re
+        {   add c_re, c_re, t3                     ;   nop}
+        xm.vlsat vec_sat
+        { nop                                           ;   xm.vladd acc_re}
+        {   add acc_re, acc_re, t3                 ;   xm.vstr acc_re}
+        {   mv t3, s8                            ;   xm.bt len, .L_loop_top                     }
+    .L_loop_bot:
+
+#undef bytemask
+#define bytemask len
+
+    { nop                                           ;   lw bytemask, (STACK_BYTEMASK)*4        (sp)}
+    {   xm.mkmsk bytemask, bytemask                ;   xm.brff bytemask, .L_done                    }   /* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+        xm.vlashr acc_re, acc_shr
+        xm.vstrpv acc_re, bytemask
+        xm.vlashr acc_im, acc_shr
+        xm.vstrpv acc_im, bytemask
+    { nop                                           ;   xm.vldr t3}
+    {   xm.mkmski t3, 32                           ;   xm.vlmul0 b_re}
+    xm.vlmul1 b_re
+        xm.vstrpv vec_tmp, t3
+    {   addi vec_sat,sp, (STACK_VEC_SAT)*4         ;   xm.vldc vec_tmp}
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vstd vec_tmp}
+    { nop                                           ;   xm.vlmacc0 c_im}
+    xm.vlmacc1 c_im
+    { nop                                           ;   xm.vldc b_im}
+    {   addi t3,sp, (STACK_VEC_SAT)*4             ;   xm.vlmacc0 c_re}
+    xm.vlmacc1 c_re
+    xm.vlsat vec_sat
+    { nop                                           ;   xm.vladd acc_im}
+        xm.vstrpv acc_im, bytemask
+        xm.vstrpv vec_tmp, bytemask
+    { nop                                           ;   xm.vldd vec_tmp}
+    { nop                                           ;   xm.vstd vec_tmp}
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vlmacc0 c_im}
+    xm.vlmacc1 c_im
+    { nop                                           ;   xm.vldc b_re}
+    { nop                                           ;   xm.vlmacc0 c_re}
+    xm.vlmacc1 c_re
+    xm.vlsat vec_sat
+    { nop                                           ;   xm.vladd acc_re}
+        xm.vstrpv acc_re, bytemask   
+        xm.vstrpv vec_tmp, bytemask
+    { nop                                           ;   xm.vldd vec_tmp}
+    { nop                                           ;   xm.vstd vec_tmp}
+
+
+.L_done:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s7,s6,24
+
+    {   li a0, 15                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ;   lw s8, 4                          (sp)}
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conj_nmacc.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conj_nmacc.S
new file mode 100644
index 00000000..37fb850b
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conj_nmacc.S
@@ -0,0 +1,211 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// XMOS Public License: Version 1
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_complex_s16_conj_nmacc(
+    int16_t* acc_real,
+    int16_t* acc_imag,
+    const int16_t* b_real,
+    const int16_t* b_imag,
+    const int16_t* c_real,
+    const int16_t* c_imag,
+    const unsigned length,
+    const right_shift_t acc_shr,
+    const right_shift_t sat);
+
+*/
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (4)
+#define NSTACKWORDS     (12+8*(NSTACKVECS)+4)
+
+#define STACK_SAT       (NSTACKWORDS)
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+#define STACK_VEC_SAT   (NSTACKWORDS-16-4)
+
+#define STACK_BYTEMASK  8
+
+#define FUNCTION_NAME vect_complex_s16_conj_nmacc
+    
+#define acc_re      x10
+#define acc_im      x11
+#define b_re        x12
+#define b_im        x13
+#define c_re        x18
+#define c_im        x19
+#define len         x20
+#define vec_tmp     x21
+#define vec_sat     x22
+#define bytemask    x23
+#define acc_shr     x23
+
+
+/*
+    We're doing this:
+
+    vR  <- -1
+    vR  <- -1 * b.real
+    vC  <- -b.real  
+    tmp <- 0
+    tmp <- vC * c.imag
+    vC  <- b.imag
+    tmp <- tmp + vC * c.real
+    vR  <- tmp >> sat
+    vR  <- acc.imag - vR
+    acc.imag <- vR
+
+    (vC still has b.imag)
+    tmp <- 0
+    tmp <- vC * c.imag
+    vC  <- b.real
+    tmp <- tmp + vC * c.real
+    vR  <- tmp >> sat
+    vR  <- acc.real - vR
+    acc.real <- vR
+*/
+
+
+.L_neg_ones:
+    .short -0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    xm.stdsp  s7,s6,24
+
+    {   addi t3,sp, (STACK_VEC_SAT)*4             ;   lw s2, (STACK_SAT)*4                   (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */
+    {   slli s3, s2, 16                          ;   xm.zexti s2, 16                             }
+    {   or s2, s2, s3                           ;   sw s8, 4                          (sp)}
+        xm.stdi  s2,s2, 0(t3)
+        xm.stdi  s2,s2, 8(t3)
+        xm.stdi  s2,s2, 16(t3)
+        xm.stdi  s2,s2, 24(t3)
+    mv bytemask, a6
+    {   li t3, 32                             ;   nop}
+    mv c_re, a4
+    {   srli len, bytemask, 4                    ;   nop}
+    mv c_im, a5
+    {   xm.zexti bytemask, 4                        ;  nop}
+    {   slli bytemask, bytemask, SIZEOF_LOG2_S16 ;   slli t3, t3, 3                         }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli bytemask, bytemask, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */
+    { nop                                           ;   sw bytemask, (STACK_BYTEMASK)*4        (sp)}
+
+    mv acc_shr, a7
+    {   addi vec_tmp,sp, (STACK_VEC_TMP)*4         ;   xm.vsetc t3}
+        la t3, vpu_vec_0x8000
+    {   mv s8, t3                            ;   xm.brff len, .L_loop_bot                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_loop_top:
+        {   xm.mkmski t3, 32                           ; nop                                           }
+            xm.vlashr acc_re, acc_shr
+            xm.vstrpv acc_re, t3
+            xm.vlashr acc_im, acc_shr
+            xm.vstrpv acc_im, t3
+        {   mv t3, s8                            ; nop                                           }
+        {   addi len, len, -1                         ;   xm.vldr t3}
+        {   xm.mkmski t3, 32                           ;   xm.vlmul0 b_re}
+        xm.vlmul1 b_re
+            xm.vstrpv vec_tmp, t3
+        {   addi vec_sat,sp, (STACK_VEC_SAT)*4         ;   xm.vldc vec_tmp}
+        { nop                                           ;   xm.vclrdr                                  }
+        { nop                                           ;   xm.vlmacc0 c_im}
+        xm.vlmacc1 c_im
+        { nop                                           ;   xm.vldc b_im}
+        { nop                                           ;   xm.vlmacc0 c_re}
+        xm.vlmacc1 c_re
+        xm.vlsat vec_sat
+        { nop                                           ;   xm.vlsub acc_im}
+        {   li t3, 32                             ;   xm.vstr acc_im}
+        {   add b_im, b_im, t3                     ;   xm.vclrdr                                  }
+        {   add acc_im, acc_im, t3                 ;   xm.vlmacc0 c_im}
+        xm.vlmacc1 c_im
+        {   add c_im, c_im, t3                     ;   xm.vldc b_re}
+        {   add b_re, b_re, t3                     ;   xm.vlmacc0 c_re}
+        xm.vlmacc1 c_re
+        {   add c_re, c_re, t3                     ;   nop}
+        xm.vlsat vec_sat
+        { nop                                           ;   xm.vlsub acc_re}
+        {   add acc_re, acc_re, t3                 ;   xm.vstr acc_re}
+        {   mv t3, s8                            ;   xm.bt len, .L_loop_top                     }
+    .L_loop_bot:
+
+#undef bytemask
+#define bytemask len
+
+    { nop                                           ;   lw bytemask, (STACK_BYTEMASK)*4        (sp)}
+    {   xm.mkmsk bytemask, bytemask                ;   xm.brff bytemask, .L_done                    }   /* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+        xm.vlashr acc_re, acc_shr
+        xm.vstrpv acc_re, bytemask
+        xm.vlashr acc_im, acc_shr
+        xm.vstrpv acc_im, bytemask
+    { nop                                           ;   xm.vldr t3}
+    {   xm.mkmski t3, 32                           ;   xm.vlmul0 b_re}
+    xm.vlmul1 b_re
+        xm.vstrpv vec_tmp, t3
+    {   addi vec_sat,sp, (STACK_VEC_SAT)*4         ;   xm.vldc vec_tmp}
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vstd vec_tmp}
+    { nop                                           ;   xm.vlmacc0 c_im}
+    xm.vlmacc1 c_im
+    { nop                                           ;   xm.vldc b_im}
+    {   addi t3,sp, (STACK_VEC_SAT)*4             ;   xm.vlmacc0 c_re}
+     xm.vlmacc1 c_re
+    xm.vlsat vec_sat
+    { nop                                           ;   xm.vlsub acc_im}
+        xm.vstrpv acc_im, bytemask
+        xm.vstrpv vec_tmp, bytemask
+    { nop                                           ;   xm.vldd vec_tmp}
+    { nop                                           ;   xm.vstd vec_tmp}
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vlmacc0 c_im}
+    xm.vlmacc1 c_im
+    { nop                                           ;   xm.vldc b_re}
+    { nop                                           ;   xm.vlmacc0 c_re}
+    xm.vlmacc1 c_re
+    xm.vlsat vec_sat
+    { nop                                           ;   xm.vlsub acc_re}
+        xm.vstrpv acc_re, bytemask   
+        xm.vstrpv vec_tmp, bytemask
+    { nop                                           ;   xm.vldd vec_tmp}
+    { nop                                           ;   xm.vstd vec_tmp}
+
+
+.L_done:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s7,s6,24
+
+    {   li a0, 15                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ;   lw s8, 4                          (sp)}
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conjugate_mul.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conjugate_mul.S
new file mode 100644
index 00000000..fd84b54c
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conjugate_mul.S
@@ -0,0 +1,160 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_complex_s16_conj_mul(
+    int16_t* a_real,
+    int16_t* a_imag,
+    const int16_t* b_real,
+    const int16_t* b_imag,
+    const int16_t* c_real,
+    const int16_t* c_imag,
+    const unsigned length,
+    const right_shift_t sat);
+
+*/
+
+#define FUNCTION_NAME vect_complex_s16_conj_mul
+
+#define NSTACKVECS      (4)
+#define NSTACKWORDS     (12+8*(NSTACKVECS)+4)
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+#define STACK_VEC_SAT   (NSTACKWORDS-16-4)
+
+#define STACK_BYTEMASK  8
+
+    
+#define a_re        x10
+#define a_im        x11
+#define b_re        x12
+#define b_im        x13
+#define c_re        x18
+#define c_im        x19
+#define len         x20
+#define vec_tmp     x21
+#define vec_sat     x22
+#define bytemask    x23
+
+
+.text
+.p2align 2
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    xm.stdsp  s7,s6,24
+
+    mv s2, a7
+    {   addi t3,sp, (STACK_VEC_SAT)*4             ;  nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */
+    {   slli s3, s2, 16                          ;   xm.zexti s2, 16                             }
+    {   or s2, s2, s3                           ;   sw s8, 4                          (sp)}
+        xm.stdi  s2,s2, 0(t3)
+        xm.stdi  s2,s2, 8(t3)
+        xm.stdi  s2,s2, 16(t3)
+        xm.stdi  s2,s2, 24(t3)
+    mv bytemask, a6
+    {   li t3, 32                             ;   nop}
+    mv c_re, a4
+    {   srli len, bytemask, 4                    ;   nop}
+    mv c_im, a5
+    {   xm.zexti bytemask, 4                        ;  nop}
+    {   slli bytemask, bytemask, SIZEOF_LOG2_S16 ;   slli t3, t3, 3                         }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli bytemask, bytemask, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */
+    {   addi vec_tmp,sp, (STACK_VEC_TMP)*4         ;   xm.vsetc t3}
+
+        la t3, vpu_vec_0x8000
+    {   mv s8, t3                            ;   xm.brff len, .L_loop_bot                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_loop_top:
+        {   addi len, len, -1                         ;   xm.vldr t3}
+        {   xm.mkmski t3, 32                           ;   xm.vlmul0 b_re}
+        xm.vlmul1 b_re
+            xm.vstrpv vec_tmp, t3
+        {   addi vec_sat,sp, (STACK_VEC_SAT)*4         ;   xm.vldc vec_tmp}
+        { nop                                           ;   xm.vclrdr                                  }
+        { nop                                           ;   xm.vlmacc0 c_im}
+        xm.vlmacc1 c_im
+        { nop                                           ;   xm.vldc b_im}
+        {   addi t3,sp, (STACK_VEC_SAT)*4             ;   xm.vlmacc0 c_re}
+        xm.vlmacc1 c_re
+        xm.vlsat vec_sat
+        {   li t3, 32                             ;   xm.vstr a_im}
+        {   add b_im, b_im, t3                     ;   xm.vclrdr                                  }
+        {   add a_im, a_im, t3                     ;   xm.vlmacc0 c_im}
+        xm.vlmacc1 c_im
+        {   add c_im, c_im, t3                     ;   xm.vldc b_re}
+        {   add b_re, b_re, t3                     ;   xm.vlmacc0 c_re}
+        xm.vlmacc1 c_re
+        {   add c_re, c_re, t3                     ;   nop}
+        xm.vlsat vec_sat
+        {   add a_re, a_re, t3                     ;   xm.vstr a_re}
+        {   mv t3, s8                            ;   xm.bt len, .L_loop_top                     }
+    .L_loop_bot:
+
+    {   xm.mkmsk bytemask, bytemask                ;   xm.brff bytemask, .L_done                    }   /* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    { nop                                           ;   xm.vldr t3}
+    {   xm.mkmski t3, 32                           ;   xm.vlmul0 b_re}
+    xm.vlmul1 b_re
+        xm.vstrpv vec_tmp, t3
+    {   addi vec_sat,sp, (STACK_VEC_SAT)*4         ;   xm.vldc vec_tmp}
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vstd vec_tmp}
+    { nop                                           ;   xm.vlmacc0 c_im}
+    xm.vlmacc1 c_im
+    { nop                                           ;   xm.vldc b_im}
+    {   addi t3,sp, (STACK_VEC_SAT)*4             ;   xm.vlmacc0 c_re}
+    xm.vlmacc1 c_re
+    xm.vlsat vec_sat
+        xm.vstrpv a_im, bytemask
+        xm.vstrpv vec_tmp, bytemask
+    { nop                                           ;   xm.vldd vec_tmp}
+    { nop                                           ;   xm.vstd vec_tmp}
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vlmacc0 c_im}
+    xm.vlmacc1 c_im
+    { nop                                           ;   xm.vldc b_re}
+    { nop                                           ;   xm.vlmacc0 c_re}
+    xm.vlmacc1 c_re
+    xm.vlsat vec_sat
+        xm.vstrpv a_re, bytemask   
+        xm.vstrpv vec_tmp, bytemask
+    { nop                                           ;   xm.vldd vec_tmp}
+    { nop                                           ;   xm.vstd vec_tmp}
+
+
+.L_done:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s7,s6,24
+
+    {   li a0, 15                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ;   lw s8, 4                          (sp)}
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_macc.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_macc.S
new file mode 100644
index 00000000..ecd925c2
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_macc.S
@@ -0,0 +1,212 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// XMOS Public License: Version 1
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_complex_s16_macc(
+    int16_t* acc_real,
+    int16_t* acc_imag,
+    const int16_t* b_real,
+    const int16_t* b_imag,
+    const int16_t* c_real,
+    const int16_t* c_imag,
+    const unsigned length,
+    const right_shift_t acc_shr,
+    const right_shift_t sat);
+
+*/
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (4)
+#define NSTACKWORDS     (12+8*(NSTACKVECS)+4)
+
+#define STACK_SAT       (NSTACKWORDS)
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+#define STACK_VEC_SAT   (NSTACKWORDS-16-4)
+
+#define STACK_BYTEMASK  8
+
+#define FUNCTION_NAME vect_complex_s16_macc
+    
+#define acc_re      x10
+#define acc_im      x11
+#define b_re        x12
+#define b_im        x13
+#define c_re        x18
+#define c_im        x19
+#define len         x20
+#define vec_tmp     x21
+#define vec_sat     x22
+#define bytemask    x23
+#define acc_shr     x23
+
+
+/*
+    We're doing this:
+
+    vR  <- -1
+    vR  <- -1 * b.imag
+    vC  <- -b.imag  
+    tmp <- 0
+    tmp <- vC * c.imag
+    vC  <- b.real
+    tmp <- tmp + vC * c.real
+    vR  <- tmp >> sat
+    vR  <- acc.real + vR
+    acc.real <- vR
+
+    (vC still has b.real)
+    tmp <- 0
+    tmp <- vC * c.imag
+    vC  <- b.imag
+    tmp <- tmp + vC * c.real
+    vR  <- tmp >> sat
+    vR  <- acc.imag + vR
+    acc.imag <- vR
+*/
+
+
+.L_neg_ones:
+    .short -0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    xm.stdsp  s7,s6,24
+
+    {   addi t3,sp, (STACK_VEC_SAT)*4             ;   lw s2, (STACK_SAT)*4                   (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */
+    {   slli s3, s2, 16                          ;   xm.zexti s2, 16                             }
+    {   or s2, s2, s3                           ;   sw s8, 4                          (sp)}
+        xm.stdi  s2,s2, 0(t3)
+        xm.stdi  s2,s2, 8(t3)
+        xm.stdi  s2,s2, 16(t3)
+        xm.stdi  s2,s2, 24(t3)
+    mv bytemask, a6
+    {   li t3, 32                             ;  nop}
+    mv c_re, a4
+    {   srli len, bytemask, 4                    ;  nop}
+    mv c_im, a5
+    {   xm.zexti bytemask, 4                        ;   nop}
+    {   slli bytemask, bytemask, SIZEOF_LOG2_S16 ;   slli t3, t3, 3                         }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli bytemask, bytemask, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */
+    { nop                                           ;   sw bytemask, (STACK_BYTEMASK)*4        (sp)}
+    mv acc_shr, a7
+    { nop                                           ;   nop}
+    {   addi vec_tmp,sp, (STACK_VEC_TMP)*4         ;   xm.vsetc t3}
+        
+        la t3, vpu_vec_0x8000
+    {   mv s8, t3                            ;   xm.brff len, .L_loop_bot                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_loop_top:
+        {   xm.mkmski t3, 32                           ; nop                                           }
+            xm.vlashr acc_re, acc_shr
+            xm.vstrpv acc_re, t3
+            xm.vlashr acc_im, acc_shr
+            xm.vstrpv acc_im, t3
+        {   mv t3, s8                            ; nop                                           }
+        {   addi len, len, -1                         ;   xm.vldr t3}
+        {   xm.mkmski t3, 32                           ;   xm.vlmul0 b_im}
+        xm.vlmul1 b_im
+            xm.vstrpv vec_tmp, t3
+        {   addi vec_sat,sp, (STACK_VEC_SAT)*4         ;   xm.vldc vec_tmp}
+        { nop                                           ;   xm.vclrdr                                  }
+        { nop                                           ;   xm.vlmacc0 c_im}
+        xm.vlmacc1 c_im
+        { nop                                           ;   xm.vldc b_re}
+        { nop                                           ;   xm.vlmacc0 c_re}
+        xm.vlmacc1 c_re
+        xm.vlsat vec_sat
+        { nop                                           ;   xm.vladd acc_re}
+        {   li t3, 32                             ;   xm.vstr acc_re}
+        {   add b_re, b_re, t3                     ;   xm.vclrdr                                  }
+        {   add acc_re, acc_re, t3                 ;   xm.vlmacc0 c_im}
+        xm.vlmacc1 c_im
+        {   add c_im, c_im, t3                     ;   xm.vldc b_im}
+        {   add b_im, b_im, t3                     ;   xm.vlmacc0 c_re}
+        xm.vlmacc1 c_re
+        {   add c_re, c_re, t3                     ;   nop}
+        xm.vlsat vec_sat
+        { nop                                           ;   xm.vladd acc_im}
+        {   add acc_im, acc_im, t3                 ;   xm.vstr acc_im}
+        {   mv t3, s8                            ;   xm.bt len, .L_loop_top                     }
+    .L_loop_bot:
+
+#undef bytemask
+#define bytemask len
+
+    { nop                                           ;   lw bytemask, (STACK_BYTEMASK)*4        (sp)}
+    {   xm.mkmsk bytemask, bytemask                ;   xm.brff bytemask, .L_done                    }   /* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+        xm.vlashr acc_re, acc_shr
+        xm.vstrpv acc_re, bytemask
+        xm.vlashr acc_im, acc_shr
+        xm.vstrpv acc_im, bytemask
+    { nop                                           ;   xm.vldr t3}
+    {   xm.mkmski t3, 32                           ;   xm.vlmul0 b_im}
+    xm.vlmul1 b_im
+        xm.vstrpv vec_tmp, t3
+    {   addi vec_sat,sp, (STACK_VEC_SAT)*4         ;   xm.vldc vec_tmp}
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vstd vec_tmp}
+    { nop                                           ;   xm.vlmacc0 c_im}
+    xm.vlmacc1 c_im
+    { nop                                           ;   xm.vldc b_re}
+    {   addi t3,sp, (STACK_VEC_SAT)*4             ;   xm.vlmacc0 c_re}
+    xm.vlmacc1 c_re
+    xm.vlsat vec_sat
+    { nop                                           ;   xm.vladd acc_re}
+        xm.vstrpv acc_re, bytemask
+        xm.vstrpv vec_tmp, bytemask
+    { nop                                           ;   xm.vldd vec_tmp}
+    { nop                                           ;   xm.vstd vec_tmp}
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vlmacc0 c_im}
+     xm.vlmacc1 c_im
+    { nop                                           ;   xm.vldc b_im}
+    { nop                                           ;   xm.vlmacc0 c_re}
+    xm.vlmacc1 c_re
+    xm.vlsat vec_sat
+    { nop                                           ;   xm.vladd acc_im}
+        xm.vstrpv acc_im, bytemask   
+        xm.vstrpv vec_tmp, bytemask
+    { nop                                           ;   xm.vldd vec_tmp}
+    { nop                                           ;   xm.vstd vec_tmp}
+
+
+.L_done:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s7,s6,24
+
+    {   li a0, 15                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ;   lw s8, 4                          (sp)}
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_mag.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_mag.S
new file mode 100644
index 00000000..412a6134
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_mag.S
@@ -0,0 +1,269 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_complex_s16_mag(
+    int16_t a[],
+    const int16_t b_real[],
+    const int16_t b_imag[],
+    const unsigned length,
+    const right_shift_t b_shr,
+    const int16_t* rot_table,
+    const unsigned table_rows)
+
+*/
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (8)
+#define NSTACKWORDS     (8+(8*NSTACKVECS)+4)
+
+
+#define STACK_VEC_TMP_IMAG  (NSTACKWORDS-8-4)
+#define STACK_VEC_TMP_REAL  (NSTACKWORDS-16-4)
+#define STACK_VEC_TMP2      (NSTACKWORDS-24-4)
+#define STACK_VEC_NEG_ONES  (NSTACKWORDS-40-4)
+#define STACK_VEC_SAT       (NSTACKWORDS-32-4)
+
+#define FUNCTION_NAME vect_complex_s16_mag
+
+#define Q(R)    R
+
+#define a           x10 
+#define b_real      x11 
+#define b_imag      x12
+#define length      x13
+#define b_shr       x18
+#define _32         x19
+#define vec_neg_one x20
+#define mask32      x21
+#define tail_bytes  x22
+#define iter        x23
+#define table       x24
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    xm.stdsp  s7,s6,24
+
+    {   li _32, 32                             ;   sw s8, 4                          (sp)}
+
+    {   slli t3, _32, 3                         ;   mv tail_bytes, length                  }
+    {   srli length, length, 4                   ;   xm.vsetc t3}
+    { nop                                           ;   li t3, 15                             }
+
+    {   slli s8, t3, 16                        ;   xm.zexti tail_bytes, 4                      }
+    {   or t3, t3, s8                        ;   xm.mkmski mask32, 32                        }
+    xm.stdsp  t3,t3,(STACK_VEC_SAT/2 + 0)*8
+    xm.stdsp  t3,t3,(STACK_VEC_SAT/2 + 1)*8
+    xm.stdsp  t3,t3,(STACK_VEC_SAT/2 + 2)*8
+    xm.stdsp  t3,t3,(STACK_VEC_SAT/2 + 3)*8
+    li s8, 0xC000
+    {   slli s7, s8, 16                         ;   slli tail_bytes, tail_bytes, 1           }
+    {   or s8, s8, s7                         ;   nop}
+    addi vec_neg_one, sp, STACK_VEC_NEG_ONES*4
+
+    xm.stdi  s8,s8, 0(vec_neg_one)
+    xm.stdi  s8,s8, 8(vec_neg_one)
+    xm.stdi  s8,s8, 16(vec_neg_one)
+    xm.stdi  s8,s8, 24(vec_neg_one)
+
+    {   xm.mkmsk tail_bytes, tail_bytes            ;   nop}
+    mv b_shr, a4
+    {   addi t3,sp, (STACK_VEC_TMP_REAL)*4        ;   nop            }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    beqz length, .L_outer_loop_bot 
+
+    .L_outer_loop_top:
+            xm.vlashr b_real, b_shr
+            xm.vstrpv t3, mask32
+        {   add b_real, b_real, _32                 ;   xm.vsign                                   }
+        addi Q(iter),sp, (STACK_VEC_TMP_IMAG)*4    
+        {nop ;   xm.vlmul0 t3}
+        xm.vlmul1 t3
+        xm.vstrpv t3, mask32
+
+            {nop; xm.vladd t3} ///
+            xm.vstrpv t3, mask32 ///
+
+
+            xm.vlashr b_imag, b_shr
+            xm.vstrpv Q(iter), mask32
+        {   add b_imag, b_imag, _32                 ;   xm.vsign                                   }
+        { nop                                           ;   xm.vlmul0 Q(iter)}
+        xm.vlmul1 Q(iter)
+        xm.vstrpv Q(iter), mask32
+
+            {nop; xm.vladd Q(iter)} ///
+            xm.vstrpv Q(iter), mask32 ///
+
+
+        mv table, a5
+         addi t3,sp, (STACK_VEC_TMP_IMAG)*4        
+         mv iter, a6
+
+        .L_inner_loop_top:
+           // {addi t3, sp, STACK_VEC_NEG_ONES*4; xm.vclrdr}
+
+
+            { nop                                           ;   xm.vldr t3}
+            {   addi t3,sp, (STACK_VEC_TMP2)*4            ;   xm.vlmul0 vec_neg_one}
+            xm.vlmul1 vec_neg_one
+                xm.vstrpv t3, mask32
+
+                {nop; xm.vladd t3} ///
+                xm.vstrpv t3, mask32 ///
+
+
+            { nop                                           ;   xm.vclrdr                                  }
+            {   add table, table, _32                   ;   xm.vldc t3}
+            {   addi t3,sp, (STACK_VEC_TMP_REAL)*4        ;   xm.vlmacc0 table}
+            xm.vlmacc1 table
+            {   sub table, table, _32                   ;   xm.vldc t3}
+            {   addi t3,sp, (STACK_VEC_SAT)*4             ;   xm.vlmacc0 table}
+            xm.vlmacc1 table
+            xm.vlsat t3
+             addi t3,sp, (STACK_VEC_TMP_REAL)*4  
+            
+                xm.vstrpv t3, mask32
+            {   add table, table, _32                   ;   xm.vclrdr                                  }
+            addi t3,sp, (STACK_VEC_TMP_IMAG)*4       
+            {nop ;   xm.vlmacc0 table}
+            xm.vlmacc1 table
+            {   sub table, table, _32                   ;   xm.vldc t3}
+            {   addi t3,sp, (STACK_VEC_SAT)*4             ;   xm.vlmacc0 table}
+            xm.vlmacc1 table
+            xm.vlsat t3
+             addi t3,sp, (STACK_VEC_TMP_IMAG)*4     
+            
+                xm.vstrpv t3, mask32   
+            {   add table, table, _32                   ;   xm.vsign                                   }
+            {   addi iter, iter, -1                       ;   xm.vlmul0 t3} // imag = |imag|
+            xm.vlmul1 t3
+                xm.vstrpv t3, mask32
+
+                {nop; xm.vladd t3} ///
+                xm.vstrpv t3, mask32 ///
+
+            {   add table, table, _32                   ;   xm.bt iter, .L_inner_loop_top              }
+
+        {   addi t3,sp, (STACK_VEC_TMP_REAL)*4        ; nop                                           }
+        {   addi length, length, -1                   ;   xm.vldr t3}
+        {   add a, a, _32                           ;   xm.vstr a}
+        bnez length, .L_outer_loop_top
+    .L_outer_loop_bot:  
+
+    {   addi t3,sp, (STACK_VEC_TMP_REAL)*4        ;  nop                }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    beqz tail_bytes, .L_done
+        xm.vlashr b_real, b_shr
+        xm.vstrpv t3, tail_bytes
+    {   add b_real, b_real, _32                 ;   xm.vsign                                   }
+    addi Q(iter),sp, (STACK_VEC_TMP_IMAG)*4    
+    {nop ;   xm.vlmul0 t3}
+    xm.vlmul1 t3
+    xm.vstrpv t3, mask32
+
+                {nop; xm.vladd t3} ///
+                xm.vstrpv t3, mask32 ///
+
+        xm.vlashr b_imag, b_shr
+        xm.vstrpv Q(iter), tail_bytes
+    {   add b_imag, b_imag, _32                 ;   xm.vsign                                   }
+    { nop                                           ;   xm.vlmul0 Q(iter)}
+    xm.vlmul1 Q(iter)
+    xm.vstrpv Q(iter), mask32
+
+                {nop; xm.vladd Q(iter)} ///
+                xm.vstrpv Q(iter), mask32 ///
+
+
+    mv table, a5
+    addi t3,sp, (STACK_VEC_TMP_IMAG)*4     
+    mv iter, a6 
+
+    .L_inner_loop2_top:
+        // {   ldaw x28, sp[STACK_VEC_NEG_ONES]        ;   vclrdr                                  }
+        addi t3,sp, (STACK_VEC_TMP_IMAG)*4        
+        {nop;   xm.vldr t3}
+        {   addi t3,sp, (STACK_VEC_TMP2)*4            ;   xm.vlmul0 vec_neg_one}
+         xm.vlmul1 vec_neg_one
+         xm.vstrpv t3, mask32
+
+                {nop; xm.vladd t3} ///
+                xm.vstrpv t3, mask32 ///
+
+        { nop                                           ;   xm.vclrdr                                  }
+        {   add table, table, _32                   ;   xm.vldc t3}
+        {   addi t3,sp, (STACK_VEC_TMP_REAL)*4        ;   xm.vlmacc0 table}
+        xm.vlmacc1 table
+        {   sub table, table, _32                   ;   xm.vldc t3}
+        {   addi t3,sp, (STACK_VEC_SAT)*4             ;   xm.vlmacc0 table}
+        xm.vlmacc1 table
+         xm.vlsat t3
+         addi t3,sp, (STACK_VEC_TMP_REAL)*4    
+        
+            xm.vstrpv t3, mask32
+        {   add table, table, _32                   ;   xm.vclrdr                                  }
+        addi t3,sp, (STACK_VEC_TMP_IMAG)*4     
+        {nop   ;   xm.vlmacc0 table}
+        xm.vlmacc1 table
+        {   sub table, table, _32                   ;   xm.vldc t3}
+        {   addi t3,sp, (STACK_VEC_SAT)*4             ;   xm.vlmacc0 table}
+        xm.vlmacc1 table
+        xm.vlsat t3
+         addi t3,sp, (STACK_VEC_TMP_IMAG)*4  
+        
+            xm.vstrpv t3, mask32   
+        {   add table, table, _32                   ;   xm.vsign                                   }
+        {   addi iter, iter, -1                       ;   xm.vlmul0 t3} // imag = |imag|
+        xm.vlmul1 t3
+        xm.vstrpv t3, mask32
+
+                {nop; xm.vladd t3} ///
+                xm.vstrpv t3, mask32 ///
+
+        {   add table, table, _32                   ;   xm.bt iter, .L_inner_loop2_top             }
+
+    {   addi t3,sp, (STACK_VEC_TMP_REAL)*4        ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vstd t3}
+        xm.vstrpv t3, tail_bytes
+        xm.vstrpv a, tail_bytes
+    { nop                                           ;   xm.vldd t3}
+    { nop                                           ;   xm.vstd t3}
+
+
+.L_done:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s7,s6,24
+
+    {   li a0, 15                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ;   lw s8, 4                          (sp)}
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_mul.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_mul.S
new file mode 100644
index 00000000..d840c05d
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_mul.S
@@ -0,0 +1,186 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_complex_s16_mul(
+    int16_t* a_real,
+    int16_t* a_imag,
+    const int16_t* b_real,
+    const int16_t* b_imag,
+    const int16_t* c_real,
+    const int16_t* c_imag,
+    const unsigned length,
+    const right_shift_t sat);
+
+*/
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (4)
+#define NSTACKWORDS     (12+8*(NSTACKVECS)+4)
+
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+#define STACK_VEC_SAT   (NSTACKWORDS-16-4)
+
+#define FUNCTION_NAME vect_complex_s16_mul
+    
+#define a_re        x10
+#define a_im        x11
+#define b_re        x12
+#define b_im        x13
+#define c_re        x18
+#define c_im        x19
+#define len         x20
+#define vec_tmp     x21
+#define vec_sat     x22
+#define bytemask    x23
+
+
+
+/*
+    We're doing this:
+
+    vR  <- -1
+    vR  <- -1 * b.imag
+    vC  <- -b.imag  
+    acc <- 0
+    acc <- vC * c.imag
+    vC  <- b.real
+    acc <- acc + vC * c.real
+    vR  <- acc >> sat
+    a.real <- vR
+
+    (vC still has b.real)
+    acc <- 0
+    acc <- vC * c.imag
+    vC  <- b.imag
+    acc <- acc + vC * c.real
+    vR  <- acc >> sat
+    a.imag <- vR
+*/
+
+
+//.L_neg_ones:
+  //  .short -0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    xm.stdsp  s7,s6,24
+
+    mv s2, a7
+    {   addi t3,sp, (STACK_VEC_SAT)*4             ;  nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */
+    {   slli s3, s2, 16                          ;   xm.zexti s2, 16                             }
+    {   or s2, s2, s3                           ;   sw s8, 4                          (sp)}
+        xm.stdi  s2,s2, 0(t3)
+        xm.stdi  s2,s2, 8(t3)
+        xm.stdi  s2,s2, 16(t3)
+        xm.stdi  s2,s2, 24(t3)
+    mv bytemask, a6
+    {   li t3, 32                             ;   nop}
+    mv c_re, a4
+    {   srli len, bytemask, 4                    ;  nop}
+    mv c_im, a5
+    {   xm.zexti bytemask, 4                        ;   nop}
+    {   slli bytemask, bytemask, SIZEOF_LOG2_S16 ;   slli t3, t3, 3                         }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli bytemask, bytemask, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */
+    {   addi vec_tmp,sp, (STACK_VEC_TMP)*4         ;   xm.vsetc t3}
+    la t3, vpu_vec_0x8000    
+    {   mv s8, t3                            ;   xm.brff len, .L_loop_bot                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_loop_top:
+        {   addi len, len, -1                         ;   xm.vldr t3}
+        {   xm.mkmski t3, 32                           ;   xm.vlmul0 b_im}
+         xm.vlmul1 b_im
+            xm.vstrpv vec_tmp, t3
+        {   addi vec_sat,sp, (STACK_VEC_SAT)*4         ;   xm.vldc vec_tmp}
+        { nop                                           ;   xm.vclrdr                                  }
+        { nop                                           ;   xm.vlmacc0 c_im}
+        xm.vlmacc1 c_im
+        { nop                                           ;   xm.vldc b_re}
+        { nop                                           ;   xm.vlmacc0 c_re}
+        xm.vlmacc1 c_re
+        xm.vlsat vec_sat
+        {   li t3, 32                             ;   xm.vstr a_re}
+
+        
+        {   add b_re, b_re, t3                     ;   xm.vclrdr                                  }
+        {   add a_re, a_re, t3                     ;   xm.vlmacc0 c_im}
+        xm.vlmacc1 c_im
+        {   add c_im, c_im, t3                     ;   xm.vldc b_im}
+        {   add b_im, b_im, t3                     ;   xm.vlmacc0 c_re}
+        xm.vlmacc1 c_re
+        {   add c_re, c_re, t3                     ;   nop}
+        xm.vlsat vec_sat
+        {   add a_im, a_im, t3                     ;   xm.vstr a_im}
+        {   mv t3, s8                            ;   xm.bt len, .L_loop_top                     }
+    .L_loop_bot:
+
+    {   xm.mkmsk bytemask, bytemask                ;   xm.brff bytemask, .L_done                    }   /* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    { nop                                           ;   xm.vldr t3}
+    {   xm.mkmski t3, 32                           ;   xm.vlmul0 b_im}
+    xm.vlmul1 b_im
+        xm.vstrpv vec_tmp, t3
+    {   addi vec_sat,sp, (STACK_VEC_SAT)*4         ;   xm.vldc vec_tmp}
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vstd vec_tmp}
+    { nop                                           ;   xm.vlmacc0 c_im}
+    xm.vlmacc1 c_im
+    { nop                                           ;   xm.vldc b_re}
+    {   addi t3,sp, (STACK_VEC_SAT)*4             ;   xm.vlmacc0 c_re}
+    xm.vlmacc1 c_re
+    xm.vlsat vec_sat
+        xm.vstrpv a_re, bytemask
+        xm.vstrpv vec_tmp, bytemask
+    { nop                                           ;   xm.vldd vec_tmp}
+    { nop                                           ;   xm.vstd vec_tmp}
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vlmacc0 c_im}
+    xm.vlmacc1 c_im
+    { nop                                           ;   xm.vldc b_im}
+    { nop                                           ;   xm.vlmacc0 c_re}
+    xm.vlmacc1 c_re
+    xm.vlsat vec_sat
+        xm.vstrpv a_im, bytemask   
+        xm.vstrpv vec_tmp, bytemask
+    { nop                                           ;   xm.vldd vec_tmp}
+    { nop                                           ;   xm.vstd vec_tmp}
+
+
+.L_done:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s7,s6,24
+
+    {   li a0, 15                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ;   lw s8, 4                          (sp)}
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_nmacc.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_nmacc.S
new file mode 100644
index 00000000..e68d09e2
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_nmacc.S
@@ -0,0 +1,211 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// XMOS Public License: Version 1
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_complex_s16_nmacc(
+    int16_t* acc_real,
+    int16_t* acc_imag,
+    const int16_t* b_real,
+    const int16_t* b_imag,
+    const int16_t* c_real,
+    const int16_t* c_imag,
+    const unsigned length,
+    const right_shift_t acc_shr,
+    const right_shift_t sat);
+
+*/
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (4)
+#define NSTACKWORDS     (12+8*(NSTACKVECS)+4)
+
+#define STACK_SAT       (NSTACKWORDS)
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+#define STACK_VEC_SAT   (NSTACKWORDS-16-4)
+
+#define STACK_BYTEMASK  8
+
+#define FUNCTION_NAME vect_complex_s16_nmacc
+    
+#define acc_re      x10
+#define acc_im      x11
+#define b_re        x12
+#define b_im        x13
+#define c_re        x18
+#define c_im        x19
+#define len         x20
+#define vec_tmp     x21
+#define vec_sat     x22
+#define bytemask    x23
+#define acc_shr     x23
+
+
+/*
+    We're doing this:
+
+    vR  <- -1
+    vR  <- -1 * b.imag
+    vC  <- -b.imag  
+    tmp <- 0
+    tmp <- vC * c.imag
+    vC  <- b.real
+    tmp <- tmp + vC * c.real
+    vR  <- tmp >> sat
+    vR  <- acc.real - vR
+    acc.real <- vR
+
+    (vC still has b.real)
+    tmp <- 0
+    tmp <- vC * c.imag
+    vC  <- b.imag
+    tmp <- tmp + vC * c.real
+    vR  <- tmp >> sat
+    vR  <- acc.imag - vR
+    acc.imag <- vR
+*/
+
+
+.L_neg_ones:
+    .short -0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    xm.stdsp  s7,s6,24
+
+    {   addi t3,sp, (STACK_VEC_SAT)*4             ;   lw s2, (STACK_SAT)*4                   (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */
+    {   slli s3, s2, 16                          ;   xm.zexti s2, 16                             }
+    {   or s2, s2, s3                           ;   sw s8, 4                          (sp)}
+        xm.stdi  s2,s2, 0(t3)
+        xm.stdi  s2,s2, 8(t3)
+        xm.stdi  s2,s2, 16(t3)
+        xm.stdi  s2,s2, 24(t3)
+    mv bytemask, a6
+    {   li t3, 32                             ;  nop}
+    mv c_re, a4
+    {   srli len, bytemask, 4                    ;   nop}
+    mv c_im, a5
+    {   xm.zexti bytemask, 4                        ;   nop}
+    {   slli bytemask, bytemask, SIZEOF_LOG2_S16 ;   slli t3, t3, 3                         }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli bytemask, bytemask, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */
+    { nop                                           ;   sw bytemask, (STACK_BYTEMASK)*4        (sp)}
+    mv acc_shr, a7
+    { nop                                           ;   nop}
+    {   addi vec_tmp,sp, (STACK_VEC_TMP)*4         ;   xm.vsetc t3}
+    la t3, vpu_vec_0x8000
+    {   mv s8, t3                            ;   xm.brff len, .L_loop_bot                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_loop_top:
+        {   xm.mkmski t3, 32                           ; nop                                           }
+            xm.vlashr acc_re, acc_shr
+            xm.vstrpv acc_re, t3
+            xm.vlashr acc_im, acc_shr
+            xm.vstrpv acc_im, t3
+        {   mv t3, s8                            ; nop                                           }
+        {   addi len, len, -1                         ;   xm.vldr t3}
+        {   xm.mkmski t3, 32                           ;   xm.vlmul0 b_im}
+        xm.vlmul1 b_im
+            xm.vstrpv vec_tmp, t3
+        {   addi vec_sat,sp, (STACK_VEC_SAT)*4         ;   xm.vldc vec_tmp}
+        { nop                                           ;   xm.vclrdr                                  }
+        { nop                                           ;   xm.vlmacc0 c_im}
+        xm.vlmacc1 c_im
+        { nop                                           ;   xm.vldc b_re}
+        { nop                                           ;   xm.vlmacc0 c_re}
+        xm.vlmacc1 c_re
+        xm.vlsat vec_sat
+        { nop                                           ;   xm.vlsub acc_re}
+        {   li t3, 32                             ;   xm.vstr acc_re}
+        {   add b_re, b_re, t3                     ;   xm.vclrdr                                  }
+        {   add acc_re, acc_re, t3                 ;   xm.vlmacc0 c_im}
+        xm.vlmacc1 c_im
+        {   add c_im, c_im, t3                     ;   xm.vldc b_im}
+        {   add b_im, b_im, t3                     ;   xm.vlmacc0 c_re}
+        xm.vlmacc1 c_re
+        {   add c_re, c_re, t3                     ;  nop}
+         xm.vlsat vec_sat
+        { nop                                           ;   xm.vlsub acc_im}
+        {   add acc_im, acc_im, t3                 ;   xm.vstr acc_im}
+        {   mv t3, s8                            ;   xm.bt len, .L_loop_top                     }
+    .L_loop_bot:
+
+#undef bytemask
+#define bytemask len
+
+    { nop                                           ;   lw bytemask, (STACK_BYTEMASK)*4        (sp)}
+    {   xm.mkmsk bytemask, bytemask                ;   xm.brff bytemask, .L_done                    }   /* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+        xm.vlashr acc_re, acc_shr
+        xm.vstrpv acc_re, bytemask
+        xm.vlashr acc_im, acc_shr
+        xm.vstrpv acc_im, bytemask
+    { nop                                           ;   xm.vldr t3}
+    {   xm.mkmski t3, 32                           ;   xm.vlmul0 b_im}
+    xm.vlmul1 b_im
+        xm.vstrpv vec_tmp, t3
+    {   addi vec_sat,sp, (STACK_VEC_SAT)*4         ;   xm.vldc vec_tmp}
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vstd vec_tmp}
+    { nop                                           ;   xm.vlmacc0 c_im}
+    xm.vlmacc1 c_im
+    { nop                                           ;   xm.vldc b_re}
+    {   addi t3,sp, (STACK_VEC_SAT)*4             ;   xm.vlmacc0 c_re}
+    xm.vlmacc1 c_re
+    xm.vlsat vec_sat
+    { nop                                           ;   xm.vlsub acc_re}
+        xm.vstrpv acc_re, bytemask
+        xm.vstrpv vec_tmp, bytemask
+    { nop                                           ;   xm.vldd vec_tmp}
+    { nop                                           ;   xm.vstd vec_tmp}
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vlmacc0 c_im}
+    xm.vlmacc1 c_im
+    { nop                                           ;   xm.vldc b_im}
+    { nop                                           ;   xm.vlmacc0 c_re}
+    xm.vlmacc1 c_re
+    xm.vlsat vec_sat
+    { nop                                           ;   xm.vlsub acc_im}
+        xm.vstrpv acc_im, bytemask   
+        xm.vstrpv vec_tmp, bytemask
+    { nop                                           ;   xm.vldd vec_tmp}
+    { nop                                           ;   xm.vstd vec_tmp}
+
+
+.L_done:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s7,s6,24
+
+    {   li a0, 15                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ;   lw s8, 4                          (sp)}
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_real_mul.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_real_mul.S
new file mode 100644
index 00000000..faced79a
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_real_mul.S
@@ -0,0 +1,142 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_complex_s16_real_mul(
+    int16_t* a_real,
+    int16_t* a_imag,
+    const int16_t* b_real,
+    const int16_t* b_imag,
+    const int16_t c[],
+    const unsigned length,
+    const right_shift_t sat);
+
+*/
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (4)
+#define NSTACKWORDS     (16+8*(NSTACKVECS)+4)
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+#define STACK_VEC_SAT   (NSTACKWORDS-16-4)
+
+
+#define FUNCTION_NAME vect_complex_s16_real_mul
+    
+#define a_re        x10
+#define a_im        x11
+#define b_re        x12
+#define b_im        x13
+#define c           x18
+#define len         x19
+#define vec_tmp     x20
+#define vec_sat     x21
+#define bytemask    x22
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    xm.stdsp  s7,s6,24
+
+    mv s2, a6
+    {   addi t3,sp, (STACK_VEC_SAT)*4             ;  nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */
+    {   slli s3, s2, 16                          ;   xm.zexti s2, 16                             }
+    {   or s2, s2, s3                           ;   sw s8, 4                          (sp)}
+        xm.stdi  s2,s2, 0(t3)
+        xm.stdi  s2,s2, 8(t3)
+        xm.stdi  s2,s2, 16(t3)
+        xm.stdi  s2,s2, 24(t3)
+
+    mv len, a5 
+    {   li t3, 32                             ;   mv bytemask, len}
+    mv c, a4
+    {   srli len, bytemask, 4                    ;   nop}
+    {   xm.zexti bytemask, 4                        ; nop                                           }
+    {   slli bytemask, bytemask, SIZEOF_LOG2_S16 ;   slli t3, t3, 3                         }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli bytemask, bytemask, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */
+    {   addi vec_tmp,sp, (STACK_VEC_TMP)*4         ;   xm.vsetc t3}
+    {   addi vec_sat,sp, (STACK_VEC_SAT)*4         ; nop                                           }
+    {   li t3, 32                             ;   xm.brff len, .L_loop_bot                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+#define _32     x28
+    .L_loop_top:
+        {   addi len, len, -1                         ;   xm.vclrdr                                  }
+        {   add c, c, _32                           ;   xm.vldc c}
+        {  nop                    ;   xm.vlmacc0 b_re}
+        xm.vlmacc1 b_re
+        {   add b_re, b_re, _32                     ;   nop}
+        xm.vlsat vec_sat
+        {   add a_re, a_re, _32                     ;   xm.vstr a_re}
+        { nop                                           ;   xm.vclrdr                                  }
+        {  nop                    ;   xm.vlmacc0 b_im}
+        xm.vlmacc1 b_im
+        {   add b_im, b_im, _32                     ;  nop}
+        xm.vlsat vec_sat
+        {   add a_im, a_im, _32                     ;   xm.vstr a_im}
+        { nop                                           ;   xm.bt len, .L_loop_top                     }
+
+    .L_loop_bot:
+#undef _32
+
+    {   xm.mkmsk bytemask, bytemask                ;   xm.brff bytemask, .L_done                    }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+
+
+    xm.vclrdr
+    xm.vstd vec_tmp
+    xm.vldc c
+    xm.vlmacc0 b_re
+    xm.vlmacc1 b_re
+    xm.vlsat vec_sat
+    xm.vstrpv a_re, bytemask
+    xm.vstrpv vec_tmp, bytemask
+    xm.vldd vec_tmp
+    xm.vstd vec_tmp
+    xm.vclrdr
+    xm.vlmacc0 b_im
+    xm.vlmacc1 b_im
+    xm.vlsat vec_sat
+    xm.vstrpv a_im, bytemask
+    xm.vstrpv vec_tmp, bytemask
+    xm.vldd vec_tmp
+    xm.vstd vec_tmp
+    nop
+
+.p2align 2
+
+.L_done:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s7,s6,24
+
+    {   li a0, 15                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ;   lw s8, 4                          (sp)}
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_squared_mag.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_squared_mag.S
new file mode 100644
index 00000000..730e4313
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_squared_mag.S
@@ -0,0 +1,120 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_complex_s16_squared_mag(
+    int16_t a[],
+    const int16_t* b_real,
+    const int16_t* b_imag,
+    const unsigned length,
+    const right_shift_t sat);
+
+*/
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (4)
+#define NSTACKWORDS     (8+8*(NSTACKVECS)+4)
+
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+#define STACK_VEC_SAT   (NSTACKWORDS-16-4)
+
+#define FUNCTION_NAME vect_complex_s16_squared_mag
+    
+#define a           x10
+#define b_re        x11
+#define b_im        x12
+#define length      x13
+#define vec_tmp     x18
+#define vec_sat     x19
+#define bytemask    x20
+#define _32         x21
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+
+    mv s2, a4
+    {   addi t3,sp, (STACK_VEC_SAT)*4             ;   nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */
+    {   slli s3, s2, 16                          ;   xm.zexti s2, 16                             }
+    {   or s2, s2, s3                           ;   li _32, 32                             }
+        xm.stdi  s2,s2, 0(t3)
+        xm.stdi  s2,s2, 8(t3)
+        xm.stdi  s2,s2, 16(t3)
+        xm.stdi  s2,s2, 24(t3)
+
+    { nop                                           ;   slli bytemask, length, SIZEOF_LOG2_S16   }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli bytemask, length, SIZEOF_LOG2_S16   \nMessage: The shift amount is not 32" */
+    {   srli length, length, 4                   ;   slli t3, _32, 3                         }
+    {   xm.zexti bytemask, 5                        ;   xm.vsetc t3}
+{   xm.ldawsp t3, STACK_VEC_SAT      *4       ; nop}
+{nop ;  xm.ldawsp vec_tmp, STACK_VEC_TMP*4         }
+    {   xm.mkmsk bytemask, bytemask                ;   xm.brff length, .L_loop_bot                  }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_loop_top:
+
+        {   addi length, length, -1                   ;   xm.vclrdr                                  }
+        { nop                                           ;   xm.vldc b_re}
+        {  nop                     ;   xm.vlmacc0 b_re}
+        xm.vlmacc1 b_re
+        {   add b_re, b_re, _32                     ;   nop}
+        { nop                                           ;   xm.vldc b_im}
+        {  nop                    ;   xm.vlmacc0 b_im}
+        xm.vlmacc1 b_im
+        {   add b_im, b_im, _32                     ;   nop}
+        xm.vlsat t3
+        {   add a, a, _32                           ;   xm.vstr a}
+        { nop                                           ;   xm.bt length, .L_loop_top                  }
+
+    .L_loop_bot: //astew: worth jumping over to single issue mode for this?
+
+    { nop                                           ;   xm.brff bytemask, .L_done                    }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vstd vec_tmp}
+    { nop                                           ;   xm.vldc b_re}
+    { nop                                           ;   xm.vlmacc0 b_re}
+    xm.vlmacc1 b_re
+    { nop                                           ;   xm.vldc b_im}
+    { nop                                           ;   xm.vlmacc0 b_im}
+    xm.vlmacc1 b_im
+    xm.vlsat t3
+    { nop                                           ;   xm.vstrpv a, bytemask                   }
+    { nop                                           ;   xm.vstrpv vec_tmp, bytemask             }
+    { nop                                           ;   xm.vldd vec_tmp}
+    { nop                                           ;   xm.vstd vec_tmp}
+
+.L_done:
+    xm.lddsp  s3,s2,8
+    xm.lddsp  s5,s4,16
+    {   li a0, 15                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ; nop                                           }
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_sum.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_sum.S
new file mode 100644
index 00000000..754b9dd3
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_sum.S
@@ -0,0 +1,156 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*  
+
+complex_s16_t vect_complex_s16_sum(
+    const int16_t* b_real,
+    const int16_t* b_imag,
+    const unsigned length,
+    const right_shift_t sat);
+
+*/
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (6)
+#define NSTACKWORDS     (8+(8*NSTACKVECS)+4)
+
+#define b_real      x10
+#define b_imag      x11
+#define length      x12
+#define sat         x13
+#define _32         x18
+#define tmp         x19
+#define tail_bytes  x20
+
+#define STACK_VEC_ZEROS     (NSTACKWORDS- 8-16-4)
+#define STACK_VEC_TMP       (NSTACKWORDS-16-16-4)
+#define STACK_VEC_TMP2       (NSTACKWORDS-8-2)
+
+#define STACK_LENGTH        6
+#define STACK_SAT           7
+
+#define FUNCTION_NAME vect_complex_s16_sum_OLLD //why did I think I needed to write this in assembly?
+    
+FUNCTION_NAME:
+
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,8
+        xm.stdsp  s5,s4,16
+        xm.stdsp  s7,s6,24
+
+     {   addi s6,sp, (STACK_VEC_TMP2)*4        ;   nop                                  }
+         addi s7, s6, (-30)
+
+        li t3, 0x0100
+    {   addi tmp, t3, 1                         ;   xm.vsetc t3}
+    {   srli length, length, 4                   ;   slli tail_bytes, length, 1               }
+    { nop                                           ;   xm.zexti tail_bytes, 5                      }
+    { nop                                           ;   sw length, (STACK_LENGTH)*4            (sp)}
+    { nop                                           ;   sw sat, (STACK_SAT)*4                  (sp)}
+
+    {   addi t3,sp, (STACK_VEC_ZEROS)*4           ;   xm.vclrdr                                  }
+    {   addi t3,sp, (STACK_VEC_TMP)*4             ;   xm.vstd t3}
+
+        xm.stdi  tmp,tmp, 0(t3)
+        xm.stdi  tmp,tmp, 8(t3)
+        xm.stdi  tmp,tmp, 16(t3)
+        xm.stdi  tmp,tmp, 24(t3)
+
+    {   li _32, 32                             ;   xm.vldc t3}
+    {   addi t3,sp, (STACK_VEC_ZEROS)*4           ;   xm.brff length, .L_real_bot                  }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_real_top:
+
+        {   addi length, length, -1                   ;   xm.vlmaccr0 b_real}
+        xm.vlmaccr1 b_real
+        {nop ; xm.vstd s6}    
+        {nop ; xm.vldd s7}
+        {nop ; xm.vstr s6}
+        {nop ; xm.vldr s7}
+
+
+        {   add b_real, b_real, _32                 ;   xm.bt length, .L_real_top                  }
+
+    .L_real_bot:
+
+    {   sub t3, t3, tail_bytes                ;   xm.brff tail_bytes, .L_real_end              }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.vldc t3}
+    {   xm.mkmski tmp, 2                            ;   xm.vlmaccr0 b_imag}
+    xm.vlmaccr1 b_imag
+        {nop ; xm.vstd s6}    
+        {nop ; xm.vldd s7}
+        {nop ; xm.vstr s6}
+        {nop ; xm.vldr s7}
+
+    {   addi t3,sp, (STACK_SAT)*4                 ;  nop /* xm.vadddr */                                  }
+    {   addi t3,sp, 4                         ;   nop}
+    xm.vlsat t3
+        xm.vstrpv t3, tmp
+    { nop                                           ;   lw a0, 4                           (sp)}
+
+    .L_real_end:
+    
+    { nop                                           ;   lw length, (STACK_LENGTH)*4            (sp)}
+    {   addi t3,sp, (STACK_VEC_ZEROS)*4           ;   xm.vclrdr                                  }
+
+
+    .L_imag_top:
+
+        {   addi length, length, -1                   ;   xm.vlmaccr0 b_imag}
+        xm.vlmaccr1 b_imag
+        {nop ; xm.vstd s6}    
+        {nop ; xm.vldd s7}
+        {nop ; xm.vstr s6}
+        {nop ; xm.vldr s7}
+
+        {   add b_imag, b_imag, _32                 ;   xm.bt length, .L_real_top                  }
+
+    .L_imag_bot:
+
+    {   sub t3, t3, tail_bytes                ;   xm.brff tail_bytes, .L_imag_end              }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.vldc t3}
+    {   xm.mkmski tmp, 2                            ;   xm.vlmaccr0 b_imag}
+    xm.vlmaccr1 b_imag
+        {nop ; xm.vstd s6}    
+        {nop ; xm.vldd s7}
+        {nop ; xm.vstr s6}
+        {nop ; xm.vldr s7}
+
+    {   addi t3,sp, (STACK_SAT)*4                 ;    nop /* xm.vadddr */                               }
+    {   addi t3,sp, 4                         ;   nop}
+    xm.vlsat t3
+        xm.vstrpv t3, tmp
+    { nop                                           ;   lw a0, 4                           (sp)}
+
+    .L_imag_end:
+
+.L_done:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_to_complex_s32.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_to_complex_s32.S
new file mode 100644
index 00000000..88b1e5d9
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_to_complex_s32.S
@@ -0,0 +1,60 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+void vect_complex_s16_to_vect_complex_s32(
+    complex_s32_t* a,
+    const int16_t* b_real,
+    const int16_t* b_imag,
+    const unsigned length);
+*/
+
+#include "../asm_helper.h"
+
+#define NSTACKVECS      0
+#define NSTACKWORDS     (4 + (8*NSTACKVECS)+4)
+
+#define FUNCTION_NAME   vect_complex_s16_to_vect_complex_s32
+
+#define a               x10
+#define b_real          x11
+#define b_imag          x12
+#define length          x13
+#define tmp_real        x18
+#define tmp_imag        x19
+
+.text;  /* Translation error on this line: unexpected token at position 5. */ 
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 4
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in entsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+
+    // Can this be done faster? Or with the VPU at all?
+    .L_loop_top:  
+        addi length, length, -1
+        xm.ld16s tmp_real, length(b_real)
+        xm.ld16s tmp_imag, length(b_imag)
+        xm.std  tmp_real,tmp_imag, length(a)
+        bnez length, .L_loop_top
+
+    xm.lddsp  s3,s2,8
+    xm.retsp (NSTACKWORDS)*4       /* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS       \nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+    //.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 37. */ 
+    .set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 46. */ 
+    .set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 33. */ 
+    .set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 34. */ 
+    .set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 36. */ 
+    
+.L_func_end:
+    .size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_complex_scale.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_complex_scale.S
new file mode 100644
index 00000000..9bdb740a
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_complex_scale.S
@@ -0,0 +1,151 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_complex_s32_scale(
+    complex_s32_t* a,
+    const complex_s32_t* b,
+    const int32_t c_real,
+    const int32_t c_imag,
+    const unsigned length,
+    const right_shift_t b_shr,
+    const right_shift_t c_shr);
+
+*/
+
+.text
+.p2align 2
+
+
+#define NSTACKWORDS     (8+8+4)
+
+#define a           x10 
+#define b           x11 
+#define c_real      x12
+#define c_imag      x13
+#define length      x18
+#define b_shr       x19
+
+#define _32         x20
+#define tmp_vec     x21
+
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+
+#define FUNCTION_NAME vect_complex_s32_scale
+    
+    
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    
+    {   addi tmp_vec,sp, (STACK_VEC_TMP)*4         ;   li t3, 0                              }
+    {   li _32, 32                             ;   xm.vsetc t3}
+    
+        xm.stdi  c_real,c_imag, 0(tmp_vec)
+        xm.stdi  c_real,c_imag, 8(tmp_vec)
+        xm.stdi  c_real,c_imag, 16(tmp_vec)
+        xm.stdi  c_real,c_imag, 24(tmp_vec)
+
+    mv c_real, a6
+    {   xm.mkmski c_imag, 32                        ;   nop}
+        xm.vlashr tmp_vec, c_real
+        xm.vstrpv tmp_vec, c_imag
+
+    { nop                                           ;   xm.vldc tmp_vec}
+
+    #undef c_real
+    #undef c_imag
+    #define vec_count   x12
+    #define tail_bytes  x13
+
+    mv length, a4
+ //   { nop                                           ;   lw length, (STACK_LENGTH)*4            (sp)}
+ mv b_shr, a5
+    {   slli tail_bytes, length, 3               ;   nop}
+    {   xm.zexti tail_bytes, 5                      ;   srli vec_count, length, 2                }
+
+    #undef length
+
+    {   xm.mkmsk tail_bytes, tail_bytes            ;   xm.mkmski t3, 32                           }
+    { nop                                           ;   xm.brff vec_count, .L_loop_bot               }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    
+
+    .L_loop_top:
+            xm.vlashr b, b_shr
+            xm.vstrpv tmp_vec, t3
+        {   addi vec_count, vec_count, -1             ;   xm.vldd tmp_vec}
+        {   add b, b, _32                           ;   xm.vcmr0                                   }
+        { nop                                           ;   xm.vcmi0                                    }
+        {   add a, a, _32                           ;   xm.vstr a}
+        { nop                                           ;   xm.bt vec_count, .L_loop_top               }
+
+.L_loop_bot:
+
+    { nop                                           ;   xm.brff tail_bytes, .L_done                  }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+     nop
+
+
+    xm.vclrdr
+    xm.vstd tmp_vec
+    xm.vlashr b, b_shr
+    xm.vstrpv tmp_vec, tail_bytes
+    xm.vldd tmp_vec
+    xm.vcmr0
+    xm.vcmi0
+    xm.vstrpv tmp_vec, tail_bytes
+    xm.vstrpv a, tail_bytes
+    xm.vldd tmp_vec
+    xm.vstd tmp_vec
+
+.p2align 2
+
+
+    // {                                           ;   bf tail_bytes, .L_done                  }
+    // {                                           ;   vclrdr                                  }
+    // {                                           ;   vstd tmp_vec[0]                         }
+    //     vlashr b[0], b_shr
+    //     vstrpv tmp_vec[0], tail_bytes   
+    // {                                           ;   vldd tmp_vec[0]                         }
+    // {                                           ;   vcmr                                    }
+    // {                                           ;   vcmi                                    }
+    //     vstrpv tmp_vec[0], tail_bytes
+    //     vstrpv a[0], tail_bytes 
+    // {                                           ;   vldd tmp_vec[0]                         }
+    // {                                           ;   vstd tmp_vec[0]                         }
+
+.p2align 2
+.L_done:
+        xm.lddsp  s3,s2,8/* XAT Warning: "Not correcting LDDSP offset because it's not in the local frame range" */
+        xm.lddsp  s5,s4,16/* XAT Warning: "Not correcting LDDSP offset because it's not in the local frame range" */
+
+    {   li a0, 31                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ; nop                                           }
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conj_macc.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conj_macc.S
new file mode 100644
index 00000000..e7fcd926
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conj_macc.S
@@ -0,0 +1,129 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// XMOS Public License: Version 1
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_complex_s32_conj_macc(
+    complex_s32_t* acc,
+    const complex_s32_t* b,
+    const complex_s32_t* c,
+    const unsigned length,
+    const right_shift_t acc_shr,
+    const right_shift_t b_shr,
+    const right_shift_t c_shr);
+
+*/
+
+.text
+.p2align 2
+
+
+#define NSTACKWORDS     (8+8+4)
+
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+
+#define STACK_BYTEMASK  1
+
+#define acc         x10 
+#define b           x11 
+#define c           x12
+#define len         x13
+#define shr_b       x18
+#define shr_c       x19
+#define tmp         x20
+#define tmp_vec     x21
+#define shr_acc     x22
+
+#define bytemask    len
+
+#define FUNCTION_NAME vect_complex_s32_conj_macc
+    
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    xm.stdsp  s7,s6,24
+
+    mv shr_acc,a4
+    mv shr_b, a5
+    mv shr_c, a6
+    {   slli t3, len, 3                         ;   nop}
+    {   li tmp, 32                             ;  nop}
+    {   xm.zexti t3, 5                             ;   srli len, len, 2                         }
+    {   addi tmp_vec,sp, (STACK_VEC_TMP)*4         ;   xm.mkmsk t3, t3                          }
+    {   li t3, 0                              ;   sw t3, (STACK_BYTEMASK)*4             (sp)}
+    {   xm.mkmski t3, 32                           ;   xm.vsetc t3}
+    {   addi len, len, -1                         ;   xm.brff len, .L_loop_bot_s32                 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+.L_loop_top_s32:
+        xm.vlashr acc, shr_acc
+        xm.vstrpv acc, t3
+        xm.vlashr c, shr_c
+	      xm.vstrpv tmp_vec, t3
+    { nop                                           ;   xm.vldc tmp_vec}
+	      xm.vlashr b, shr_b
+        xm.vstrpv tmp_vec, t3
+    { nop                                           ;   xm.vldd tmp_vec}
+    {   add b, b, tmp                           ;   xm.vcmcr0                                   }
+    {   add c, c, tmp                           ;   xm.vcmci0                                    }
+    { nop                                           ;   xm.vladd acc}
+    {   add acc, acc, tmp                       ;   xm.vstr acc}
+    {   addi len, len, -1                         ;   xm.bt len, .L_loop_top_s32                 }
+
+.L_loop_bot_s32:
+    { nop                                           ;   lw bytemask, (STACK_BYTEMASK)*4        (sp)}
+    { nop                                           ;   xm.brff len, .L_done_s32                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vstd tmp_vec}
+        xm.vlashr acc, shr_acc
+        xm.vstrpv acc, bytemask
+        xm.vlashr c, shr_c
+        xm.vstrpv tmp_vec, bytemask
+    { nop                                           ;   xm.vldc tmp_vec}
+        xm.vlashr b, shr_b
+        xm.vstrpv tmp_vec, bytemask                      
+    { nop                                           ;   xm.vldd tmp_vec}
+    { nop                                           ;   xm.vcmcr0                                   }
+    {   mv t3, tmp_vec                        ;   xm.vcmci0                                    }
+    { nop                                           ;   xm.vladd acc}
+        xm.vstrpv tmp_vec, bytemask
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vstr tmp_vec}
+        xm.vstrpv acc, bytemask
+
+.L_done_s32:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s7,s6,24
+
+    {   li a0, 31                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ; nop                                           }
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conj_nmacc.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conj_nmacc.S
new file mode 100644
index 00000000..2d54bf94
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conj_nmacc.S
@@ -0,0 +1,129 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// XMOS Public License: Version 1
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_complex_s32_conj_nmacc(
+    complex_s32_t* acc,
+    const complex_s32_t* b,
+    const complex_s32_t* c,
+    const unsigned length,
+    const right_shift_t acc_shr,
+    const right_shift_t b_shr,
+    const right_shift_t c_shr);
+
+*/
+
+.text
+.p2align 2
+
+
+#define NSTACKWORDS     (8+8+4)
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+
+#define STACK_BYTEMASK  1
+
+#define acc         x10 
+#define b           x11 
+#define c           x12
+#define len         x13
+#define shr_b       x18
+#define shr_c       x19
+#define tmp         x20
+#define tmp_vec     x21
+#define shr_acc     x22
+
+#define bytemask    len
+
+#define FUNCTION_NAME vect_complex_s32_conj_nmacc
+    
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    xm.stdsp  s7,s6,24
+
+    mv shr_acc, a4
+    { nop                                           ;   nop}
+    mv shr_b, a5
+    {   slli t3, len, 3                         ;   nop}
+    mv shr_c, a6
+    {   li tmp, 32                             ;   nop}
+    {   xm.zexti t3, 5                             ;   srli len, len, 2                         }
+    {   addi tmp_vec,sp, (STACK_VEC_TMP)*4         ;   xm.mkmsk t3, t3                          }
+    {   li t3, 0                              ;   sw t3, (STACK_BYTEMASK)*4             (sp)}
+    {   xm.mkmski t3, 32                           ;   xm.vsetc t3}
+    {   addi len, len, -1                         ;   xm.brff len, .L_loop_bot_s32                 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+.L_loop_top_s32:
+        xm.vlashr acc, shr_acc
+        xm.vstrpv acc, t3
+        xm.vlashr c, shr_c
+	      xm.vstrpv tmp_vec, t3
+    { nop                                           ;   xm.vldc tmp_vec}
+	      xm.vlashr b, shr_b
+        xm.vstrpv tmp_vec, t3
+    { nop                                           ;   xm.vldd tmp_vec}
+    {   add b, b, tmp                           ;   xm.vcmcr0                                   }
+    {   add c, c, tmp                           ;   xm.vcmci0                                    }
+    { nop                                           ;   xm.vlsub acc}
+    {   add acc, acc, tmp                       ;   xm.vstr acc}
+    {   addi len, len, -1                         ;   xm.bt len, .L_loop_top_s32                 }
+
+.L_loop_bot_s32:
+    { nop                                           ;   lw bytemask, (STACK_BYTEMASK)*4        (sp)}
+    { nop                                           ;   xm.brff len, .L_done_s32                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vstd tmp_vec}
+        xm.vlashr acc, shr_acc
+        xm.vstrpv acc, bytemask
+        xm.vlashr c, shr_c
+        xm.vstrpv tmp_vec, bytemask
+    { nop                                           ;   xm.vldc tmp_vec}
+        xm.vlashr b, shr_b
+        xm.vstrpv tmp_vec, bytemask                      
+    { nop                                           ;   xm.vldd tmp_vec}
+    { nop                                           ;   xm.vcmcr0                                   }
+    {   mv t3, tmp_vec                        ;   xm.vcmci0                                    }
+    { nop                                           ;   xm.vlsub acc}
+        xm.vstrpv tmp_vec, bytemask
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vstr tmp_vec}
+        xm.vstrpv acc, bytemask
+
+.L_done_s32:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s7,s6,24
+
+    {   li a0, 31                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ; nop                                           }
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conjugate.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conjugate.S
new file mode 100644
index 00000000..02777efd
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conjugate.S
@@ -0,0 +1,83 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*  
+headroom_t vect_complex_s32_conjugate(
+    complex_s32_t* a,
+    const complex_s32_t* b,
+    const unsigned length);
+*/
+
+.text
+.p2align 2
+
+#define NSTACKVECTS     (1)
+#define NSTACKWORDS     (4 + 8*(NSTACKVECTS)+4)
+
+#define STACK_VEC_TMP   (NSTACKWORDS - 8-4)
+
+#define a           x10 
+#define b           x11 
+#define len         x12
+
+#define _32         x13
+#define tail        x18
+
+#define FUNCTION_NAME vect_complex_s32_conjugate
+    
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+    
+  { li _32, 32                 ; li t3, 0                  }
+  { slli tail, len, 3            ; xm.vsetc t3}
+lui t3, %hi(vpu_vec_complex_conj_op)
+    addi t3,t3, %lo(vpu_vec_complex_conj_op)
+
+  { xm.zexti tail, 5                ; srli len, len, 2             }
+  { xm.mkmsk tail, tail            ; xm.brff len, .L_loop_bot         }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    
+  .L_loop_top:
+    { addi len, len, -1             ; xm.vldr t3}
+    { add b, b, _32               ; xm.vlmul0 b}
+    { add a, a, _32               ; xm.vstr a}
+    { nop                             ; xm.bt len, .L_loop_top         }
+  .L_loop_bot:
+  { nop                             ; xm.brff tail, .L_done            }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+  { nop                             ; xm.vclrdr                      }
+  { nop                             ; xm.vldr t3}
+  { addi t3,sp, (STACK_VEC_TMP)*4 ; xm.vlmul0 b}
+    xm.vstrpv a, tail
+
+  // To make sure the tail is captured in the headroom..
+  { nop                             ; xm.vstd t3}
+    xm.vstrpv t3, tail
+  { nop                             ; xm.vldd t3}
+  { nop                             ; xm.vstd t3}
+
+.L_done:
+    xm.lddsp  s3,s2,0
+  {   li a0, 31                ;   xm.vgetc t3}
+  {   xm.zexti t3, 5               ; nop                             }
+  {   sub a0, a0, t3           ;   xm.retsp (NSTACKWORDS)*4         }
+  
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conjugate_mul.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conjugate_mul.S
new file mode 100644
index 00000000..27b763af
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conjugate_mul.S
@@ -0,0 +1,116 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_complex_s32_conj_mul(
+    complex_s32_t* a,
+    const complex_s32_t* b,
+    const complex_s32_t* c,
+    const unsigned length,
+    const right_shift_t b_shr,
+    const right_shift_t c_shr);
+
+*/
+
+.text
+.p2align 2
+
+
+#define NSTACKWORDS     (8+8+4)
+
+#define a           x10 
+#define b           x11 
+#define c           x12
+#define len         x13
+#define shr_b       x18
+#define shr_c       x19
+#define tmp         x20
+#define tmp_vec     x21
+
+#define bytemask    len
+
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+
+#define STACK_BYTEMASK  1
+
+#define FUNCTION_NAME vect_complex_s32_conj_mul
+    
+    
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    mv shr_b, a4
+    {   slli t3, len, 3                         ;   nop}
+    mv shr_c, a5
+    {   li tmp, 32                             ;   nop}
+    {   xm.zexti t3, 5                             ;   srli len, len, 2                         }
+    {   addi tmp_vec,sp, (STACK_VEC_TMP)*4         ;   xm.mkmsk t3, t3                          }
+    {   li t3, 0                              ;   sw t3, (STACK_BYTEMASK)*4             (sp)}
+    {   xm.mkmski t3, 32                           ;   xm.vsetc t3}
+    {   addi len, len, -1                         ;   xm.brff len, .L_loop_bot_s32                 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_loop_top_s32:
+            xm.vlashr b, shr_b
+            xm.vstrpv tmp_vec, t3
+        { nop                                           ;   xm.vldd tmp_vec}
+            xm.vlashr c, shr_c
+            xm.vstrpv tmp_vec, t3
+        { nop                                           ;   xm.vldc tmp_vec}
+        {   add b, b, tmp                           ;   xm.vcmcr0                                   }
+        {   add c, c, tmp                           ;   xm.vcmci0                                    }
+        {   add a, a, tmp                           ;   xm.vstr a}
+        {   addi len, len, -1                         ;   xm.bt len, .L_loop_top_s32                 }
+
+    .L_loop_bot_s32:
+
+    { nop                                           ;   lw bytemask, (STACK_BYTEMASK)*4        (sp)}
+    { nop                                           ;   xm.brff len, .L_done_s32                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vstd tmp_vec}
+        xm.vlashr b, shr_b
+        xm.vstrpv tmp_vec, bytemask
+    { nop                                           ;   xm.vldd tmp_vec}
+        xm.vlashr c, shr_c
+        xm.vstrpv tmp_vec, bytemask                      
+    { nop                                           ;   xm.vldc tmp_vec}
+    { nop                                           ;   xm.vcmcr0                                   }
+    {   mv t3, tmp_vec                        ;   xm.vcmci0                                    }
+        xm.vstrpv tmp_vec, bytemask
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vstr tmp_vec}
+        xm.vstrpv a, bytemask
+
+.L_done_s32:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+
+    {   li a0, 31                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ; nop                                           }
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_macc.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_macc.S
new file mode 100644
index 00000000..114b29b7
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_macc.S
@@ -0,0 +1,132 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// XMOS Public License: Version 1
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_complex_s32_macc(
+    complex_s32_t* acc,
+    const complex_s32_t* b,
+    const complex_s32_t* c,
+    const unsigned length,
+    const right_shift_t acc_shr,
+    const right_shift_t b_shr,
+    const right_shift_t c_shr);
+
+*/
+
+.text
+.p2align 2
+
+
+#define NSTACKWORDS     (8+8+4)
+
+#define STACK_SHR_ACC   (NSTACKWORDS+1)
+#define STACK_SHR_B     (NSTACKWORDS+2)
+#define STACK_SHR_C     (NSTACKWORDS+3)
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+
+#define STACK_BYTEMASK  1
+
+#define acc         x10 
+#define b           x11 
+#define c           x12
+#define len         x13
+#define shr_b       x18
+#define shr_c       x19
+#define tmp         x20
+#define tmp_vec     x21
+#define shr_acc     x22
+
+#define bytemask    len
+
+#define FUNCTION_NAME vect_complex_s32_macc
+    
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    xm.stdsp  s7,s6,24
+    mv shr_acc, a4
+    { nop                                           ;   nop}
+    mv shr_b, a5
+    {   slli t3, len, 3                         ;   nop}
+    mv shr_c, a6
+    {   li tmp, 32                             ;   nop}
+    {   xm.zexti t3, 5                             ;   srli len, len, 2                         }
+    {   addi tmp_vec,sp, (STACK_VEC_TMP)*4         ;   xm.mkmsk t3, t3                          }
+    {   li t3, 0                              ;   sw t3, (STACK_BYTEMASK)*4             (sp)}
+    {   xm.mkmski t3, 32                           ;   xm.vsetc t3}
+    {   addi len, len, -1                         ;   xm.brff len, .L_loop_bot_s32                 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+.L_loop_top_s32:
+        xm.vlashr acc, shr_acc
+        xm.vstrpv acc, t3
+        xm.vlashr b, shr_b
+	      xm.vstrpv tmp_vec, t3
+    { nop                                           ;   xm.vldc tmp_vec}
+	      xm.vlashr c, shr_c
+        xm.vstrpv tmp_vec, t3
+    { nop                                           ;   xm.vldd tmp_vec}
+    {   add b, b, tmp                           ;   xm.vcmr0                                   }
+    {   add c, c, tmp                           ;   xm.vcmi0                                    }
+    { nop                                           ;   xm.vladd acc}
+    {   add acc, acc, tmp                       ;   xm.vstr acc}
+    {   addi len, len, -1                         ;   xm.bt len, .L_loop_top_s32                 }
+
+.L_loop_bot_s32:
+    { nop                                           ;   lw bytemask, (STACK_BYTEMASK)*4        (sp)}
+    { nop                                           ;   xm.brff len, .L_done_s32                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vstd tmp_vec}
+        xm.vlashr acc, shr_acc
+        xm.vstrpv acc, bytemask
+        xm.vlashr b, shr_b
+        xm.vstrpv tmp_vec, bytemask
+    { nop                                           ;   xm.vldc tmp_vec}
+        xm.vlashr c, shr_c
+        xm.vstrpv tmp_vec, bytemask                      
+    { nop                                           ;   xm.vldd tmp_vec}
+    { nop                                           ;   xm.vcmr0                                   }
+    {   mv t3, tmp_vec                        ;   xm.vcmi0                                    }
+    { nop                                           ;   xm.vladd acc}
+        xm.vstrpv tmp_vec, bytemask
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vstr tmp_vec}
+        xm.vstrpv acc, bytemask
+
+.L_done_s32:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s7,s6,24
+
+    {   li a0, 31                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ; nop                                           }
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_mag.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_mag.S
new file mode 100644
index 00000000..bf45a262
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_mag.S
@@ -0,0 +1,166 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_complex_s32_mag(
+    int32_t a[],
+    const complex_s32_t* b,
+    const unsigned length,
+    const right_shift_t b_shr,
+    const complex_s32_t* rot_table
+    const unsigned table_rows);
+
+*/
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (2)
+#define NSTACKWORDS     (8+(8*NSTACKVECS)+4)
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+#define STACK_VEC_TMP2  (NSTACKWORDS-16-4)
+
+#define FUNCTION_NAME vect_complex_s32_mag
+
+#define Q(R)    R
+
+#define a           x10 
+#define b           x11 
+#define length      x12
+#define b_shr       x13
+#define _32         x18
+#define vec_tmp     x19
+#define mask32      x20
+#define tmp         x21
+#define iter        x22
+#define tail_bytes  x23
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    xm.stdsp  s7,s6,24
+    {   li s8, 0                              ;   sw s8, 4                          (sp)}
+
+    {   li _32, 32                             ;   li t3, 0                              }
+    {   addi vec_tmp,sp, (STACK_VEC_TMP)*4         ;   xm.vsetc t3}
+    
+    {   srli length, length, 2                   ;   mv tail_bytes, length                  }
+    {   xm.mkmski mask32, 32                        ;   xm.zexti tail_bytes, 2                      }
+    {   slli tail_bytes, tail_bytes, 2           ;   xm.brff length, .L_outer_loop_bot            }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_outer_loop_top:
+        //     vlashr b[0], b_shr
+        //     vstrpv vec_tmp[0], mask32
+        // {   add b, b, _32                           ;   vsign                                   }
+        // {                                           ;   vlmul vec_tmp[0]                        }
+        //     vstrpv vec_tmp[0], mask32
+        // {                                           ;   ldw x28, sp[STACK_ROT_TABLE]            }
+        // {                                           ;   ldw iter, sp[STACK_TABLE_ROWS]          }        
+
+            xm.vlashr b, b_shr
+        { nop                                           ;   xm.vsign                                   }
+        mv t3, a4 
+            xm.vstrpv vec_tmp, mask32
+            xm.vlashr b, b_shr
+        {   add b, b, _32                           ;   xm.vlmul0 vec_tmp}
+        mv iter, a5
+            xm.vstrpv vec_tmp, mask32
+
+        .L_inner_loop_top:
+            {   addi iter, iter, -1                       ;   xm.vldd vec_tmp}
+            {   add t3, t3, _32                       ;   xm.vldc t3}
+            { nop                                           ;   xm.vcmr0                                   }
+            { nop                                           ;   xm.vcmi0                                    }
+                xm.vstrpv vec_tmp, mask32
+            { nop                                           ;   xm.vsign                                   }
+            { nop                                           ;   xm.vlmul0 vec_tmp}
+                xm.vstrpv vec_tmp, mask32
+            { nop                                           ;   xm.bt iter, .L_inner_loop_top              }
+        
+        { nop                                           ;   xm.vstr vec_tmp}
+        {   addi length, length, -1                   ;   lw t3,0                     ( vec_tmp)}
+        { nop                                           ;   sw t3,0                           ( a)}
+        { nop                                           ;   lw t3,8                     ( vec_tmp)}
+        { nop                                           ;   sw t3,4                           ( a)}
+        {   addi a, a, 8                             ;   lw t3,16                     ( vec_tmp)}
+        { nop                                           ;   sw t3,0                           ( a)}
+        { nop                                           ;   lw t3,24                     ( vec_tmp)}
+        {   addi a, a, 8                             ;   sw t3,4                           ( a)}
+        { nop                                           ;   xm.bt length, .L_outer_loop_top            }
+
+    .L_outer_loop_bot:  
+
+    {   xm.mkmsk tail_bytes, tail_bytes            ;   xm.brff tail_bytes, .L_done                  }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        xm.vlashr b, b_shr
+
+    mv t3, a4
+    { nop                                           ;   xm.vsign                                   }
+        xm.vstrpv vec_tmp, mask32
+        xm.vlashr b, b_shr
+    { nop                                           ;   xm.vlmul0 vec_tmp}
+    mv iter, a5
+        xm.vstrpv vec_tmp, mask32
+              
+    .L_inner_loop2_top:
+        {   addi iter, iter, -1                       ;   xm.vldd vec_tmp}
+        {   add t3, t3, _32                       ;   xm.vldc t3}
+        { nop                                           ;   xm.vcmr0                                   }
+        { nop                                           ;   xm.vcmi0                                    }
+            xm.vstrpv vec_tmp, mask32
+        { nop                                           ;   xm.vsign                                   }
+        { nop                                           ;   xm.vlmul0 vec_tmp}
+            xm.vstrpv vec_tmp, mask32
+        { nop                                           ;   xm.bt iter, .L_inner_loop2_top             }
+        
+    { nop                                           ;   lw t3,0                     ( vec_tmp)}
+    { nop                                           ;   sw t3,0                     ( vec_tmp)}
+    { nop                                           ;   lw t3,8                     ( vec_tmp)}
+    { nop                                           ;   sw t3,4                     ( vec_tmp)}
+    { nop                                           ;   lw t3,16                     ( vec_tmp)}
+    { nop                                           ;   sw t3,8                     ( vec_tmp)}
+    { nop                                           ;   lw t3,24                     ( vec_tmp)}
+    {   addi t3,sp, (STACK_VEC_TMP)*4             ;   sw t3,12                     ( vec_tmp)}
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vstd t3}
+        xm.vstrpv t3, tail_bytes
+        xm.vstrpv a, tail_bytes
+    { nop                                           ;   xm.vldd t3}
+    { nop                                           ;   xm.vstd t3}
+
+.L_done:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s7,s6,24
+
+    {   li a0, 31                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ;   lw s8, 4                          (sp)}
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_mul.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_mul.S
new file mode 100644
index 00000000..b89b4f39
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_mul.S
@@ -0,0 +1,116 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_complex_s32_mul(
+    complex_s32_t* a,
+    const complex_s32_t* b,
+    const complex_s32_t* c,
+    const unsigned length,
+    const right_shift_t b_shr,
+    const right_shift_t c_shr);
+
+*/
+
+.text
+.p2align 2
+
+
+#define NSTACKWORDS     (8+8+4)
+
+#define a           x10 
+#define b           x11 
+#define c           x12
+#define len         x13
+#define shr_b       x18
+#define shr_c       x19
+#define tmp         x20
+#define tmp_vec     x21
+
+#define bytemask    len
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+
+#define STACK_BYTEMASK  1
+
+#define FUNCTION_NAME vect_complex_s32_mul
+    
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+
+    mv shr_b, a4
+    {   slli t3, len, 3                         ;   nop}
+    mv shr_c, a5
+    {   li tmp, 32                             ;   nop}
+    {   xm.zexti t3, 5                             ;   srli len, len, 2                         }
+    {   addi tmp_vec,sp, (STACK_VEC_TMP)*4         ;   xm.mkmsk t3, t3                          }
+    {   li t3, 0                              ;   sw t3, (STACK_BYTEMASK)*4             (sp)}
+    {   xm.mkmski t3, 32                           ;   xm.vsetc t3}
+    {   addi len, len, -1                         ;   xm.brff len, .L_loop_bot_s32                 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+.L_loop_top_s32:
+        xm.vlashr b, shr_b
+	    xm.vstrpv tmp_vec, t3
+    { nop                                           ;   xm.vldc tmp_vec}
+	    xm.vlashr c, shr_c
+        xm.vstrpv tmp_vec, t3
+    { nop                                           ;   xm.vldd tmp_vec}
+    {   add b, b, tmp                           ;   xm.vcmr0                                   }
+    {   add c, c, tmp                           ;   xm.vcmi0                                    }
+    {   add a, a, tmp                           ;   xm.vstr a}
+    {   addi len, len, -1                         ;   xm.bt len, .L_loop_top_s32                 }
+
+.L_loop_bot_s32:
+    { nop                                           ;   lw bytemask, (STACK_BYTEMASK)*4        (sp)}
+    { nop                                           ;   xm.brff len, .L_done_s32                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vstd tmp_vec}
+        xm.vlashr b, shr_b
+        xm.vstrpv tmp_vec, bytemask
+    { nop                                           ;   xm.vldc tmp_vec}
+        xm.vlashr c, shr_c
+        xm.vstrpv tmp_vec, bytemask                      
+    { nop                                           ;   xm.vldd tmp_vec}
+    { nop                                           ;   xm.vcmr0                                   }
+    {   mv t3, tmp_vec                        ;   xm.vcmi0                                    }
+        xm.vstrpv tmp_vec, bytemask
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vstr tmp_vec}
+        xm.vstrpv a, bytemask
+
+.L_done_s32:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+
+    {   li a0, 31                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ; nop                                           }
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_nmacc.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_nmacc.S
new file mode 100644
index 00000000..204fb92b
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_nmacc.S
@@ -0,0 +1,128 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// XMOS Public License: Version 1
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_complex_s32_nmacc(
+    complex_s32_t* acc,
+    const complex_s32_t* b,
+    const complex_s32_t* c,
+    const unsigned length,
+    const right_shift_t acc_shr,
+    const right_shift_t b_shr,
+    const right_shift_t c_shr);
+
+*/
+
+.text
+.p2align 2
+
+
+#define NSTACKWORDS     (8+8+4)
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+
+#define STACK_BYTEMASK  1
+
+#define acc         x10 
+#define b           x11 
+#define c           x12
+#define len         x13
+#define shr_b       x18
+#define shr_c       x19
+#define tmp         x20
+#define tmp_vec     x21
+#define shr_acc     x22
+
+#define bytemask    len
+
+#define FUNCTION_NAME vect_complex_s32_nmacc
+    
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    xm.stdsp  s7,s6,24
+
+    mv shr_acc, a4
+    mv shr_b, a5
+    mv shr_c, a6
+    {   slli t3, len, 3                         ;   nop}
+    {   li tmp, 32                             ;   nop}
+    {   xm.zexti t3, 5                             ;   srli len, len, 2                         }
+    {   addi tmp_vec,sp, (STACK_VEC_TMP)*4         ;   xm.mkmsk t3, t3                          }
+    {   li t3, 0                              ;   sw t3, (STACK_BYTEMASK)*4             (sp)}
+    {   xm.mkmski t3, 32                           ;   xm.vsetc t3}
+    {   addi len, len, -1                         ;   xm.brff len, .L_loop_bot_s32                 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+.L_loop_top_s32:
+        xm.vlashr acc, shr_acc
+        xm.vstrpv acc, t3
+        xm.vlashr b, shr_b
+	      xm.vstrpv tmp_vec, t3
+    { nop                                           ;   xm.vldc tmp_vec}
+	      xm.vlashr c, shr_c
+        xm.vstrpv tmp_vec, t3
+    { nop                                           ;   xm.vldd tmp_vec}
+    {   add b, b, tmp                           ;   xm.vcmr0                                   }
+    {   add c, c, tmp                           ;   xm.vcmi0                                    }
+    { nop                                           ;   xm.vlsub acc}
+    {   add acc, acc, tmp                       ;   xm.vstr acc}
+    {   addi len, len, -1                         ;   xm.bt len, .L_loop_top_s32                 }
+
+.L_loop_bot_s32:
+    { nop                                           ;   lw bytemask, (STACK_BYTEMASK)*4        (sp)}
+    { nop                                           ;   xm.brff len, .L_done_s32                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vstd tmp_vec}
+        xm.vlashr acc, shr_acc
+        xm.vstrpv acc, bytemask
+        xm.vlashr b, shr_b
+        xm.vstrpv tmp_vec, bytemask
+    { nop                                           ;   xm.vldc tmp_vec}
+        xm.vlashr c, shr_c
+        xm.vstrpv tmp_vec, bytemask                      
+    { nop                                           ;   xm.vldd tmp_vec}
+    { nop                                           ;   xm.vcmr0                                   }
+    {   mv t3, tmp_vec                        ;   xm.vcmi0                                    }
+    { nop                                           ;   xm.vlsub acc}
+        xm.vstrpv tmp_vec, bytemask
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vstr tmp_vec}
+        xm.vstrpv acc, bytemask
+
+.L_done_s32:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s7,s6,24
+
+    {   li a0, 31                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ; nop                                           }
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_real_mul.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_real_mul.S
new file mode 100644
index 00000000..44140a7a
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_real_mul.S
@@ -0,0 +1,134 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_complex_s32_real_mul(
+    complex_s32_t* a,
+    const complex_s32_t* b,
+    const int32_t c[],
+    const unsigned length,
+    const right_shift_t b_shr,
+    const right_shift_t c_shr);
+
+*/
+
+.text
+.p2align 2
+
+#define FUNCTION_NAME vect_complex_s32_real_mul
+
+#define NSTACKVECS      (2)
+#define NSTACKWORDS     (12+8*NSTACKVECS+4)
+
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+#define STACK_VEC_C1    (NSTACKWORDS-16-4)
+
+
+#define STACK_TAIL_LEN  (8)
+    
+
+#define a           x10 
+#define b           x11 
+#define c           x12
+#define length      x13
+#define b_shr       x18
+#define c_shr       x19
+#define _32         x20
+#define vec_tmp     x21
+#define vec_c1      x22
+#define tmpA        x23
+#define tmpB        x24
+
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    xm.stdsp  s7,s6,24
+    {   li t3, 0                              ;   sw s8, 4                          (sp)}
+    {   mv t3, length                         ;   xm.vsetc t3}
+    mv b_shr, a4
+    {   srli length, length, 2                   ;  nop}
+    mv c_shr, a5
+    {   xm.zexti t3, 2                             ;   nop}
+    
+{   xm.ldawsp vec_tmp, STACK_VEC_TMP*4         ; nop }
+{   xm.ldawsp vec_c1, STACK_VEC_C1*4           ; nop }
+    {   xm.mkmski t3, 32                           ;   sw t3, (STACK_TAIL_LEN)*4             (sp)}
+    {   li _32, 32                             ;   xm.brff length, .L_loop_bot                  }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    
+
+    .L_loop_top:
+    
+            xm.vlashr c, c_shr 
+            xm.vstrpv vec_tmp, t3
+            xm.lddsp  tmpA,tmpB,((STACK_VEC_TMP/2) + 0)*8
+            xm.stdsp  tmpA,tmpA,((STACK_VEC_C1/2)  + 0)*8
+            xm.stdsp  tmpB,tmpB,((STACK_VEC_C1/2)  + 1)*8
+            xm.lddsp  tmpA,tmpB,((STACK_VEC_TMP/2) + 1)*8
+        {   addi c, c, 8                             ;   addi length, length, -1                   }
+            xm.stdsp  tmpA,tmpA,((STACK_VEC_C1/2) + 2)*8
+            xm.stdsp  tmpB,tmpB,((STACK_VEC_C1/2) + 3)*8
+
+            xm.vlashr b, b_shr
+        {   add b, b, _32                           ;   xm.vlmul0 vec_c1}
+        {   add a, a, _32                           ;   xm.vstr a}
+        {   addi c, c, 8                             ;   xm.bt length, .L_loop_top                  }
+
+.L_loop_bot:
+
+    {   addi t3,sp, (STACK_VEC_C1)*4              ;   lw length, (STACK_TAIL_LEN)*4          (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */
+    {   slli length, length, 3                   ;   xm.brff length, .L_done                      }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    {   xm.mkmsk length, length                    ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vstd vec_tmp}
+        xm.vlashr c, c_shr
+        xm.vstrpv vec_tmp, length
+        xm.lddsp  tmpA,tmpB,((STACK_VEC_TMP / 2) + 0)*8
+        xm.stdsp  tmpA,tmpA,((STACK_VEC_C1  / 2) + 0)*8
+        xm.stdsp  tmpB,tmpB,((STACK_VEC_C1  / 2) + 1)*8
+        xm.lddsp  tmpA,tmpB,((STACK_VEC_TMP / 2) + 1)*8
+        xm.stdsp  tmpA,tmpA,((STACK_VEC_C1  / 2) + 2)*8
+        xm.stdsp  tmpB,tmpB,((STACK_VEC_C1  / 2) + 3)*8
+        xm.vlashr b, b_shr
+    { nop                                           ;   xm.vlmul0 t3}
+        xm.vstrpv a, length
+        xm.vstrpv vec_tmp, length
+    { nop                                           ;   xm.vldd vec_tmp}
+    { nop                                           ;   xm.vstd vec_tmp}
+
+
+.L_done:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s7,s6,24
+
+    {   li a0, 31                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ;   lw s8, 4                          (sp)}
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_squared_mag.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_squared_mag.S
new file mode 100644
index 00000000..7dcf2db1
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_squared_mag.S
@@ -0,0 +1,120 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_complex_s32_squared_mag(
+    int32_t a[],
+    const complex_s32_t* b,
+    const unsigned length,
+    const right_shift_t b_shr);
+
+*/
+
+.text
+.p2align 2
+
+
+#define NSTACKWORDS     (7+8+1)
+
+#define a               x10 
+#define b               x11 
+#define length          x12
+#define b_shr           x13
+#define vec_count       x18
+#define _16             x19
+#define vec_tmp         x20
+#define tail_mask       x21
+#define vec_ones        x22
+#define tmpA            x23
+#define tmpB            x24
+
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-1)
+
+#define FUNCTION_NAME vect_complex_s32_squared_mag
+    
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,0
+    xm.stdsp  s5,s4,8
+    xm.stdsp  s7,s6,16
+    { nop                                           ;   sw s8, 24                          (sp)}
+
+    {   li _16, 16                             ;   srli vec_count, length, 2                }
+    {   li t3, 0                              ;   slli tail_mask, length, 3                }
+    {   xm.zexti tail_mask, 5                       ;   xm.vsetc t3}
+
+lui t3, %hi(vpu_vec_complex_ones)                                                  
+        addi t3,t3, %lo(vpu_vec_complex_ones)                                                  
+    {   addi vec_tmp,sp, (STACK_VEC_TMP)*4         ;   mv vec_ones, t3                       }
+    {   xm.mkmski t3, 32                           ;   xm.brff vec_count, .L_loop_bot               }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_loop_top:
+            xm.vlashr b, b_shr
+            xm.vstrpv vec_tmp, t3
+        {   add b, b, _16                           ;   xm.vldc vec_tmp}
+        {   add b, b, _16                           ;   xm.vldd vec_tmp}
+        {   addi vec_count, vec_count, -1             ;   xm.vcmcr0                                   }
+        { nop /* zero out imag part so that we don't */ ;   xm.vlmul0 vec_ones}
+        { nop /* clobber the headroom counter        */ ;   xm.vstr vec_tmp}
+        { nop                                           ;   lw tmpA,0                    ( vec_tmp)}
+        { nop                                           ;   lw tmpB,8                    ( vec_tmp)}
+            xm.stdi  tmpA,tmpB, 0(a)
+        { nop                                           ;   lw tmpA,16                    ( vec_tmp)}
+        { nop                                           ;   lw tmpB,24                    ( vec_tmp)}
+            xm.stdi  tmpA,tmpB, 8(a)
+        {   add a, a, _16                           ;   xm.bt vec_count, .L_loop_top               }
+
+    .L_loop_bot:
+    {   xm.zexti length, 2                          ;   xm.brff tail_mask, .L_done                   }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    {   slli length, length,1                    ;   xm.vclrdr                                  }
+    {   xm.mkmsk tail_mask, tail_mask              ;   xm.vstd vec_tmp}
+        xm.vlashr b, b_shr
+        xm.vstrpv vec_tmp, tail_mask
+    {   li tmpA, 6                             ;   xm.vldd vec_tmp}
+    {   sub length, tmpA, length                ;   xm.vldc vec_tmp}
+    { nop                                           ;   xm.vcmcr0                                   }
+    { nop                                           ;   xm.vlmul0 vec_ones}
+    { xm.shli length, length, 1                 ;   xm.vstr vec_tmp}
+    { nop                                           ;   xm.bru length                              }
+    { nop                                           ;   lw t3,16                     ( vec_tmp)}
+    { nop                                           ;   sw t3,8                           ( a)}
+    { nop                                           ;   lw t3,8                     ( vec_tmp)}
+    { nop                                           ;   sw t3,4                           ( a)}
+    { nop                                           ;   lw t3,0                     ( vec_tmp)}
+    { nop                                           ;   sw t3,0                           ( a)}
+.L_done:
+        xm.lddsp  s3,s2,0
+        xm.lddsp  s5,s4,8
+        xm.lddsp  s7,s6,16
+
+    {   li a0, 31                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ;   lw s8, 24                          (sp)}
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_sum.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_sum.S
new file mode 100644
index 00000000..ebf39336
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_sum.S
@@ -0,0 +1,150 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*  
+
+
+void vect_complex_s32_sum(
+    const complex_s64_t* res,
+    const complex_s32_t* b,
+    const unsigned length,
+    const right_shift_t b_shr);
+
+
+*/
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (2)
+#define NSTACKWORDS     (8+(8*NSTACKVECS)+4)
+
+#define b           x10
+#define b_shr       x11
+#define length      x12
+#define _32         x13
+#define tmp         x18
+#define tail_bytes  x19
+
+#define STACK_VEC_ZEROS     (NSTACKWORDS- 8-4)
+#define STACK_VEC_TMP       (NSTACKWORDS-16-4)
+
+#define STACK_RES   (1)
+
+#define FUNCTION_NAME vect_complex_s32_sum
+    
+
+
+FUNCTION_NAME:
+
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,8
+        xm.stdsp  s5,s4,16
+
+    {   mv b, a1                               ;   sw a0, (STACK_RES)*4                   (sp)}
+
+
+    {   mv b_shr, a3                           ;   slli tail_bytes, length, 3               }
+    { nop                                           ;   xm.zexti tail_bytes, 5                      }
+
+    {   addi t3,sp, (STACK_VEC_ZEROS)*4           ;   xm.vclrdr                                  }
+    {   li t3, 0                              ;   xm.vstd t3}
+    {   xm.slt tmp, b_shr, t3                     ;   xm.vsetc t3}
+    {   addi tmp,sp, (STACK_VEC_TMP)*4             ;   xm.assertn tmp /*Cannot be negative shift*/ }
+lui t3, %hi(vpu_vec_0x40000000)
+        addi t3,t3, %lo(vpu_vec_0x40000000)
+        xm.vlashr t3, b_shr
+    {   li t3, 0                              ;   xm.vstr tmp}
+    {   srli length, length, 2                   ;   xm.vldc tmp}
+    {   li _32, 32                             ;   xm.vsetc t3}
+    { nop                                           ;   xm.vclrdr                                  }
+
+    { nop                                           ;   xm.brff length, .L_loop_bot                  }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_loop_top:
+
+        {   addi length, length, -1               ;   xm.vlmacc0 b}
+        {   add b, b, _32                       ;   xm.bt length, .L_loop_top                  }
+
+    .L_loop_bot:
+
+    {   addi t3,sp, (STACK_VEC_ZEROS)*4           ;   xm.brff tail_bytes, .L_get_res               }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    {   sub t3, t3, tail_bytes                ; nop                                           }
+    { nop                                           ;   xm.vldc t3}
+    {   addi t3,sp, (STACK_VEC_ZEROS)*4           ;   xm.vlmacc0 b}
+        
+
+/*  We've got 8 40-bit accumulators. Lower 32 bits are in vR, upper 8 in vD.
+    vD does appear to sign-extend the values up to 64 bits.
+
+    (vD:vR)[k] ==  ((int32_t)vD[k])*(2^32) + ((uint32_t)vR[k]) */
+
+#define real_hi     x10
+#define real_lo     x11
+#define imag_hi     x12
+#define imag_lo     x13
+#define num         x19
+#define tmp_re      x20
+#define tmp_im      x21
+
+// astew [2020-10-16]: There's probably a faster way to do this. See the VPU-based solution I found for vect_s32_sum for
+//                     non-complex values
+
+.L_get_res:
+    {   li real_hi, 0                          ;   li imag_hi, 0                          }
+    {   li num, 1                              ;   xm.vstr tmp}
+        xm.lddi  real_lo,imag_lo, 0(tmp)
+        xm.lddi  tmp_re,tmp_im, 8(tmp)
+        xm.maccu real_hi, real_lo, num, tmp_re
+        xm.maccu imag_hi, imag_lo, num, tmp_im
+        xm.lddi  tmp_re,tmp_im, 16(tmp)
+        xm.maccu real_hi, real_lo, num, tmp_re
+        xm.maccu imag_hi, imag_lo, num, tmp_im
+        xm.lddi  tmp_re,tmp_im, 24(tmp)
+        xm.maccu real_hi, real_lo, num, tmp_re
+        xm.maccu imag_hi, imag_lo, num, tmp_im
+    { nop                                           ;   xm.vfttf                                   }
+    {   li num, 2                              ;   xm.vstd t3}
+        xm.lddi  tmp_re,tmp_im, 0(t3)
+    {   add real_hi, real_hi, tmp_re            ;   add imag_hi, imag_hi, tmp_im            }
+
+    // astew [2021-09-28]: ... what was the purpose of these next 4 instructions..?
+    //                     maybe at the time I was thinking the lower word should be
+    //                     interpreted as signed?
+    // {   shr tmp_re, real_lo, 1                  ;   zext real_lo, 1                         }
+    // {   shr tmp_im, imag_lo, 1                  ;   zext imag_lo, 1                         }
+    //     maccs real_hi, real_lo, num, tmp_re
+    //     maccs imag_hi, imag_lo, num, tmp_im
+    { nop                                           ;   lw tmp, (STACK_RES)*4                  (sp)}
+        xm.stdi  real_lo,real_hi, 0(tmp)
+        xm.stdi  imag_lo,imag_hi, 8(tmp)
+
+    
+
+.L_done:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_to_complex_s16.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_to_complex_s16.S
new file mode 100644
index 00000000..732d45fe
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_to_complex_s16.S
@@ -0,0 +1,107 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+
+
+#if defined(__VX4B__)
+
+
+/*  
+
+headroom_t vect_complex_s32_to_vect_complex_s16(
+    int16_t* a_real,
+    int16_t* a_imag,
+    const complex_s32_t* b,
+    const unsigned length,
+    const right_shift_t b_shr);
+
+*/
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (2)
+#define NSTACKWORDS     (8 + (8*NSTACKVECS)+4)
+
+#define FUNCTION_NAME   vect_complex_s32_to_vect_complex_s16
+
+#define STACK_VEC_TMP  (NSTACKWORDS-16-4)
+
+#define STACK_B_SHR     (NSTACKWORDS+1)
+
+#define a_real      x10
+#define a_imag      x11
+#define b           x12
+#define len         x13
+#define b_shr       x18
+#define tail        x19
+#define _28         x20
+#define mask        x21
+
+
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,8
+        xm.stdsp  s5,s4,16
+    {   li t3, 0                              ;   li _28, 28                             }
+    addi b,b,-4
+    {   li t3, 16                             ;   xm.vsetc t3}
+    {   srli len, len, 2                         ;   slli tail, len, 1                        }
+    mv b_shr, a4
+    {   xm.mkmski mask, 8                           ;  nop}
+    {   sub b_shr, b_shr, t3                   ;   xm.zexti tail, 3                            }
+lui t3, %hi(vpu_vec_complex_pos_j)
+        addi t3,t3, %lo(vpu_vec_complex_pos_j)
+    {   xm.mkmsk tail, tail                        ;   xm.brff len, .L_loop_bot                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_loop_top:
+            xm.vlashr b, b_shr
+        {   addi b, b, 4                             ;   xm.vlmul0 t3}   
+        {   addi len, len, -1                         ;   xm.vdepth16                                }
+        { nop                                           ;   xm.vdepth16                                }
+            xm.vstrpv a_real, mask
+            xm.vlashr b, b_shr         
+        {   add b, b, _28                           ;   xm.vlmul0 t3}
+        {   addi a_real, a_real, 8                   ;   xm.vdepth16                                }
+        { nop                                           ;   xm.vdepth16                                }
+            xm.vstrpv a_imag, mask
+        {   addi a_imag, a_imag, 8                   ;   xm.bt len, .L_loop_top                     }
+
+
+.L_loop_bot:
+
+
+
+    beqz tail, .L_finish
+    xm.vlashr b, b_shr
+    xm.vlmul0 t3
+    xm.vdepth16
+    xm.vdepth16
+    xm.vstrpv a_real, tail
+    addi b, b, 4
+    xm.vlashr b, b_shr         
+    xm.vlmul0 t3
+    xm.vdepth16
+    xm.vdepth16
+    xm.vstrpv a_imag, tail
+
+
+.L_finish:
+        xm.lddsp  s3,s2,8/* XAT Warning: "Not correcting LDDSP offset because it's not in the local frame range" */
+        xm.lddsp  s5,s4,16/* XAT Warning: "Not correcting LDDSP offset because it's not in the local frame range" */
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_conj_macc.S b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_conj_macc.S
new file mode 100644
index 00000000..1ef7cae7
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_conj_macc.S
@@ -0,0 +1,77 @@
+// Copyright 2022-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+    
+#if defined(__VX4B__)
+
+.text
+
+/*
+  Complex conjugate multiply-accumulate
+
+  a[k] = a[k] + b[k] (*c) conjugate(c[k])
+
+  Note: a[], b[] and c[] must all be 8-byte aligned
+
+  void vect_complex_f32_conj_macc(
+      complex_float_t a[],
+      const complex_float_t b[],
+      const complex_float_t c[],
+      const unsigned length);
+
+*/
+
+#define FUNC_NAME     vect_complex_f32_conj_macc
+#define NSTACKWORDS   12
+
+.globl	FUNC_NAME
+.type	FUNC_NAME,@function
+
+#define a         x10
+#define b         x11
+#define c         x12
+#define len       x13
+#define B_re      x18
+#define B_im      x19
+#define C_re      x20
+#define C_im      x21
+#define A_re      x22
+#define A_im      x23
+
+.p2align 4
+FUNC_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+  xm.stdsp  s5,s4,8
+  xm.stdsp  s7,s6,16
+
+{ li t3, 0                  ; addi len, len, -1             }
+.L_loop_top:
+    xm.ldd  A_re,A_im, len(a)
+    xm.ldd  B_re,B_im, len(b)
+    xm.ldd  C_re,C_im, len(c)
+    xm.fmacc A_re, A_re, B_re, C_re   // A[k].re += B[k].re * C[k].re
+    xm.fmacc A_re, A_re, C_im, B_im   // A[k].re += C[k].im * B[k].im 
+    xm.fsub C_im, t3, C_im     // C_im <--  -C[k].im 
+    xm.fmacc A_im, A_im, B_re, C_im   // A[k].re -= B[k].re * C[k].im 
+    xm.fmacc A_im, A_im, B_im, C_re  // A[k].re  += B[k].im * C[k].re
+    xm.std  A_re,A_im, len(a)
+  { addi len, len, -1             ; xm.bt len, .L_loop_top         }
+.L_loop_bot:
+
+.L_done:
+  xm.lddsp  s3,s2,0
+  xm.lddsp  s5,s4,8
+  xm.lddsp  s7,s6,16
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+    
+	.set	FUNC_NAME.nstackwords,NSTACKWORDS;     .globl	FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 39. */ 
+	.set	FUNC_NAME.maxcores,1;                  .globl	FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 26. */ 
+	.set	FUNC_NAME.maxtimers,0;                 .globl	FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 27. */ 
+	.set	FUNC_NAME.maxchanends,0;               .globl	FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 29. */ 
+.Ltmp1:
+	.size	FUNC_NAME, .Ltmp1-FUNC_NAME
+
+#undef NSTACKWORDS
+
+
+#endif
diff --git a/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_conj_mul.S b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_conj_mul.S
new file mode 100644
index 00000000..ddd3c84d
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_conj_mul.S
@@ -0,0 +1,81 @@
+// Copyright 2022-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+    
+#if defined(__VX4B__)
+
+.text
+
+/*
+  Complex multiply
+
+  a[k] = b[k] (*c) ( conjugate(c[k]) )
+
+  It is safe to use the same argument twice, so
+    vect_complex_f32_conj_mul(x[], x[], y[])  -->   x *= y
+
+  Note: a[], b[] and c[] must all be 8-byte aligned
+
+  void vect_complex_f32_conj_mul(
+      complex_float_t a[],
+      const complex_float_t b[],
+      const complex_float_t c[],
+      const unsigned length);
+
+*/
+
+#define FUNC_NAME     vect_complex_f32_conj_mul
+#define NSTACKWORDS   12
+
+.globl	FUNC_NAME
+.type	FUNC_NAME,@function
+
+#define a         x10
+#define b         x11
+#define c         x12
+#define len       x13
+#define B_re      x18
+#define B_im      x19
+#define C_re      x20
+#define C_im      x21
+#define A_re      x22
+#define A_im      x23
+
+.p2align 4
+FUNC_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+  xm.stdsp  s5,s4,8
+  xm.stdsp  s7,s6,16
+
+{ li t3, 0                  ; addi len, len, -1             }
+.L_loop_top:
+    xm.ldd  B_re,B_im, len(b)
+    xm.ldd  C_re,C_im, len(c)
+
+    xm.fmul A_re, B_re, C_re         // A[k].re  = B[k].re * C[k].re
+    xm.fmacc A_re, A_re, B_im, C_im  // A[k].re += B[k].im * C[k].im
+    xm.fsub C_im, t3, C_im          // C_im <--  -C[k].im
+    xm.fmul A_im, B_re, C_im         // A[k].im  = B_re * -C[k].im
+    xm.fmacc A_im, A_im, C_re, B_im  // A[k].im += C_re * B[k].im
+
+    xm.std  A_re,A_im, len(a)
+  { addi len, len, -1             ; xm.bt len, .L_loop_top         }
+.L_loop_bot:
+
+.L_done:
+  xm.lddsp  s3,s2,0
+  xm.lddsp  s5,s4,8
+  xm.lddsp  s7,s6,16
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+    
+	.set	FUNC_NAME.nstackwords,NSTACKWORDS;     .globl	FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 39. */ 
+	.set	FUNC_NAME.maxcores,1;                  .globl	FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 26. */ 
+	.set	FUNC_NAME.maxtimers,0;                 .globl	FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 27. */ 
+	.set	FUNC_NAME.maxchanends,0;               .globl	FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 29. */ 
+.Ltmp1:
+	.size	FUNC_NAME, .Ltmp1-FUNC_NAME
+
+#undef NSTACKWORDS
+
+
+#endif
diff --git a/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_macc.S b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_macc.S
new file mode 100644
index 00000000..9d65c0e8
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_macc.S
@@ -0,0 +1,77 @@
+// Copyright 2022-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+    
+#if defined(__VX4B__)
+
+.text
+
+/*
+  Complex multiply-accumulate
+
+  a[k] = a[k] + b[k] (*c) c[k]
+
+  Note: a[], b[] and c[] must all be 8-byte aligned
+
+  void vect_complex_f32_macc(
+      complex_float_t a[],
+      const complex_float_t b[],
+      const complex_float_t c[],
+      const unsigned length);
+
+*/
+
+#define FUNC_NAME     vect_complex_f32_macc
+#define NSTACKWORDS   12
+
+.globl	FUNC_NAME
+.type	FUNC_NAME,@function
+
+#define a         x10
+#define b         x11
+#define c         x12
+#define len       x13
+#define B_re      x18
+#define B_im      x19
+#define C_re      x20
+#define C_im      x21
+#define A_re      x22
+#define A_im      x23
+
+.p2align 4
+FUNC_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+  xm.stdsp  s5,s4,8
+  xm.stdsp  s7,s6,16
+
+{ li t3, 0                  ; addi len, len, -1             }
+.L_loop_top:
+    xm.ldd  A_re,A_im, len(a)
+    xm.ldd  B_re,B_im, len(b)
+    xm.ldd  C_re,C_im, len(c)
+    xm.fmacc A_im, A_im, B_re, C_im   // A[k].im += B[k].re * C[k].im
+    xm.fmacc A_im, A_im, C_re, B_im  // A[k].im  += C[k].re * B[k].im
+    xm.fsub B_im, t3, B_im    // B_im <-- 0 - B[k].im = -B[k].im
+    xm.fmacc A_re, A_re, B_re, C_re   // A[k].re += B[k].re * C[k].re
+    xm.fmacc A_re, A_re, B_im, C_im  // A[k].re  -= B[k].im * C[k].im
+    xm.std  A_re,A_im, len(a)
+  { addi len, len, -1             ; xm.bt len, .L_loop_top         }
+.L_loop_bot:
+
+.L_done:
+  xm.lddsp  s3,s2,0
+  xm.lddsp  s5,s4,8
+  xm.lddsp  s7,s6,16
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+    
+	.set	FUNC_NAME.nstackwords,NSTACKWORDS;     .globl	FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 39. */ 
+	.set	FUNC_NAME.maxcores,1;                  .globl	FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 26. */ 
+	.set	FUNC_NAME.maxtimers,0;                 .globl	FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 27. */ 
+	.set	FUNC_NAME.maxchanends,0;               .globl	FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 29. */ 
+.Ltmp1:
+	.size	FUNC_NAME, .Ltmp1-FUNC_NAME
+
+#undef NSTACKWORDS
+
+
+#endif
diff --git a/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_mul.S b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_mul.S
new file mode 100644
index 00000000..c506e646
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_mul.S
@@ -0,0 +1,82 @@
+// Copyright 2022-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+    
+#if defined(__VX4B__)
+
+.text
+
+/*
+  Complex multiply
+
+  a[k] = b[k] (*c) c[k]
+
+  It is safe to use the same argument twice, so
+    vect_complex_f32_mul(x[], x[], y[])  -->   x *= y
+
+  Note: a[], b[] and c[] must all be 8-byte aligned
+
+  void vect_complex_f32_mul(
+      complex_float_t a[],
+      const complex_float_t b[],
+      const complex_float_t c[],
+      const unsigned length);
+
+*/
+
+#define FUNC_NAME     vect_complex_f32_mul
+#define NSTACKWORDS   12
+
+.globl	FUNC_NAME
+.type	FUNC_NAME,@function
+
+#define a         x10
+#define b         x11
+#define c         x12
+#define len       x13
+#define B_re      x18
+#define B_im      x19
+#define C_re      x20
+#define C_im      x21
+#define A_re      x22
+#define A_im      x23
+
+
+.p2align 4
+FUNC_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+  xm.stdsp  s5,s4,8
+  xm.stdsp  s7,s6,16
+
+{ li t3, 0                  ; addi len, len, -1             }
+.L_loop_top:
+    xm.ldd  B_re,B_im, len(b)
+    xm.ldd  C_re,C_im, len(c)
+
+    xm.fmul A_im, B_re, C_im         // A[k].im = B[k].re * C[k].im
+    xm.fmacc A_im, A_im, C_re, B_im  // A[k].im += C[k].re * B[k].im
+    xm.fsub B_im, t3, B_im
+    xm.fmul A_re, B_re, C_re
+    xm.fmacc A_re, A_re, B_im, C_im
+  
+    xm.std  A_re,A_im, len(a)
+  { addi len, len, -1             ; xm.bt len, .L_loop_top         }
+.L_loop_bot:
+
+.L_done:
+  xm.lddsp  s3,s2,0
+  xm.lddsp  s5,s4,8
+  xm.lddsp  s7,s6,16
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+    
+	.set	FUNC_NAME.nstackwords,NSTACKWORDS;     .globl	FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 39. */ 
+	.set	FUNC_NAME.maxcores,1;                  .globl	FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 26. */ 
+	.set	FUNC_NAME.maxtimers,0;                 .globl	FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 27. */ 
+	.set	FUNC_NAME.maxchanends,0;               .globl	FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 29. */ 
+.Ltmp1:
+	.size	FUNC_NAME, .Ltmp1-FUNC_NAME
+
+#undef NSTACKWORDS
+
+
+#endif
diff --git a/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_add.S b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_add.S
new file mode 100644
index 00000000..197adba3
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_add.S
@@ -0,0 +1,88 @@
+// Copyright 2022-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+    
+#if defined(__VX4B__)
+
+.text
+
+/*
+  Note: This works for real or complex floats, just use double the length for complex.
+
+  a[k] = b[k] + c[k]
+
+  It is safe to use the same argument twice, so
+    vect_f32_add(x[], x[], y[])  -->   x += y
+
+  Note: a[], b[] and c[] must all be 8-byte aligned
+
+  void vect_f32_add(
+      float a[],
+      const float b[],
+      const float c[],
+      const unsigned length);
+
+*/
+
+#define FUNC_NAME     vect_f32_add
+#define NSTACKWORDS   8
+
+.globl	FUNC_NAME
+.type	FUNC_NAME,@function
+
+#define a         x10
+#define b         x11
+#define c         x12
+#define len       x13
+#define B0        x18
+#define B1        x19
+#define C0        x20
+#define C1        x21
+
+.p2align 4
+FUNC_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+  xm.stdsp  s5,s4,8
+
+{ mv t3, len                ; xm.zexti len, 1                 }
+{ srli len, t3, 1             ; xm.brff len, .L_even             }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+.L_odd:
+  { addi t3, t3, -1             ; nop                           }
+  { nop                             ; xm.ldw B0, t3            (b)}
+  { nop                             ; xm.ldw C0, t3            (c)}
+    xm.fadd B0, B0, C0
+    xm.stw B0, t3(a)
+.L_even:
+
+{ addi len, len, -1           ; xm.brff len, .L_loop_bot           }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+{ nop                           ; xm.bu .L_loop_top                }
+
+.p2align 4
+.L_loop_top:
+    xm.ldd  B0,B1, len(b)
+    xm.ldd  C0,C1, len(c)
+    xm.fadd B0, B0, C0
+    xm.fadd B1, B1, C1
+    xm.std  B0,B1, len(a)
+  { addi len, len, -1             ; xm.bt len, .L_loop_top         }
+.L_loop_bot:
+
+.L_done:
+  xm.lddsp  s3,s2,0
+  xm.lddsp  s5,s4,8
+  addi a0, t3, 0
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+    
+	
+	// RETURN_REG_HOLDER
+	.set	FUNC_NAME.nstackwords,NSTACKWORDS;     .globl	FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 39. */ 
+	.set	FUNC_NAME.maxcores,1;                  .globl	FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 26. */ 
+	.set	FUNC_NAME.maxtimers,0;                 .globl	FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 27. */ 
+	.set	FUNC_NAME.maxchanends,0;               .globl	FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 29. */ 
+.Ltmp1:
+	.size	FUNC_NAME, .Ltmp1-FUNC_NAME
+
+#undef NSTACKWORDS
+
+
+#endif
diff --git a/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_dot.S b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_dot.S
new file mode 100644
index 00000000..e676c3fa
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_dot.S
@@ -0,0 +1,118 @@
+// Copyright 2022-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+    
+#if defined(__VX4B__)
+
+.text
+
+/*
+
+  float vect_f32_dot(
+      const float b[],
+      const float c[],
+      const unsigned length);
+
+*/
+
+#define FUNC_NAME     vect_f32_dot
+#define NSTACKWORDS   8
+
+.globl	FUNC_NAME
+.type	FUNC_NAME,@function
+
+.p2align 4
+FUNC_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp  s3,s2,0
+  xm.stdsp  s5,s4,8
+
+{ mv a3, a2                  ; xm.zexti a2, 1                  }
+{ xm.brff a2, .even                ; li t3, 0                  }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+.odd:
+  // Deal with tail first
+  addi a3, a3, -1
+  xm.ldw s2, a3(a0)
+  xm.ldw s3, a3(a1)
+  xm.fmacc t3, t3, s2, s3
+
+.even:
+
+// 4 possibilities:
+//    b[] and c[] are (both) DWORD aligned
+//    c[] and c[] are (both) not DWORD aligned
+//    b[] or c[] is DWORD aligned, and the other is not.
+// Figure out which situation applies, because it will affect whether we can
+// do load-doubles and whether the two vectors are aligned if we do.
+{ srli s4, a0, 2               ; srli s5, a1, 2               }
+{ xm.zexti s4, 1                  ; xm.zexti s5, 1                  }
+{ slli s4, s4, 1               ; mv a2, a3                  }
+{ or s4, s4, s5               ; xm.brff a2, .done                }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+{ addi a2, a2, -1               ; xm.bru s4                      }
+  tail .together
+  tail .r1_odd
+  tail .r0_odd
+
+// b[] and c[] are both not DWORD aligned.
+// deal with final element, and shift pointers to be DWORD aligned
+.r0r1_odd:
+  { addi a0, a0, -4               ; xm.ldw s4, a2              (a0)}
+  { addi a1, a1, -4               ; xm.ldw s5, a2              (a1)}
+    srli a2, a2, 1
+  .r0r1_odd_loop:
+      xm.fmacc t3, t3, s4, s5
+      xm.ldd  s4,s2, a2(a0)
+      xm.ldd  s5,s3, a2(a1)
+      xm.fmacc t3, t3, s2, s3
+    { addi a2, a2, -1               ; xm.bt  a2, .r0r1_odd_loop      }
+  .r0r1_odd_loop_done:
+    tail .done
+
+// c[] was odd and b[] even.
+// Since the operands are symmetric (doesn't matter which is which), we can just
+// swap pointers and pretend it was the other way around.
+.r1_odd:
+  { mv a0, a1                  ; mv a1, a0                  }
+// b[] was odd and c[] even.
+.r0_odd:
+  { srli a2, a2, 1               ; xm.ldw s4, a2              (a0)}
+    addi a0, a0, -4
+  .r0_odd_loop:
+      xm.ldd  s5,s3, a2(a1)
+      xm.fmacc t3, t3, s4, s3
+      xm.ldd  s4,s2, a2(a0)
+      xm.fmacc t3, t3, s2, s5
+    { addi a2, a2, -1               ; xm.bt  a2, .r0_odd_loop        }
+  .r0_odd_loop_done:
+    tail .done
+
+  nop
+
+.together:
+    srli a2, a2, 1
+  .together_loop:
+      xm.ldd  s4,s2, a2(a0)
+      xm.ldd  s5,s3, a2(a1)
+      xm.fmacc t3, t3, s2, s3
+      xm.fmacc t3, t3, s4, s5
+    { addi a2, a2, -1               ; xm.bt  a2, .together_loop      }
+
+.done:
+  xm.lddsp  s3,s2,0
+  xm.lddsp  s5,s4,8
+  addi a0, t3, 0
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+    
+	
+	// RETURN_REG_HOLDER
+	.set	FUNC_NAME.nstackwords,NSTACKWORDS;     .globl	FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 39. */ 
+	.set	FUNC_NAME.maxcores,1;                  .globl	FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 26. */ 
+	.set	FUNC_NAME.maxtimers,0;                 .globl	FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 27. */ 
+	.set	FUNC_NAME.maxchanends,0;               .globl	FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 29. */ 
+.Ltmp1:
+	.size	FUNC_NAME, .Ltmp1-FUNC_NAME
+
+#undef NSTACKWORDS
+
+
+#endif
diff --git a/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_max_exponent.S b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_max_exponent.S
new file mode 100644
index 00000000..26e9af48
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_max_exponent.S
@@ -0,0 +1,72 @@
+// Copyright 2022-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+    
+#if defined(__VX4B__)
+
+.text
+
+
+/*
+
+  exponent_t vect_f32_max_exponent(
+      const float b[], 
+      const unsigned length);
+       
+*/
+
+#define NSTACKWORDS 4
+#define FUNC_NAME vect_f32_max_exponent
+
+.globl	FUNC_NAME
+.type	FUNC_NAME,@function
+
+#define b     x10
+#define len   x11
+
+.p2align 4
+FUNC_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,0
+
+  { mv t3, len                ; xm.mkmski a2, 32                }
+  { slli a2, a2, 16              ; xm.zexti t3, 1                 }
+  { srli len, len, 1             ; xm.brff t3, .L_even_elms        }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+// Handle the tail first
+    xm.ldd  s3,s2, len(b)
+    xm.fsexp t3, s3, s3
+  { mv a2, s3                  ; nop                             }
+
+.L_even_elms:
+  { addi len, len, -1             ; xm.brff len, .loop_end           }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+  .loop:
+      xm.ldd  s3,s2, len (b)
+      xm.fsexp t3, s2, s2
+      xm.fsexp a3, s3, s3
+    { xm.slt t3, s2, a2             ; nop                             }
+    { xm.slt t3, s3, a2             ; xm.bt t3, .not                }
+    { xm.slt t3, s3, s2             ; mv a2, s2                  }
+    .not:   
+        bnez t3, .not2
+        mv a2, s3
+    .not2:
+      { addi len, len, -1             ; xm.bt  len, .loop              }
+  .loop_end:
+
+    xm.lddsp  s3,s2,0
+    li a0, 30
+    sub a0, a2, a0
+    xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+	
+	// RETURN_REG_HOLDER
+	.set	FUNC_NAME.nstackwords,NSTACKWORDS;  .globl	FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 39. */ 
+	.set	FUNC_NAME.maxcores,1;              	.globl	FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 26. */ 
+	.set	FUNC_NAME.maxtimers,0;              .globl	FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 27. */ 
+	.set	FUNC_NAME.maxchanends,0;            .globl	FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 29. */ 
+.Ltmp0:
+	.size	FUNC_NAME, .Ltmp0-FUNC_NAME
+
+#undef NSTACKWORDS
+        
+#endif
diff --git a/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_to_s32.S b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_to_s32.S
new file mode 100644
index 00000000..0a91e8a3
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_to_s32.S
@@ -0,0 +1,90 @@
+// Copyright 2022-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+    
+#if defined(__VX4B__)
+
+.text
+
+
+/*
+
+  void vect_f32_to_vect_s32(
+      int32_t a[],
+      const float b[], 
+      const unsigned length,
+      const exponent_t exp);
+       
+*/
+
+#define NSTACKWORDS   8
+#define FUNC_NAME     vect_f32_to_vect_s32
+
+.globl	FUNC_NAME
+.type	FUNC_NAME,@function
+
+#define a         x10
+#define b         x11
+#define len       x12
+#define exp       x13
+
+#define mant1     x18
+#define mant0     x19
+#define tmp       x20
+
+
+.p2align 4
+FUNC_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,0
+    xm.stdsp  s5,s4,8
+
+    //handle tail first
+  { srli t3, len, 1             ; li tmp, 23                 }
+  { xm.zexti len, 1                 ; add exp, exp, tmp           }
+  { mv len, t3                ; xm.brff len, .L_pre_loop         }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    xm.ldd  mant0,mant1, len(b)
+    xm.fsexp t3, tmp, mant0
+    xm.fmant mant0, mant0
+  { sub tmp, tmp, exp           ; xm.brff t3, .L_tail_pos         }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+  { xm.neg mant0, mant0            ; nop                             }
+.L_tail_pos:
+  { xm.shl mant0, mant0, tmp       ; xm.shl t3, len, 1             }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli mant0, mant0, tmp       \nMessage: The shift amount is not 32" */
+    xm.stw mant0, t3(a)
+
+.L_pre_loop:
+  { addi len, len, -1             ; xm.brff len, .L_loop_end         }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+  .L_loop:
+      xm.ldd  mant0,mant1, len(b)
+      xm.fsexp t3, tmp, mant1
+      xm.fmant mant1, mant1
+    { sub tmp, tmp, exp           ; xm.brff t3, .L_not3             }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+      xm.neg mant1, mant1
+    .L_not3:
+        xm.shl mant1, mant1, tmp/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mant1, mant1, tmp\nMessage: The shift amount is not 32" */
+        xm.fsexp t3, tmp, mant0
+        xm.fmant mant0, mant0
+      { sub tmp, tmp, exp           ; xm.brff t3, .L_not4             }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        xm.neg mant0, mant0
+    .L_not4:
+        xm.shl mant0, mant0, tmp/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mant0, mant0, tmp\nMessage: The shift amount is not 32" */
+        xm.std  mant0,mant1, len(a)
+      { addi len, len, -1             ; xm.bt  len, .L_loop            }
+  .L_loop_end:
+
+
+      xm.lddsp  s3,s2,0
+      xm.lddsp  s5,s4,8
+      xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+	
+	// RETURN_REG_HOLDER
+	.set	FUNC_NAME.nstackwords,NSTACKWORDS;  .globl	FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 39. */ 
+	.set	FUNC_NAME.maxcores,1;              	.globl	FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 26. */ 
+	.set	FUNC_NAME.maxtimers,0;              .globl	FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 27. */ 
+	.set	FUNC_NAME.maxchanends,0;            .globl	FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 29. */ 
+.Ltmp0:
+	.size	FUNC_NAME, .Ltmp0-FUNC_NAME
+
+#undef NSTACKWORDS
+        
+#endif
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_abs.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_abs.S
new file mode 100644
index 00000000..4f0d6ad4
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_abs.S
@@ -0,0 +1,124 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+#define NSTACKWORDS     (20)
+#define STACK_TMP_VEC       2
+
+#define a           x10
+#define b           x11
+#define len         x12
+#define tail        x13
+
+
+
+/*  
+headroom_t vect_s16_abs(
+    int16_t a[],
+    const int16_t b[],
+    const unsigned length);
+*/
+
+vect_s16_abs:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,0
+        li t3, 0x0100
+    {   slli tail, len, SIZEOF_LOG2_S16          ;   srli len, len, EPV_LOG2_S16              }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli tail, len, SIZEOF_LOG2_S16          \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri len, len, EPV_LOG2_S16              \nMessage: The shift amount is not 32" */
+    {   xm.zexti tail, 5                            ;   xm.vsetc t3}
+    { nop                                           ;   xm.bu .L_apply_op                          }
+
+.L_func_end_s16:
+
+#undef a
+#undef b
+#undef len
+
+/*
+    When branching here:
+        *   a --> x10
+        *   b --> x11
+        *   loop_count --> x12
+        *   tail --> x13
+        *   VPU mode must already be set.
+*/
+
+#define a           x10
+#define b           x11
+#define loop_count  x12
+#define tail        x13
+
+.type .L_apply_op,@function
+
+.L_apply_op:
+
+    {   xm.mkmsk tail, tail                        ; nop                                           }
+    {   mv s3, b                              ;   xm.brff loop_count, .L_loop_bot              }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    {   li a1, 32                              ;   xm.bu .L_loop_top                          }
+.p2align 4
+.L_loop_top:
+        {   addi loop_count, loop_count, -1           ;   xm.vldr s3}
+        { nop                                           ;   xm.vsign                                   }
+        { nop                                           ;   xm.vlmul0 s3}
+        xm.vlmul1 s3
+
+        {addi a1,sp, (STACK_TMP_VEC)*4             ;  xm.vgetc t3} 
+        {nop; xm.vstr a1}
+        {li a1, 32; xm.vladd a1}
+        {nop; xm.vsetc t3}
+
+        {   add a, a, a1                            ;   xm.vstr a}
+        {   add s3, s3, a1                        ;   xm.bt loop_count, .L_loop_top              }
+.L_loop_bot:
+
+    { nop                                           ;   xm.brff tail, .L_finish                      }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vldr s3}
+    { nop                                           ;   xm.vsign                                   }
+        
+
+    {   nop             ;   xm.vlmul0 s3} 
+    xm.vlmul1 s3
+
+    { addi s3,sp, (STACK_TMP_VEC)*4             ;   xm.vgetc t3} 
+    { nop; xm.vstr s3}
+    { nop; xm.vladd s3}
+       
+    { addi s3,sp, (STACK_TMP_VEC)*4             ;   xm.vsetc t3}
+    { nop                                           ;   xm.vstd s3}
+    { nop                                           ;   xm.vpos                                    }
+        xm.vstrpv s3, tail
+    { nop                                           ;   xm.vldr s3}
+    { nop                                           ;   xm.vstr s3}
+        xm.vstrpv a, tail
+
+.L_finish:
+    {   li a0, 32                              ;   xm.vgetc t3}
+    {   srli a1, t3, 8                          ; nop                                           }
+    {   xm.zexti t3, 5                             ;   xm.shr a0, a0, a1                          }
+    {   addi t3, t3, 1                         ; nop                                           }
+     xm.lddsp  s3,s2,0             
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       } 
+
+.L_end_apply_op: 
+.size .L_apply_op, .L_end_apply_op - .L_apply_op
+
+
+
+
+
+.global vect_s16_abs
+.type vect_s16_abs,@function
+.set vect_s16_abs.nstackwords,NSTACKWORDS;  .global vect_s16_abs.nstackwords /* Translation error on this line: unexpected token at position 41. */ 
+.set vect_s16_abs.maxcores,1;               .global vect_s16_abs.maxcores /* Translation error on this line: unexpected token at position 28. */ 
+.set vect_s16_abs.maxtimers,0;              .global vect_s16_abs.maxtimers /* Translation error on this line: unexpected token at position 29. */ 
+.set vect_s16_abs.maxchanends,0;            .global vect_s16_abs.maxchanends /* Translation error on this line: unexpected token at position 31. */ 
+.size vect_s16_abs, .L_func_end_s16 - vect_s16_abs
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_abs_sum.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_abs_sum.S
new file mode 100644
index 00000000..c668eb19
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_abs_sum.S
@@ -0,0 +1,150 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+int32_t vect_s16_abs_sum(
+    const int16_t b[],
+    const unsigned length);
+*/
+
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (4)
+#define NSTACKWORDS     (8+8*NSTACKVECS+4)
+
+#define FUNCTION_NAME       vect_s16_abs_sum
+
+#define STACK_VEC_TMP       (NSTACKWORDS-24-4)
+#define STACK_VEC_VR        (NSTACKWORDS-32-4)
+#define STACK_VEC_TMP2      (NSTACKWORDS-8-2)
+
+#define b           x10 
+#define N           x11 
+#define tail        x12 
+#define tmp         x13
+#define neg_1       x18
+#define pos_2       x19
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.align 16; /* Translation error on this line: unexpected token at position 9. */ 
+
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,0
+        xm.stdsp  s5,s4,8
+        li t3, 0x0100
+
+     {   addi s4, sp, (STACK_VEC_TMP2)*4        ;   nop                                  }
+         addi s5, s4, (-30)
+
+    {   slli tail, N, SIZEOF_LOG2_S16            ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli tail, N, SIZEOF_LOG2_S16            \nMessage: The shift amount is not 32" */
+    {   xm.zexti tail, 5                            ;   xm.vclrdr                                  }
+    {   srli N, N, EPV_LOG2_S16                  ;   xm.mkmsk tail, tail                        }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri N, N, EPV_LOG2_S16                  \nMessage: The shift amount is not 32" */
+        la t3, vpu_vec_0x0002
+    {   mv pos_2, t3                          ; nop                                           }
+        la t3, vpu_vec_neg_1
+    {   mv neg_1, t3                          ;   xm.vldc t3}
+    {   slli tmp, N, 5                           ; nop                                           }
+    {   add t3, b, tmp                         ;   xm.brff tail, .L_tail_dealt_with             }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    {   addi tmp,sp, (STACK_VEC_TMP)*4             ;   xm.vldr t3}
+    { nop                                           ;   xm.vstd tmp}
+        xm.vstrpv tmp, tail
+    {   addi t3,sp, (STACK_VEC_VR)*4              ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vlmaccr0 tmp}
+    xm.vlmaccr1 tmp
+
+    {nop ; xm.vstd s4}    
+    {nop ; xm.vldd s5}
+    {nop ; xm.vstr s4}
+    {nop ; xm.vldr s5}
+
+
+    {   mv t3, tmp                            ;   xm.vstr t3}
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vpos                                    }
+    {   addi t3,sp, (STACK_VEC_VR)*4              ;   xm.vstr t3}
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vldc pos_2}
+    { nop                                           ;   xm.vlmaccr0 tmp}
+    xm.vlmaccr1 tmp
+
+    {nop ; xm.vstd s4}    
+    {nop ; xm.vldd s5}
+    {nop ; xm.vstr s4}
+    {nop ; xm.vldr s5}
+
+
+
+
+.L_tail_dealt_with:
+    {   addi tmp,sp, (STACK_VEC_TMP)*4             ; nop                                           }
+    {   addi t3,sp, (STACK_VEC_VR)*4              ;   xm.brff N, .L_loop_bot                       }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+.L_loop_top:
+        { nop                                             ;   xm.vldc neg_1}
+        { nop                                             ;   xm.vlmaccr0 b}
+        xm.vlmaccr1 b
+
+        {nop ; xm.vstd s4}    
+        {nop ; xm.vldd s5}
+        {nop ; xm.vstr s4}
+        {nop ; xm.vldr s5}
+
+
+        {   mv t3, b                                ;   xm.vstr t3}
+        {   li t3, 32                               ;   xm.vldr t3}
+        {   add b, b, t3                             ;   xm.vpos                                    }
+        {   addi t3,sp, (STACK_VEC_VR)*4                ;   xm.vstr tmp}
+        { nop                                             ;   xm.vldr t3}
+        { nop                                             ;   xm.vldc pos_2}
+        {   addi N, N, -1                               ;   xm.vlmaccr0 tmp}
+        xm.vlmaccr1 tmp
+
+        {nop ; xm.vstd s4}    
+        {nop ; xm.vldd s5}
+        {nop ; xm.vstr s4}
+        {nop ; xm.vldr s5}
+
+
+        { nop                                             ;   xm.bt N, .L_loop_top                       }
+.L_loop_bot:
+
+
+.L_finish:
+
+
+   
+
+
+    { addi a1,sp, (STACK_VEC_TMP)*4            ; nop/*  xm.vadddr */    }
+      addi s4, a1, 32-2                           
+    { nop                                      ;   xm.vstd a1}
+    { nop                                      ;  lw a0, 0(s4)}
+    {   slli a0, a0, 16                        ;   xm.vstr a1}
+    { nop                                      ;  lw a1, 0(s4)}
+        xm.lddsp  s3,s2,0                               
+        xm.lddsp  s5,s4,8
+    { xm.zexti a1, 16                          ;   nop}
+    {   or a0, a0, a1                          ;   xm.retsp (NSTACKWORDS)*4                       }
+
+//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+.L_end: 
+    .size FUNCTION_NAME, .L_end - FUNCTION_NAME
+
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_argmax.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_argmax.S
new file mode 100644
index 00000000..cf1e263d
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_argmax.S
@@ -0,0 +1,165 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+
+unsigned vect_s16_argmax(
+    const int16_t b[],
+    const unsigned length);
+*/
+
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (3)
+#define NSTACKWORDS     (8 + 8*NSTACKVECS+4)
+
+#define FUNCTION_NAME       vect_s16_argmax
+
+#define STACK_VEC_MAX_DEX   (NSTACKWORDS-8-4)
+#define STACK_VEC_CUR_MAX   (NSTACKWORDS-16-4)
+#define STACK_VEC_CUR_DEX   (NSTACKWORDS-24-4)
+
+#define STACK_N     6
+
+#define b           x10      // ![0x%08X]
+#define N           x11      // ![%d]
+#define vec_16s     x12      // ![0x%X]
+#define tmp         x13      // ![%d]
+#define tmz         x18      // ![%d]
+#define cur_max     x19      // ![0x%08X]
+#define mask_0xF    x20      // ![0x%04X]
+
+
+FUNCTION_NAME:
+
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+
+    li t3, 0x100  //16-bit mode
+{   xm.mkmski mask_0xF, 4                       ;   sw N, (STACK_N)*4                      (sp)}
+{   srli N, N, 4                             ;   xm.vsetc t3}
+
+// cur_max[i] = -0x8000
+    la t3, vpu_vec_0x8000
+{   addi t3,sp, (STACK_VEC_CUR_MAX)*4         ;   xm.vldr t3}
+{ nop                                           ;   xm.vstr t3}
+
+// cur_dex[i] = i
+{   addi tmp,sp, (STACK_VEC_CUR_DEX)*4         ;   li t3, 15                             }
+.L_setup_cur_dex:
+        xm.st16 t3,  t3(tmp)
+    {   addi t3, t3, -1                     ;   xm.bt t3, .L_setup_cur_dex                    }
+
+// max_dex[i] = -1
+    la t3, vpu_vec_neg_1
+{   addi t3,sp, (STACK_VEC_MAX_DEX)*4         ;   xm.vldr t3}
+{   addi cur_max,sp, (STACK_VEC_CUR_MAX)*4     ;   xm.vstr t3}
+
+    la t3, vpu_vec_0x0010
+{   mv vec_16s, t3                        ;   xm.vclrdr                                  }
+{   mv t3, b                              ;   xm.brff N, .L_loop_bot                       }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+
+/*
+    - compute  cur_max[k] - b[k]
+    - create a mask from values less than 0
+        cur_max[k] - b[k] < 0  -->  b[k] > cur_max[k]
+    - overwrite cur_max[k] and max_dex[k] where  b[k] > cur_max[k]
+    - increment cur_dex[k] by 1 (all cur_dex[] elements are  the same)
+    - repeat on next 16 elements of b[]
+
+    Note: This replaces max_dex[k] when b[k] > cur_max[k], NOT when b[k] >= cur_max[k],
+          so this loop prefers earlier indexes
+
+*/
+.L_loop_top:
+    {   mv b, t3                              ;   xm.vldr t3}
+    {   addi N, N, -1                             ;   xm.vlsub cur_max}
+    {   addi t3,sp, 0                         ;   xm.vdepth1                                 }
+        xm.vstrpv t3, mask_0xF
+    {   mv t3, b                              ;   lw tmp, 0                          (sp)}
+    {   mv tmz, tmp                            ; nop                                           }
+xm.zip tmz, tmp, 0                                                                      
+    {   addi t3,sp, (STACK_VEC_CUR_DEX)*4         ;   xm.vldr t3}
+        xm.vstrpv cur_max, tmp                                                                
+    {   addi tmz,sp, (STACK_VEC_MAX_DEX)*4         ;   xm.vldr t3} 
+        xm.vstrpv tmz, tmp                                                                    
+    { nop                                           ;   xm.vladd vec_16s}
+    {   li t3, 32                             ;   xm.vstr t3} 
+    {   add t3, b, t3                         ;   xm.bt N, .L_loop_top                       }
+.L_loop_bot:
+
+{ nop                                           ;   lw N, (STACK_N)*4                      (sp)}
+{   xm.zexti N, 4                               ; nop                                           }
+{   xm.mkmsk N, N                              ;   xm.brff N, .L_no_tail                        }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+{   mv b, t3                              ;   xm.vldr t3}
+{   addi t3,sp, 0                         ;   xm.vlsub cur_max}
+{ nop                                           ;   xm.vdepth1                                 }
+    xm.vstrpv t3, mask_0xF
+{   mv t3, b                              ;   lw tmp, 0                          (sp)}
+{   and tmp, tmp, N                         ;   and tmz, tmp, N                         }
+xm.zip tmz, tmp, 0
+{   addi t3,sp, (STACK_VEC_CUR_DEX)*4         ;   xm.vldr t3}
+    xm.vstrpv cur_max, tmp
+{   addi tmz,sp, (STACK_VEC_MAX_DEX)*4         ;   xm.vldr t3}
+    xm.vstrpv tmz, tmp      
+    
+.L_no_tail:
+
+#undef cur_max
+#undef vec_16s
+#undef mask_0xF
+
+#define cur_max     x19  // ![%d]
+#define max_dex     x12  // ![0x%08X]
+
+{   addi t3,sp, (STACK_VEC_CUR_MAX)*4         ;   li N, 15                               }
+{xm.ldawsp x20, STACK_VEC_MAX_DEX *4 ; nop} 
+xm.ld16s cur_max, N(x28)
+xm.ld16s max_dex,  N(x20)
+{   addi N, N, -1                             ;   nop}
+.L_loop2_top:                                         ;   
+xm.ld16s x10, N(x28)
+slt tmp, x10, cur_max  
+xm.ld16s x21, N(x20)
+xm.eq tmz, a0, cur_max                     
+{nop ; xm.bt tmp, .L_less_than                    }
+    .L_greater_or_equal:
+        {    xm.slt tmp, s5, max_dex                   ;   xm.brff tmz, .L_greater                      }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        .L_equal:
+            { nop                                           ;   xm.brff tmp, .L_less_than                    }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        .L_greater:
+            {   mv cur_max, a0                         ;   mv max_dex, s5                         }
+
+    .L_less_than:
+    {   addi N, N, -1                             ;   xm.bt N, .L_loop2_top                      }
+
+    xm.lddsp  s5,s4,16
+    xm.lddsp  s3,s2,8
+{   mv a0, max_dex                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+
+.L_end: 
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_end - FUNCTION_NAME
+
+
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_argmin.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_argmin.S
new file mode 100644
index 00000000..7a234042
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_argmin.S
@@ -0,0 +1,184 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+
+unsigned vect_s16_argmin(
+    const int16_t b[],
+    const unsigned length);
+*/
+
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (3)
+#define NSTACKWORDS     (8 + 8*NSTACKVECS+4)
+
+#define FUNCTION_NAME       vect_s16_argmin
+
+#define STACK_VEC_MAX_DEX   (NSTACKWORDS-8-4)
+#define STACK_VEC_CUR_MAX   (NSTACKWORDS-16-4)
+#define STACK_VEC_CUR_DEX   (NSTACKWORDS-24-4)
+
+#define STACK_N     6
+
+#define b           x10      // ![0x%08X]
+#define N           x11      // ![%d]
+#define vec_16s     x12      // ![0x%X]
+#define tmp         x13      // ![%d]
+#define tmz         x18      // ![%d]
+#define cur_min     x19      // ![0x%08X]
+#define mask_0xF    x20      // ![0x%04X]
+#define vec_ones    x21      // ![0x%08X]
+
+
+FUNCTION_NAME:
+
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+
+    li t3, 0x100
+{   xm.mkmski mask_0xF, 4                       ;   sw N, (STACK_N)*4                      (sp)}
+{   srli N, N, 4                             ;   xm.vsetc t3}
+
+// cur_min[i] = 0x7FFF
+    la t3, vpu_vec_0x7FFF
+{   addi t3,sp, (STACK_VEC_CUR_MAX)*4         ;   xm.vldr t3}
+{ nop                                           ;   xm.vstr t3}
+
+// cur_dex[i] = i
+{   addi tmp,sp, (STACK_VEC_CUR_DEX)*4         ;   li t3, 15                             }
+.L_setup_cur_dex:
+        xm.st16 t3,  t3(tmp)
+    {   addi t3, t3, -1                     ;   xm.bt t3, .L_setup_cur_dex                    }
+
+// min_dex[i] = -1
+    la t3, vpu_vec_neg_1
+{   addi t3,sp, (STACK_VEC_MAX_DEX)*4         ;   xm.vldr t3}
+{   addi cur_min,sp, (STACK_VEC_CUR_MAX)*4     ;   xm.vstr t3}
+
+    la t3, vpu_vec_0x0010
+{   mv vec_16s, t3                        ;   xm.vclrdr                                  }
+    la t3, vpu_vec_0x0001
+{   mv vec_ones, t3                       ; nop                                           }
+{   mv t3, b                              ;   xm.brff N, .L_loop_bot                       }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+
+/*
+
+    vR[k] = b[k]
+    vR[k] = cur_min[k] - b[k]
+    vR[k] = (cur_min[k] - b[k]) < 0 = cur_min[k] < b[k]
+
+    vR[k] = !vR[k] = !(cur_min[k] < b[k]) = (cur_min[k] >= b[k])
+
+    So, this will replace the indexes if b[k] is equal to cur_min[k]
+
+    Instead, we want:
+    
+    !vR[k] = (cur_min[k] > b[k])
+           = cur_min[k] >= b[k] + 1
+
+    vR[k] = !(cur_min[k] >= b[k] + 1)
+    vR[k] = cur_min[k] < b[k] + 1
+    vR[k] = cur_min[k] - (b[k]+1) < 0
+    vR[k] = cur_min[k] - (b[k] + 1)
+
+
+
+*/
+
+
+.L_loop_top:
+    {   mv b, t3                              ;   xm.vldr t3}
+    { nop                                           ;   xm.vladd vec_ones}
+    {   addi N, N, -1                             ;   xm.vlsub cur_min}
+    {   addi t3,sp, 0                         ;   xm.vdepth1                                 }
+        xm.vstrpv t3, mask_0xF
+    {   mv t3, b                              ;   lw tmp, 0                          (sp)}
+    {   xm.not tmz, tmp                            ;   xm.not tmp, tmp                            }
+xm.zip tmz, tmp, 0                                                                
+    {   addi t3,sp, (STACK_VEC_CUR_DEX)*4         ;   xm.vldr t3}
+        xm.vstrpv cur_min, tmp                                                                
+    {   addi tmz,sp, (STACK_VEC_MAX_DEX)*4         ;   xm.vldr t3} 
+        xm.vstrpv tmz, tmp                                                                    
+    { nop                                           ;   xm.vladd vec_16s}
+    {   li t3, 32                             ;   xm.vstr t3} 
+    {   add t3, b, t3                         ;   xm.bt N, .L_loop_top                       }
+.L_loop_bot:
+
+{ nop                                           ;   lw N, (STACK_N)*4                      (sp)}
+{   xm.zexti N, 4                               ; nop                                           }
+{   xm.mkmsk N, N                              ;   xm.brff N, .L_no_tail                        }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+{   mv b, t3                              ;   xm.vldr t3}
+{ nop                                           ;   xm.vladd vec_ones}
+{   addi t3,sp, 0                         ;   xm.vlsub cur_min}
+{ nop                                           ;   xm.vdepth1                                 }
+    xm.vstrpv t3, mask_0xF
+{   mv t3, b                              ;   lw tmp, 0                          (sp)}
+{   xm.not tmp, tmp                            ; nop                                           }
+{   and tmp, tmp, N                         ;   and tmz, tmp, N                         }
+xm.zip tmz, tmp, 0
+{   addi t3,sp, (STACK_VEC_CUR_DEX)*4         ;   xm.vldr t3}
+    xm.vstrpv cur_min, tmp
+{   addi tmz,sp, (STACK_VEC_MAX_DEX)*4         ;   xm.vldr t3}
+    xm.vstrpv tmz, tmp      
+
+.L_no_tail:
+
+#undef cur_min
+#undef vec_16s
+#undef mask_0xF
+
+#define cur_min     x19  // ![%d]
+#define min_dex     x12  // ![0x%08X]
+
+
+{   addi t3,sp, (STACK_VEC_CUR_MAX)*4         ;   li N, 15                               }
+{   xm.ldawsp x20, (STACK_VEC_MAX_DEX)*4 ; nop}
+xm.ld16s cur_min, N(t3)
+xm.ld16s min_dex, N(x20)
+{   addi N, N, -1                             ;   nop}
+.L_loop2_top:
+xm.ld16s x10, N(x28)
+xm.slt tmp, cur_min, x10
+xm.ld16s x21, N(x20)
+xm.eq tmz, cur_min, a0  
+{nop; xm.bt tmp, .L_greater_than                 }
+    .L_less_or_equal:
+        {    xm.slt tmp, s5, min_dex                   ;   xm.brff tmz, .L_less                         }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        .L_equal:
+            { nop                                           ;   xm.brff tmp, .L_greater_than                 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        .L_less:
+            {   mv cur_min, a0                         ;   mv min_dex, s5                         }
+
+    .L_greater_than:
+    {   addi N, N, -1                             ;   xm.bt N, .L_loop2_top                      }
+
+    xm.lddsp  s5,s4,16
+    xm.lddsp  s3,s2,8
+{   mv a0, min_dex                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+.L_end: 
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_end - FUNCTION_NAME
+
+
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_clip.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_clip.S
new file mode 100644
index 00000000..59fb331d
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_clip.S
@@ -0,0 +1,331 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+
+headroom_t vect_s16_clip( 
+    int16_t a[],
+    const int16_t b[],
+    const unsigned length,
+    const int16_t lower_bound,
+    const int16_t upper_bound,
+    const int b_shr);
+*/
+
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (6)
+#define NSTACKWORDS     (8 + 8*(NSTACKVECS)+4)
+
+#define FUNCTION_NAME   vect_s16_clip
+
+#define STACK_VEC(K)    (NSTACKWORDS - (8*((K)+1))-4)
+
+#define a           x10
+#define b           x11
+#define N           x12
+#define lower       x13
+#define upper       x18
+#define b_shr       x19
+#define tail        x20
+#define tmp1        x21
+#define tmp2        x22
+#define int_max     x23
+#define int_min     x24
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+FUNCTION_NAME:
+
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,0
+        xm.stdsp  s5,s4,8
+        li t3, 0x0100
+        xm.stdsp  s7,s6,16
+    {   slli tail, N, SIZEOF_LOG2_S16            ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli tail, N, SIZEOF_LOG2_S16            \nMessage: The shift amount is not 32" */
+    {   xm.zexti tail, 5                            ;   sw s8, 24                          (sp)}
+
+    {   li tmp1, 15                            ;   srli N, N, EPV_LOG2_S16                  }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri N, N, EPV_LOG2_S16                  \nMessage: The shift amount is not 32" */
+    {   xm.mkmsk int_max, tmp1                     ;   xm.vclrdr                                  }   
+    {  xm.addi int_min, int_max, 1                 ;   xm.mkmsk tail, tail                        }
+
+    // If upper >= 0  and lower <= 0, we can do this more efficiently.
+    mv upper, a4
+    {   li tmp1, 0                             ;   nop}
+    mv b_shr, a5
+    {   xm.slt tmp2, upper, tmp1                   ;   nop}
+    {   xm.slt tmp1, tmp1, lower                   ;   nop                }
+    bnez tmp2, .L_lower_nice 
+    bnez tmp1, .L_upper_nice 
+
+
+    // Otherwise, we have the nice situation.
+.L_nice:
+
+    //In the nice situation, the upper bound is no more than 1 VLADD away from the positive  saturation 
+    //  point of the VPU, and the lower bound is no more than 1 VLADD away from the negative saturation
+    //  point of the VPU. 
+
+    {   addi t3,sp, (STACK_VEC(0))*4              ;   sub upper, int_max, upper               }
+    {   mv tmp1, upper                         ;   mv tmp2, upper                         }
+xm.zip tmp2, tmp1, 4
+    { nop                                           ;   xm.bl .L_std_func1                         }
+
+    {   addi t3,sp, (STACK_VEC(2))*4              ;   xm.neg upper, upper                        }
+    {   mv tmp1, upper                         ;   mv tmp2, upper                         }
+xm.zip tmp2, tmp1, 4
+    { nop                                           ;   xm.bl .L_std_func1                         }
+
+    {   addi t3,sp, (STACK_VEC(1))*4              ;   sub lower, int_min, lower               }
+    {   mv tmp1, lower                         ;   mv tmp2, lower                         }
+xm.zip tmp2, tmp1, 4
+    { nop                                           ;   xm.bl .L_std_func1                         }
+
+    {   addi t3,sp, (STACK_VEC(3))*4              ;   xm.neg lower, lower                        }
+    {   mv tmp1, lower                         ;   mv tmp2, lower                         }
+xm.zip tmp2, tmp1, 4
+    { nop                                           ;   xm.bl .L_std_func1                         }
+
+    { nop                                           ;   xm.bu .L_std_func_end1                     }
+.L_std_func1:
+        xm.stdi  tmp1,tmp1, 0(t3)
+        xm.stdi  tmp1,tmp1, 8(t3)
+        xm.stdi  tmp1,tmp1, 16(t3)
+        xm.stdi  tmp1,tmp1, 24(t3)
+        ret 
+.L_std_func_end1:
+
+#define vec_upper   upper
+#define vec_lower   lower
+#define vec_nupper  tmp1
+#define vec_nlower  tmp2
+#define _32         int_min
+
+//{   nop; xm.ldawsp vec_upper, STACK_VEC(0)*4        }
+//{   nop; xm.ldawsp vec_lower, STACK_VEC(1)*4        }
+//{   nop; xm.ldawsp vec_nupper, STACK_VEC(2)*4       }
+//{   nop; xm.ldawsp vec_nlower, STACK_VEC(3)*4       }
+{addi vec_upper,sp, (STACK_VEC(0))*4 ; nop}
+{addi vec_lower,sp, (STACK_VEC(1))*4 ; nop}
+{addi vec_nupper,sp, (STACK_VEC(2))*4 ; nop}
+{addi vec_nlower,sp, (STACK_VEC(3))*4 ; nop}
+    {   li _32, 32                             ;   xm.brff N, .L_nice_loop_bot                  }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_nice_loop_top:
+            xm.vlashr b, b_shr
+        {   add b, b, _32                       ;   xm.vladd vec_upper}
+        {   addi N, N, -1                         ;   xm.vladd vec_nupper}
+        { nop                                       ;   xm.vladd vec_lower}
+        { nop                                       ;   xm.vladd vec_nlower}
+        {   add a, a, _32                       ;   xm.vstr a}
+        { nop                                       ;   xm.bt N, .L_nice_loop_top                  }
+    .L_nice_loop_bot:
+    
+     beqz tail, .L_finish          
+        xm.vlashr b, b_shr
+    { nop                                       ;   xm.vladd vec_upper}
+    { nop                                       ;   xm.vladd vec_nupper}
+    { nop                                       ;   xm.vladd vec_lower}
+    { nop                                       ;   xm.vladd vec_nlower}
+    j .L_finishish 
+
+/*
+    C logic:
+
+    void clip16(int16_t output[], int16_t input[], int16_t lower, int16_t upper, unsigned length, int input_shr)
+    {
+        if(upper >= 0 && lower <= 0){
+
+            int16_t up_thing = VPU_INT16_MAX - upper;
+            int16_t lo_thing = VPU_INT16_MIN - lower;
+
+            // 7 instructions required
+            for(unsigned int i = 0; i < length; i++){
+
+                int16_t tmp = input[i] >> input_shr;
+                tmp = SATURATING_ADD(tmp, up_thing);
+                tmp = tmp - up_thing;
+                tmp = SATURATING_ADD(tmp, lo_thing);
+                tmp = tmp - lo_thing
+
+                output[i] = tmp;
+            }
+        } else {
+
+            int16_t one, two, three;
+
+            if(upper >= 0){
+                one = VPU_INT16_MAX - upper;
+                two = VPU_INT16_MIN;
+                three = VPU_INT16_MIN - (lower - upper);
+            } else {
+                one = VPU_INT16_MIN - lower;
+                two = VPU_INT16_MAX;
+                three = VPU_INT16_MAX - (upper - lower);
+            }
+
+            // 9 instructions required
+            for(unsigned int i = 0; i < length; i++){
+
+                int16_t tmp = input[i] >> input_shr;
+                tmp = SATURATING_ADD(tmp, one);
+                tmp = tmp - one;
+                tmp = tmp + two;
+                tmp = SATURATING_ADD(tmp, three);
+                tmp = tmp - three;
+                tmp = tmp - two;
+
+                output[i] = tmp;
+            }
+        }
+    }
+
+*/
+
+
+
+#undef vec_upper 
+#undef vec_lower 
+#undef vec_nupper
+#undef vec_nlower
+#undef _32       
+
+#define vec_one     upper
+#define vec_two     lower
+#define vec_three   tmp1
+
+#define vec_none    tmp2
+#define vec_ntwo    int_max
+#define vec_nthree  int_min
+
+    // The nice thing about the not nice scenario is that at least one of the two bounds is
+    //  guaranteed to be within one VLADD of the relevant saturation point.
+
+.L_upper_nice:
+
+    {   sub vec_one, int_max, upper             ;   xm.neg vec_three, lower                    }
+     addi vec_three, vec_three, -1
+    {   addi vec_two, int_min, 1                ;   xm.bu .L_not_nice_thing                    }
+    
+.L_lower_nice:
+    {   sub vec_one, int_min, lower             ;   xm.neg vec_three, upper                    }
+    {   mv vec_two, int_max                    ; nop                                           }
+
+
+.L_not_nice_thing:
+
+    {   addi t3,sp, (STACK_VEC(0))*4              ; nop                                           }
+    {   mv s6, vec_one                         ;   mv s7, vec_one                         }
+xm.zip s7, s6, 4
+    { nop                                           ;   xm.bl .L_std_func                          }
+
+    {   addi t3,sp, (STACK_VEC(1))*4              ; nop                                           }
+    {   mv s6, vec_two                         ;   mv s7, vec_two                         }
+xm.zip s7, s6, 4
+    { nop                                           ;   xm.bl .L_std_func                          }
+
+    {   addi t3,sp, (STACK_VEC(3))*4              ; nop                                           }
+{   xm.neg s6, vec_one                         ;   nop}
+{nop; xm.neg s7, vec_one                         }
+xm.zip s7, s6, 4
+    { nop                                           ;   xm.bl .L_std_func                          }
+
+    {   addi t3,sp, (STACK_VEC(2))*4              ; nop                                           }
+{   xm.neg s6, vec_two                         ;  nop}
+{nop; xm.neg s7, vec_two                         }
+xm.zip s7, s6, 4
+    { nop                                           ;   xm.bl .L_std_func                          }
+
+    {   addi t3,sp, (STACK_VEC(4))*4              ; nop                                           }
+    {   mv s6, vec_three                       ;   mv s7, vec_three                       }
+xm.zip s7, s6, 4
+    { nop                                           ;   xm.bl .L_std_func                          }
+
+    {   addi t3,sp, (STACK_VEC(5))*4              ; nop                                           }
+{   xm.neg s6, vec_three              ; nop                                           }
+{   nop; xm.neg s7, vec_three                       }
+xm.zip s7, s6, 4
+    { nop                                           ;   xm.bl .L_std_func                          }
+
+    { nop                                           ;   xm.bu .L_std_func_end                      }
+.L_std_func:
+        xm.stdi  s6,s6, 0(t3)
+        xm.stdi  s6,s6, 8(t3)
+        xm.stdi  s6,s6, 16(t3)
+        xm.stdi  s6,s6, 24(t3)
+        ret 
+.L_std_func_end:
+/*
+{   nop; xm.ldawsp vec_one, STACK_VEC(0)  *4        }
+{   nop; xm.ldawsp vec_none, STACK_VEC(3) *4        }
+{   nop; xm.ldawsp vec_two, STACK_VEC(1)  *4        }
+{   nop; xm.ldawsp vec_ntwo, STACK_VEC(2) *4        }
+{   nop; xm.ldawsp vec_three, STACK_VEC(4) *4       }
+{   nop; xm.ldawsp vec_nthree, STACK_VEC(5)*4       }
+*/
+{addi vec_one,sp, (STACK_VEC(0))*4 ; nop}
+{addi vec_none,sp, (STACK_VEC(3))*4 ; nop}
+{addi vec_two,sp, (STACK_VEC(1))*4 ; nop}
+{addi vec_ntwo,sp, (STACK_VEC(2))*4 ; nop}
+{addi vec_three,sp, (STACK_VEC(4))*4 ; nop}
+{addi vec_nthree,sp, (STACK_VEC(5))*4 ; nop}
+
+    {   li t3, 32                             ;   xm.brff N, .L_not_nice_loop_bot              }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+.L_not_nice_loop_top:
+            xm.vlashr b, b_shr
+        {   add b, b, t3                       ;   xm.vladd vec_one}
+        {   addi N, N, -1                         ;   xm.vladd vec_none}
+        { nop                                       ;   xm.vladd vec_two}
+        { nop                                       ;   xm.vladd vec_three}
+        { nop                                       ;   xm.vladd vec_nthree}
+        { nop                                       ;   xm.vladd vec_ntwo}
+        {   add a, a, t3                       ;   xm.vstr a}
+        { nop                                       ;   xm.bt N, .L_not_nice_loop_top              }
+.L_not_nice_loop_bot:
+    
+    { nop                                       ;   xm.brff tail, .L_finish                      }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        xm.vlashr b, b_shr
+    { nop                                       ;   xm.vladd vec_one}
+    { nop                                       ;   xm.vladd vec_none}
+    { nop                                       ;   xm.vladd vec_two}
+    { nop                                       ;   xm.vladd vec_three}
+    { nop                                       ;   xm.vladd vec_nthree}
+    { nop                                       ;   xm.vladd vec_ntwo}
+
+
+.L_finishish:
+    { nop                                       ;   xm.vstd tmp1}
+    xm.vstrpv a, tail
+    xm.vstrpv tmp1, tail
+    { nop                                       ;   xm.vldd tmp1}
+    { nop                                       ;   xm.vstd tmp1}
+
+.L_finish:
+    {   li a0, 15                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ;   lw s8, 24                          (sp)}
+        xm.lddsp  s3,s2,0
+        xm.lddsp  s5,s4,8
+        xm.lddsp  s7,s6,16
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       } 
+
+.L_func_end:
+
+//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_dot.c b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_dot.c
new file mode 100644
index 00000000..540ad8c1
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_dot.c
@@ -0,0 +1,36 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined (__VX4B__)
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include "xmath/xmath.h"
+#include "vpu_helper.h"
+#include "xmath/xs3/vpu_scalar_ops.h"
+
+
+
+int64_t vect_s16_dot(
+    const int16_t b[],
+    const int16_t c[],
+    const unsigned length)
+{
+    //#warn vect_s16_dot is not yet optimised for vx4b.
+    
+    // Note: instead of using the 32-bit accumulators for this, the assembly version of this function implements
+    //       makeshift 48-bit accumulators, which is why this is using a 64-bit int for accumulation.
+    vpu_int32_acc_t acc = 0;
+
+    const int64_t upper_sat_bound = 0x7FFFFFFFFFFFLL;
+    const int64_t lower_sat_bound = -upper_sat_bound;
+
+    for(unsigned k = 0; k < length; k++){
+        acc += vlmacc16(0, b[k], c[k]);
+        acc = MAX(lower_sat_bound, MIN(upper_sat_bound, acc));
+    }
+
+    return acc;
+}
+
+#endif
\ No newline at end of file
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_energy.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_energy.S
new file mode 100644
index 00000000..086ada65
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_energy.S
@@ -0,0 +1,116 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+int32_t vect_s16_energy(
+    const int16_t b[],
+    const unsigned length,
+    const right_shift_t b_shr);
+*/
+
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (4)
+#define NSTACKWORDS     (8 + 8*NSTACKVECS+4)
+
+
+#define FUNCTION_NAME   vect_s16_energy
+
+#define STACK_VEC_TMP       (NSTACKWORDS-8-16-4)
+#define STACK_VEC_VR        (NSTACKWORDS-16-16-4)
+#define STACK_VEC_TMP2       (NSTACKWORDS-8-2)
+
+#define b           x10
+#define N           x11
+#define b_shr       x12
+#define vec_tmp     x13
+#define tail        x18
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.align 16; /* Translation error on this line: unexpected token at position 9. */ 
+
+FUNCTION_NAME:
+
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,0
+        xm.stdsp  s5,s4,8
+
+     {   addi s4,sp, (STACK_VEC_TMP2)*4        ;   nop                                  }
+         addi s5, s4, (-30)
+        li t3, 0x100
+    { nop                                           ;   addi vec_tmp,sp, (STACK_VEC_TMP)*4         }
+    {   slli tail, N, SIZEOF_LOG2_S16            ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli tail, N, SIZEOF_LOG2_S16            \nMessage: The shift amount is not 32" */
+    {   xm.zexti tail, 5                            ;   xm.vclrdr                                  }
+    {   srli N, N, EPV_LOG2_S16                  ;   xm.brff tail, .L_tail_dealt_with_s16         }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri N, N, EPV_LOG2_S16                  \nMessage: The shift amount is not 32", 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    { nop                                           ;   slli N, N, 5                             }
+    {   add t3, b, N                           ;   xm.vstd vec_tmp}
+    {   xm.mkmsk tail, tail                        ; nop                                           }
+        xm.vlashr t3, b_shr
+        xm.vstrpv vec_tmp, tail
+#undef tail
+
+    { nop                                           ;   xm.vldc vec_tmp}
+    { nop                                           ;   xm.vclrdr                                  }
+    {   srli N, N, 5                             ;   xm.vlmaccr0 vec_tmp}
+    xm.vlmaccr1 vec_tmp
+        {nop ; xm.vstd s4}    
+        {nop ; xm.vldd s5}
+        {nop ; xm.vstr s4}
+        {nop ; xm.vldr s5}
+
+.L_tail_dealt_with_s16:
+    {   addi t3,sp, (STACK_VEC_VR)*4              ;   xm.brff N, .L_loop_bot_s16                   }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+
+.L_loop_top_s16:
+        {   li t3, 32                             ;   xm.vstr t3}
+            xm.vlashr b, b_shr
+        {   add b, b, t3                           ;   xm.vstr vec_tmp}
+        {   addi t3,sp, (STACK_VEC_VR)*4              ;   xm.vldc vec_tmp}
+        { nop                                           ;   xm.vldr t3}
+        {   addi N, N, -1                             ;   xm.vlmaccr0 vec_tmp}
+        xm.vlmaccr1 vec_tmp
+        {nop ; xm.vstd s4}    
+        {nop ; xm.vldd s5}
+        {nop ; xm.vstr s4}
+        {nop ; xm.vldr s5}
+        { nop                                           ;   xm.bt N, .L_loop_top_s16                   }
+.L_loop_bot_s16:
+
+.L_finish_s16:
+
+    //{ nop                                           ;   xm.vadddr                                  }
+    { nop                                           ;   xm.vstd vec_tmp}
+       addi s4, vec_tmp, 32-2
+    { nop                                           ;   lw a1, 0(s4)}
+    {   slli a1, a1, 16                          ;   xm.vstr vec_tmp}
+    { nop                                           ;   lw a0, 0(s4)}
+        xm.lddsp  s3,s2,0
+         xm.lddsp  s5,s4,8
+    { xm.zexti a0, 16 ;nop}
+    {   or a0, a0, a1                          ;   xm.retsp (NSTACKWORDS)*4                       }
+
+
+//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+.L_end: 
+    .size FUNCTION_NAME, .L_end - FUNCTION_NAME
+
+
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_extract_high_byte.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_extract_high_byte.S
new file mode 100644
index 00000000..876bb503
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_extract_high_byte.S
@@ -0,0 +1,132 @@
+// Copyright 2021-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+void vect_s16_extract_high_byte(
+    int8_t a[],
+    const int16_t b[],
+    const unsigned len);
+*/
+
+
+#include "../asm_helper.h"
+
+#define NSTACKWORDS     (12+8+4)
+
+#define FUNCTION_NAME   vect_s16_extract_high_byte
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+
+#define STACK_LEN       (8)
+#define STACK_TMP       (0)
+
+#define a           x10 
+#define b           x11 
+#define len         x12
+#define eight       x13
+#define vec_tmp     x18
+#define tmp         x19
+#define _16         x20
+#define tail        x21
+#define vec_0x007F  x22
+
+.text
+.p2align 2
+
+
+FUNCTION_NAME:
+      xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+      li t3, 0x100
+      xm.stdsp  s3,s2,8
+      xm.stdsp  s5,s4,16
+      xm.stdsp  s7,s6,24
+    { xm.mkmski s8, 4                              ; sw s8, 4                            (sp)}
+    { mv t3, len                              ; xm.vsetc t3}
+    { xm.zexti t3, 4                               ; srli len, len, EPV_LOG2_S16                }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shri len, len, EPV_LOG2_S16                \nMessage: The shift amount is not 32" */
+    { addi vec_tmp,sp, (STACK_VEC_TMP)*4           ; xm.mkmsk tail, t3                           }
+    { addi tmp,sp, (STACK_TMP)*4                   ; sw len, (STACK_LEN)*4                    (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+
+// First thing, write 0x80 to all outputs.
+lui t3, %hi(vpu_vec_0x80)
+      addi t3,t3, %lo(vpu_vec_0x80)
+    { li _16, 16                               ; mv s6, a                                 }
+    { li eight, 8                              ; xm.vldr t3}
+    { xm.mkmski t3, 16                             ; xm.brff len, .L_set_0x80_loop_end              }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_set_0x80_loop_top:
+      { addi len, len, -1                         ; nop                                           }
+        xm.vstrpv s6, t3
+      { add s6, s6, _16                         ; xm.bt len, .L_set_0x80_loop_top              }
+    .L_set_0x80_loop_end:
+    { nop                                         ; xm.brff tail, .L_0x80_no_tail                    }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+      xm.vstrpv s6, tail
+    .L_0x80_no_tail:
+
+// Now that that's done, actually compute outputs, only overwriting those that shouldn't be 0x80
+//  (this is to avoid symmetric saturation)
+
+lui t3, %hi(vpu_vec_0x007F)
+      addi t3,t3, %lo(vpu_vec_0x007F)
+    { nop                                           ; lw len, (STACK_LEN)*4                    (sp)}
+    { mv vec_0x007F, t3                       ; xm.brff len, .L_loop_bot                       }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ; xm.bu .L_loop_top                            }
+
+.p2align 4
+.L_loop_top:
+          xm.vlashr b, eight
+{ xm.neg eight, eight                          ; nop}
+{nop;xm.vstr vec_tmp}
+        { add b, b, _16                             ; xm.vladd vec_0x007F}
+        { xm.mkmski t3, 16                             ; xm.vdepth1                                   }
+          xm.vstrpv tmp, s8
+        { addi len, len, -1                           ; lw s7,0                            ( tmp)}
+        { xm.andnot t3, s7                            ; add b, b, _16                             }
+          xm.vlashr vec_tmp, eight
+{ xm.neg eight, eight                          ; nop}
+{nop;xm.vdepth8                                   }
+          xm.vstrpv a, t3
+        { add a, a, _16                             ; xm.bt len, .L_loop_top                       }
+
+.L_loop_bot:
+    { nop                                           ; xm.brff tail, .L_finish                      }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+      xm.vlashr b, eight
+{ xm.neg eight, eight                          ; nop}
+{nop;xm.vstr vec_tmp}
+    { nop                                           ; xm.vladd vec_0x007F}
+    { nop                                           ; xm.vdepth1                                 }
+      xm.vstrpv tmp, s8                       
+    { nop                                           ; lw s7,0                          ( tmp)}
+    { xm.andnot tail, s7                           ; nop                                         }
+      xm.vlashr vec_tmp, eight                    
+    { nop                                           ; xm.vdepth8                                 }
+      xm.vstrpv a, tail               
+    
+    
+.L_finish:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s7,s6,24
+    { nop                                           ; lw s8, 4                        (sp)}
+    { nop                                           ; xm.retsp (NSTACKWORDS)*4                     } 
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_extract_low_byte.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_extract_low_byte.S
new file mode 100644
index 00000000..c8dfb1f8
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_extract_low_byte.S
@@ -0,0 +1,129 @@
+// Copyright 2021-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+void vect_s16_extract_low_byte(
+    int8_t a[],
+    const int16_t b[],
+    const unsigned len);
+*/
+
+
+#include "../asm_helper.h"
+
+#define NSTACKWORDS     (8+8+4)
+
+#define FUNCTION_NAME   vect_s16_extract_low_byte
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+
+#define STACK_LEN       (6)
+#define STACK_TMP       (7)
+
+#define a           x10 
+#define b           x11 
+#define len         x12
+#define tmpA        x13
+#define tmpB        x18
+#define _16         x19
+#define tail        x20
+#define vec_0x7FFF  x21
+
+.text
+.p2align 2
+
+
+FUNCTION_NAME:
+      xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+      li t3, 0x100
+      xm.stdsp  s3,s2,8
+      xm.stdsp  s5,s4,16
+    { mv t3, len                              ; xm.vsetc t3}
+    { xm.zexti t3, 4                               ; srli len, len, EPV_LOG2_S16                }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shri len, len, EPV_LOG2_S16                \nMessage: The shift amount is not 32" */
+    { addi tmpA,sp, (STACK_TMP)*4                  ; xm.mkmsk tail, t3                           }
+    { nop                                           ; sw len, (STACK_LEN)*4                    (sp)}
+
+// First thing, write 0x80 to all outputs.
+      la t3, vpu_vec_0x80
+    { li _16, 16                               ; mv tmpB, a                               }
+    { nop                                           ; xm.vldr t3}
+    { xm.mkmski t3, 16                             ; xm.brff len, .L_set_0x80_loop_end              }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_set_0x80_loop_top:
+      { addi len, len, -1                           ; nop                                           }
+        xm.vstrpv tmpB, t3
+      { add tmpB, tmpB, _16                       ; xm.bt len, .L_set_0x80_loop_top              }
+    .L_set_0x80_loop_end:
+    { nop                                           ; xm.brff tail, .L_0x80_no_tail                  }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+      xm.vstrpv tmpB, tail
+    .L_0x80_no_tail:
+
+// Now that that's done, actually compute outputs, only overwriting those that shouldn't be 0x80
+//  (this is to avoid symmetric saturation)
+
+      la t3, vpu_vec_0x0100
+    { nop                                           ; xm.vldc t3}
+      la t3, vpu_vec_0x7FFF
+    { mv vec_0x7FFF, t3                       ; lw len, (STACK_LEN)*4                    (sp)}
+    { addi t3,sp, (STACK_VEC_TMP)*4               ; xm.brff len, .L_loop_bot                       }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ; xm.bu .L_loop_top                            }
+
+.p2align 4
+.L_loop_top:
+        { addi len, len, -1                           ; xm.vclrdr                                    }
+        {nop                            ; xm.vlmacc0 b}
+        xm.vlmacc1 b
+        { add b, b, _16                             ;nop}
+        { add b, b, _16                             ; xm.vstr t3}
+        { xm.mkmski tmpB, 4                             ; xm.vladd vec_0x7FFF}
+        { nop                                           ; xm.vdepth1                                   }
+          xm.vstrpv tmpA, tmpB
+        { xm.mkmski tmpB, 16                            ; lw s3,0                           ( tmpA)}
+{ xm.andnot tmpB, s3                           ; nop}
+{nop;xm.vldr t3}
+        { li _16, 16                               ; xm.vdepth8                                   }
+          xm.vstrpv a, tmpB
+        { add a, a, _16                             ; xm.bt len, .L_loop_top                       }
+
+.L_loop_bot:
+    { nop                                           ; xm.brff tail, .L_finish                      }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ; xm.vclrdr                                  }
+    { nop                                           ; xm.vlmacc0 b}
+    xm.vlmacc1 b
+    { nop                                           ; xm.vstr t3}
+    { nop                                           ; xm.vladd vec_0x7FFF}
+    { xm.mkmski tmpB, 4                             ; xm.vdepth1                                 }
+      xm.vstrpv tmpA, tmpB                       
+    { nop                                           ; lw s3,0                         ( tmpA)}
+{ xm.andnot tail, s3                           ; nop}
+{nop;xm.vldr t3}
+    { nop                                           ; xm.vdepth8                                 }
+      xm.vstrpv a, tail               
+    
+    
+.L_finish:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+    { nop                                           ; xm.retsp (NSTACKWORDS)*4                     } 
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_inverse.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_inverse.S
new file mode 100644
index 00000000..1cdad3fe
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_inverse.S
@@ -0,0 +1,66 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+
+void vect_s16_inverse(
+    int16_t a[],
+    const int16_t b[],
+    const unsigned length,
+    const unsigned scale);
+
+*/
+
+#define NSTACKWORDS     (4)
+
+#define FUNCTION_NAME   vect_s16_inverse
+
+#define a               x10
+#define b               x11
+#define length          x12
+#define scale           x13
+
+.text
+.p2align 2
+
+
+FUNCTION_NAME:
+
+{   xm.mkmsk scale, scale                      ;  xm.entsp (NSTACKWORDS)*4                    }
+{ sw s2, 0(sp); li s2, 15}
+xm.mkmsk s2, s2
+{   addi scale, scale, 1                     ; nop                                           }
+{   addi length, length, -1                   ;   xm.brff length, .L_finish                    }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+.L_loop_top:
+    xm.ld16s t3, length(b)
+    div t3, scale, t3
+    xm.min t3, t3, s2
+    xm.st16 t3, length(a)
+    {   addi length, length, -1                   ;   xm.bt length, .L_loop_top                  }
+    
+.L_finish:
+    lw s2, 0(sp)
+    xm.retsp (NSTACKWORDS)*4
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_macc.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_macc.S
new file mode 100644
index 00000000..7da99081
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_macc.S
@@ -0,0 +1,122 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// XMOS Public License: Version 1
+
+#if defined(__VX4B__)
+
+
+/*  
+headroom_t vect_s16_macc(
+    int16_t acc[],
+    const int16_t b[],
+    const int16_t c[],
+    const unsigned len,
+    const int acc_shr,
+    const int bc_shr);
+*/
+
+
+#include "../asm_helper.h"
+
+#define NSTACKVECTS     (2)
+#define NSTACKWORDS     (8 + 8*NSTACKVECTS+4)
+
+#define FUNCTION_NAME   vect_s16_macc
+
+#define STACK_VEC_SAT   (NSTACKWORDS-8-4)
+#define STACK_VEC_TMP   (NSTACKWORDS-16-4)
+
+#define acc         x10 
+#define b           x11 
+#define c           x12
+#define len         x13
+#define bc_shr      x18
+#define _32         x19
+#define tmp         x20
+#define tail        x21
+#define acc_shr     x22
+#define mask        x23
+
+.text
+.p2align 2
+
+
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        li t3, 0x100
+        xm.stdsp  s3,s2,8
+        xm.stdsp  s5,s4,16
+        xm.stdsp  s7,s6,24
+    {   slli t3, len, SIZEOF_LOG2_S16           ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli t3, len, SIZEOF_LOG2_S16           \nMessage: The shift amount is not 32" */
+    mv bc_shr, a5
+    {   xm.zexti t3, 5                             ;  nop}
+    {   slli tmp, bc_shr, 16                     ;   xm.zexti bc_shr, 16                         }
+    {   or bc_shr, tmp, bc_shr                  ;   srli len, len, EPV_LOG2_S16              }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri len, len, EPV_LOG2_S16              \nMessage: The shift amount is not 32" */
+    mv acc_shr, a4
+    {   addi tmp,sp, (STACK_VEC_TMP)*4             ;   nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */
+
+        xm.stdsp  bc_shr,bc_shr,((STACK_VEC_SAT)/2 + 0)*8
+        xm.stdsp  bc_shr,bc_shr,((STACK_VEC_SAT)/2 + 1)*8
+        xm.stdsp  bc_shr,bc_shr,((STACK_VEC_SAT)/2 + 2)*8
+        xm.stdsp  bc_shr,bc_shr,((STACK_VEC_SAT)/2 + 3)*8
+
+    {   addi bc_shr,sp, (STACK_VEC_SAT)*4          ;   xm.mkmsk tail, t3                         }
+    {   li _32, 32                             ;   xm.brff len, .L_loop_bot                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    {   xm.mkmski mask, 32                          ;   xm.bu .L_loop_top                          }
+
+.p2align 4
+.L_loop_top:
+            xm.vlashr acc, acc_shr
+            xm.vstrpv acc, mask
+        {   addi len, len, -1                         ;   xm.vclrdr                                  }
+        {   add b, b, _32                           ;   xm.vldc b}
+        {   nop                           ;   xm.vlmacc0 c}
+         xm.vlmacc1 c
+        {   add c, c, _32                           ;   nop}
+        xm.vlsat bc_shr
+        { nop                                           ;   xm.vladd acc}
+        {   add acc, acc, _32                       ;   xm.vstr acc}
+        { nop                                           ;   xm.bt len, .L_loop_top                     }
+
+.L_loop_bot:
+    { nop                                           ;   xm.brff tail, .L_finish                  }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+      xm.vlashr acc, acc_shr
+      xm.vstrpv acc, tail
+    { nop                                           ;   xm.vclrdr                              }
+    { nop                                           ;   xm.vldc b}
+    { nop                                           ;   xm.vlmacc0 c}
+     xm.vlmacc1 c
+    {   mv t3, bc_shr                         ;   nop}
+    xm.vlsat bc_shr
+    { nop                                           ;   xm.vladd acc}
+    { nop                                           ;   xm.vstd t3}
+        xm.vstrpv t3, tail
+        xm.vstrpv acc, tail
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vstr t3}
+
+.L_finish:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s7,s6,24
+    {   li a0, 15                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ; nop                                       }
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                   } 
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_max.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_max.S
new file mode 100644
index 00000000..81c13c40
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_max.S
@@ -0,0 +1,114 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+
+int32_t vect_s16_max(
+    const int32_t b[],
+    const unsigned length);
+
+
+*/
+
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (2)
+#define NSTACKWORDS     (8 + 8*NSTACKVECS+4)
+
+#define FUNCTION_NAME       vect_s16_max
+
+#define STACK_VEC_TMP       (NSTACKWORDS-8-4)
+#define STACK_VEC_CUR_MAX   (NSTACKWORDS-16-4)
+
+#define b           x10      // ![0x%08X]
+#define N           x11      // ![%d]
+#define tail        x12      // ![0x%X]
+#define tmp         x13      // ![%d]
+#define tmz         x18      // ![%d]
+
+
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.align 16; /* Translation error on this line: unexpected token at position 9. */ 
+
+FUNCTION_NAME:
+
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,0
+        xm.stdsp  s5,s4,8
+        li t3, 0x100
+    {   slli tail, N, SIZEOF_LOG2_S16            ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli tail, N, SIZEOF_LOG2_S16            \nMessage: The shift amount is not 32" */
+    {   srli N, N, EPV_LOG2_S16                  ; nop                                           }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri N, N, EPV_LOG2_S16                  \nMessage: The shift amount is not 32" */
+    {   slli tmp, N, 5                           ; nop                                           }
+        la t3, vpu_vec_0x8000
+    {   addi t3,sp, (STACK_VEC_CUR_MAX)*4         ;   xm.vldr t3}
+    {   add t3, b, tmp                         ;   xm.vstr t3}
+    {   xm.zexti tail, 5                            ;   xm.vldr t3}
+    {   xm.mkmsk tail, tail                        ;   addi t3,sp, (STACK_VEC_CUR_MAX)*4         }
+        xm.vstrpv t3, tail
+    
+    // Tail is fully accounted for in cur_max now.
+
+#undef tail
+#define cur_max     x12      // ![0x%08X]
+    
+    {   addi tmp,sp, (STACK_VEC_TMP)*4             ;   mv cur_max, t3                        }
+    { nop                                           ;   xm.vclrdr                                  }
+    {   mv t3, b                              ;   xm.brff N, .L_loop_bot                       }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_loop_top:
+        // cur_max[] saved in stack
+
+        {   mv b, t3                              ;   xm.vldr t3} //  vR[i] = b[i]
+        {   addi N, N, -1                             ;   xm.vlsub cur_max} //  vR[i] = cur_max[i] - b[i]
+        {   addi t3,sp, 0                         ;   xm.vdepth1                                 } //  vR[0] = [bitmask -- 1 where vR[i] < 0]  b[i] > cur_max[i]
+        {   xm.mkmski tmp, 2                            ; nop                                           }
+            xm.vstrpv t3, tmp
+        {   mv t3, b                              ;   lw tmp, 0                          (sp)}
+        {   mv tmz, tmp                            ;   xm.vldr t3}
+xm.zip tmz, tmp, 0
+        { nop                                           ;   li t3, 32                             }
+            xm.vstrpv cur_max, tmp
+        {   add t3, b, t3                         ;   xm.bt N, .L_loop_top                       }
+.L_loop_bot:
+
+    {   addi t3,sp, (STACK_VEC_CUR_MAX)*4         ;   li N, 15                               }
+    xm.ld16s cur_max, N(x28)
+    addi t3,sp, (STACK_VEC_CUR_MAX)*4 
+    {   addi t3, t3, -2                         ; nop                                           }
+    .L_loop2_top:
+         xm.ld16s a0, N(x28)
+        {   addi N, N, -1                            ;   nop}
+        {   xm.slt tmp, a0, cur_max                    ; nop                                           }
+        {   xm.shli tmp, tmp, 1; nop}
+        {   nop                                           ;   xm.bru tmp                                 }
+            {   mv cur_max, a0                         ; nop                                           }
+        {   mv a0, cur_max                         ;   xm.bt N, .L_loop2_top                      }
+
+        xm.lddsp  s3,s2,0
+        xm.lddsp  s5,s4,8
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+
+//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+.L_end: 
+    .size FUNCTION_NAME, .L_end - FUNCTION_NAME
+
+
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_min.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_min.S
new file mode 100644
index 00000000..8f5f0782
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_min.S
@@ -0,0 +1,115 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+
+int32_t vect_s16_min(
+    const int32_t b[],
+    const unsigned length);
+
+
+*/
+
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (2)
+#define NSTACKWORDS     (8 + 8*NSTACKVECS+4)
+
+#define FUNCTION_NAME       vect_s16_min
+
+#define STACK_VEC_TMP       (NSTACKWORDS-8-4)
+#define STACK_VEC_CUR_MIN   (NSTACKWORDS-16-4)
+
+#define b           x10      // ![0x%08X]
+#define N           x11      // ![%d]
+#define tail        x12      // ![0x%X]
+#define tmp         x13      // ![%d]
+#define tmz         x18      // ![%d]
+
+
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.align 16; /* Translation error on this line: unexpected token at position 9. */ 
+
+FUNCTION_NAME:
+
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,8
+        xm.stdsp  s5,s4,16
+        li t3, 0x100
+    {   slli tail, N, SIZEOF_LOG2_S16            ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli tail, N, SIZEOF_LOG2_S16            \nMessage: The shift amount is not 32" */
+    {   srli N, N, EPV_LOG2_S16                  ; nop                                           }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri N, N, EPV_LOG2_S16                  \nMessage: The shift amount is not 32" */
+    {   slli tmp, N, 5                           ; nop                                           }
+        la t3, vpu_vec_0x7FFF
+    {   addi t3,sp, (STACK_VEC_CUR_MIN)*4         ;   xm.vldr t3}
+    {   add t3, b, tmp                         ;   xm.vstr t3}
+    {   xm.zexti tail, 5                            ;   xm.vldr t3}
+    {   xm.mkmsk tail, tail                        ;   addi t3,sp, (STACK_VEC_CUR_MIN)*4         }
+        xm.vstrpv t3, tail
+    
+    // Tail is fully accounted for in cur_min now.
+
+#undef tail
+#define cur_min     x12      // ![0x%08X]
+    
+    {   addi tmp,sp, (STACK_VEC_TMP)*4             ;   mv cur_min, t3                        }
+    { nop                                           ;   xm.vclrdr                                  }
+    {   mv t3, b                              ;   xm.brff N, .L_loop_bot                       }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_loop_top:
+        // cur_min[] saved in stack
+
+        {   mv b, t3                              ;   xm.vldr t3} //  vR[i] = b[i]
+        {   addi N, N, -1                             ;   xm.vlsub cur_min} //  vR[i] = cur_min[i] - b[i]
+        {   addi t3,sp, 0                         ;   xm.vdepth1                                 } //  vR[0] = [bitmask -- 1 where vR[i] < 0]  b[i] > cur_min[i]
+        {   xm.mkmski tmp, 2                            ; nop                                           }
+            xm.vstrpv t3, tmp
+        {   mv t3, b                              ;   lw tmp, 0                          (sp)}
+        {   mv tmz, tmp                            ;   xm.vldr t3}
+            xm.zip tmz, tmp, 0
+        {   xm.not tmp, tmp                            ;   li t3, 32                             }
+            xm.vstrpv cur_min, tmp
+        {   add t3, b, t3                         ;   xm.bt N, .L_loop_top                       }
+.L_loop_bot:
+
+    {   addi t3,sp, (STACK_VEC_CUR_MIN)*4         ;   li N, 15                               }
+    xm.ld16s cur_min, N(x28)
+    addi t3,sp, (STACK_VEC_CUR_MIN)*4 
+    {   addi t3, t3, -2                         ; nop                                           }
+    .L_loop2_top:
+
+        xm.ld16s a0, N(x28)
+        {   addi N, N, -1                             ;   nop}
+        {   xm.slt tmp, cur_min, a0                    ; nop                                           }
+        {xm.shli tmp, tmp, 1; nop}
+        { nop                                           ;   xm.bru tmp                                 }
+            {   mv cur_min, a0                         ; nop                                           }
+        {   mv a0, cur_min                         ;   xm.bt N, .L_loop2_top                      }
+
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s3,s2,8
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+
+//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+.L_end: 
+    .size FUNCTION_NAME, .L_end - FUNCTION_NAME
+
+
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_mul.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_mul.S
new file mode 100644
index 00000000..4d923bd6
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_mul.S
@@ -0,0 +1,106 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+headroom_t vect_s16_mul(
+    int16_t a[],
+    const int16_t b[],
+    const int16_t c[],
+    const unsigned len,
+    const int a_shr);
+*/
+
+
+#include "../asm_helper.h"
+
+#define NSTACKWORDS     (8+8)
+
+#define FUNCTION_NAME   vect_s16_mul
+
+#define STACK_VEC_SAT   0
+
+#define a           x10 
+#define b           x11 
+#define c           x12
+#define len         x13
+#define a_shr       x18
+#define _32         x19
+#define tmp         x20
+#define tail        x21
+
+.text
+.p2align 2
+
+
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        li t3, 0x100
+        xm.stdsp  s3,s2,32
+        xm.stdsp  s5,s4,40
+    {   slli t3, len, SIZEOF_LOG2_S16           ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli t3, len, SIZEOF_LOG2_S16           \nMessage: The shift amount is not 32" */
+    mv a_shr, a4
+    {   xm.zexti t3, 5                             ;   nop}
+    {   slli tmp, a_shr, 16                      ;   xm.zexti a_shr, 16                          }
+    {   or a_shr, tmp, a_shr                    ;   srli len, len, EPV_LOG2_S16              }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri len, len, EPV_LOG2_S16              \nMessage: The shift amount is not 32" */
+
+        xm.stdsp  a_shr,a_shr,((STACK_VEC_SAT)/2 + 0)*8
+        xm.stdsp  a_shr,a_shr,((STACK_VEC_SAT)/2 + 1)*8
+        xm.stdsp  a_shr,a_shr,((STACK_VEC_SAT)/2 + 2)*8
+        xm.stdsp  a_shr,a_shr,((STACK_VEC_SAT)/2 + 3)*8
+
+    {   addi a_shr,sp, (STACK_VEC_SAT)*4           ;   xm.mkmsk tail, t3                         }
+    {   li _32, 32                             ;   xm.brff len, .L_loop_bot                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.bu .L_loop_top                          }
+
+.p2align 4
+.L_loop_top:
+        {   addi len, len, -1                         ;   xm.vclrdr                                  }
+        {   add b, b, _32                           ;   xm.vldc b}
+        {   nop                         ;   xm.vlmacc0 c}
+        xm.vlmacc1 c
+        {add c, c, _32  ; nop}
+        xm.vlsat a_shr
+        {   add a, a, _32                           ;   xm.vstr a}
+        { nop                                           ;   xm.bt len, .L_loop_top                     }
+
+.L_loop_bot:
+    { nop                                           ;   xm.brff tail, .L_finish                  }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.vclrdr                              }
+    { nop                                           ;   xm.vldc b}
+    { nop                                           ;   xm.vlmacc0 c}
+    xm.vlmacc1 c
+    {   mv t3, a_shr                          ;   nop}
+    xm.vlsat a_shr
+    { nop                                           ;   xm.vstd t3}
+        xm.vstrpv t3, tail
+        xm.vstrpv a, tail
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vstr t3}
+
+.L_finish:
+        xm.lddsp  s3,s2,32
+        xm.lddsp  s5,s4,40
+    {   li a0, 15                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ; nop                                       }
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                   } 
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_nmacc.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_nmacc.S
new file mode 100644
index 00000000..21573984
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_nmacc.S
@@ -0,0 +1,122 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// XMOS Public License: Version 1
+
+#if defined(__VX4B__)
+
+
+/*  
+headroom_t vect_s16_nmacc(
+    int16_t acc[],
+    const int16_t b[],
+    const int16_t c[],
+    const unsigned len,
+    const int acc_shr,
+    const int bc_shr);
+*/
+
+
+#include "../asm_helper.h"
+
+#define NSTACKVECTS     (2)
+#define NSTACKWORDS     (8 + 8*NSTACKVECTS+4)
+
+#define FUNCTION_NAME   vect_s16_nmacc
+
+#define STACK_VEC_SAT   (NSTACKWORDS-8-4)
+#define STACK_VEC_TMP   (NSTACKWORDS-16-4)
+
+#define acc         x10 
+#define b           x11 
+#define c           x12
+#define len         x13
+#define bc_shr      x18
+#define _32         x19
+#define tmp         x20
+#define tail        x21
+#define acc_shr     x22
+#define mask        x23
+
+.text
+.p2align 2
+
+
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        li t3, 0x100
+        xm.stdsp  s3,s2,8
+        xm.stdsp  s5,s4,16
+        xm.stdsp  s7,s6,24
+    {   slli t3, len, SIZEOF_LOG2_S16           ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli t3, len, SIZEOF_LOG2_S16           \nMessage: The shift amount is not 32" */
+    mv bc_shr, a5
+    {   xm.zexti t3, 5                             ;   nop}
+    {   slli tmp, bc_shr, 16                     ;   xm.zexti bc_shr, 16                         }
+    {   or bc_shr, tmp, bc_shr                  ;   srli len, len, EPV_LOG2_S16              }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri len, len, EPV_LOG2_S16              \nMessage: The shift amount is not 32" */
+    mv acc_shr, a4
+    {   addi tmp,sp, (STACK_VEC_TMP)*4             ;   nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */
+
+        xm.stdsp  bc_shr,bc_shr,((STACK_VEC_SAT)/2 + 0)*8
+        xm.stdsp  bc_shr,bc_shr,((STACK_VEC_SAT)/2 + 1)*8
+        xm.stdsp  bc_shr,bc_shr,((STACK_VEC_SAT)/2 + 2)*8
+        xm.stdsp  bc_shr,bc_shr,((STACK_VEC_SAT)/2 + 3)*8
+
+    {   addi bc_shr,sp, (STACK_VEC_SAT)*4          ;   xm.mkmsk tail, t3                         }
+    {   li _32, 32                             ;   xm.brff len, .L_loop_bot                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    {   xm.mkmski mask, 32                          ;   xm.bu .L_loop_top                          }
+
+.p2align 4
+.L_loop_top:
+            xm.vlashr acc, acc_shr
+            xm.vstrpv acc, mask
+        {   addi len, len, -1                         ;   xm.vclrdr                                  }
+        {   add b, b, _32                           ;   xm.vldc b}
+        {   nop                       ;   xm.vlmacc0 c}
+        xm.vlmacc1 c
+        {   add c, c, _32                           ;   nop}
+     xm.vlsat bc_shr
+        { nop                                           ;   xm.vlsub acc}
+        {   add acc, acc, _32                       ;   xm.vstr acc}
+        { nop                                           ;   xm.bt len, .L_loop_top                     }
+
+.L_loop_bot:
+    { nop                                           ;   xm.brff tail, .L_finish                  }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+      xm.vlashr acc, acc_shr
+      xm.vstrpv acc, tail
+    { nop                                           ;   xm.vclrdr                              }
+    { nop                                           ;   xm.vldc b}
+    { nop                                           ;   xm.vlmacc0 c}
+    xm.vlmacc1 c
+    {   mv t3, bc_shr                         ;  nop}
+     xm.vlsat bc_shr
+    { nop                                           ;   xm.vlsub acc}
+    { nop                                           ;   xm.vstd t3}
+        xm.vstrpv t3, tail
+        xm.vstrpv acc, tail
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vstr t3}
+
+.L_finish:
+        xm.lddsp  s3,s2,8
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s7,s6,24
+    {   li a0, 15                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ; nop                                       }
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                   } 
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_scale.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_scale.S
new file mode 100644
index 00000000..6e67257e
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_scale.S
@@ -0,0 +1,120 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+
+    headroom_t vect_s16_scale(
+        int16_t a[],
+        const int16_t b[],
+        const unsigned length,
+        const int16_t c,
+        const right_shift_t a_shr);
+
+*/
+
+
+#define NSTACKWORDS     (8+8)
+
+#define FUNCTION_NAME   vect_s16_scale
+
+#define STACK_VEC_A_SHR     0
+#define STACK_BYTEMASK      10
+
+#define a           x10 
+#define b           x11 
+#define len         x12
+#define c           x13
+#define _32         x18
+#define tail        x19
+
+
+.text
+.p2align 2
+
+FUNCTION_NAME:
+    {   mv t3, c                              ;   xm.entsp (NSTACKWORDS)*4                   }
+        xm.stdsp  s3,s2,32
+
+xm.zip t3, c, 4
+        xm.stdsp  c,c,((STACK_VEC_A_SHR/2)+0)*8
+        xm.stdsp  c,c,((STACK_VEC_A_SHR/2)+1)*8
+        xm.stdsp  c,c,((STACK_VEC_A_SHR/2)+2)*8
+        xm.stdsp  c,c,((STACK_VEC_A_SHR/2)+3)*8
+
+#undef  c
+#define tmp     x13
+
+        li t3, 0x100
+    {   addi t3,sp, (STACK_VEC_A_SHR)*4           ;   xm.vsetc t3}
+    mv tmp, a4
+    {   mv t3, tmp                            ;   xm.vldc t3}
+xm.zip t3, tmp, 4
+        xm.stdsp  tmp,tmp,((STACK_VEC_A_SHR/2)+0)*8
+        xm.stdsp  tmp,tmp,((STACK_VEC_A_SHR/2)+1)*8
+        xm.stdsp  tmp,tmp,((STACK_VEC_A_SHR/2)+2)*8
+        xm.stdsp  tmp,tmp,((STACK_VEC_A_SHR/2)+3)*8
+
+#undef tmp
+
+    {   slli tail, len, SIZEOF_LOG2_S16          ;   srli len, len, EPV_LOG2_S16              }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli tail, len, SIZEOF_LOG2_S16          \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri len, len, EPV_LOG2_S16              \nMessage: The shift amount is not 32" */
+    {   xm.zexti tail, 5                            ;   li _32, 32                             }
+
+
+    {   addi t3,sp, (STACK_VEC_A_SHR)*4           ;   xm.brff len, .L_loop_bot                     }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.bu .L_loop_top                          }
+
+.p2align 4
+.L_loop_top:
+        {   addi len, len, -1                         ;   xm.vclrdr                                  }
+        { nop                                           ;   xm.vlmacc0 b}
+        xm.vlmacc1 b
+        {   add b, b, _32                           ;   nop}
+        xm.vlsat t3
+        {   add a, a, _32                           ;   xm.vstr a}
+        { nop                                           ;   xm.bt len, .L_loop_top                     }
+.L_loop_bot:
+
+    {   xm.mkmsk tail, tail                        ;   xm.brff tail, .L_finish                      }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    {   xm.not tail, tail                          ;   xm.vclrdr                                  }
+        xm.vstrpv t3, tail
+    {   xm.not tail, tail                          ;   xm.vlmacc0 b}
+    xm.vlmacc1 b
+    xm.vlsat t3
+        xm.vstrpv a, tail
+        xm.vstrpv t3, tail
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vstr t3}
+
+
+.L_finish:
+    xm.lddsp  s3,s2,32
+
+    {   li a0, 15                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ; nop                                           }
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       } 
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_sqrt.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_sqrt.S
new file mode 100644
index 00000000..375aef8f
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_sqrt.S
@@ -0,0 +1,203 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+#define XXX 1
+/*  
+
+headroom_t vect_s16_sqrt(
+    int16_t a[],
+    const int16_t b[],
+    const unsigned length,
+    const right_shift_t b_shr,
+    const unsigned depth);
+
+*/
+
+
+#define NSTACKVECTS     (4)
+#define NSTACKWORDS     (12+8*(NSTACKVECTS)+4)
+
+#define FUNCTION_NAME   vect_s16_sqrt
+
+// Temporary vector needed because there's no instruction to do vR[] * vR[]
+#define STACK_VEC_TMP       (NSTACKWORDS-8-4)
+// Holds the shifted values of b[] while we're solving it.
+#define STACK_VEC_TARGET    (NSTACKWORDS-16-4)
+// Holds the power of 2 that is currently being worked on inside hte inner loop.
+// @todo If we had an instruction that set each vR[k] to the value of a register, this wouldn't be needed.
+#define STACK_VEC_POW       (NSTACKWORDS-24-4)
+
+#define STACK_VEC_TMP2       (NSTACKWORDS-32-4)
+
+#define STACK_DEPTH     6
+
+#define a           x10
+#define b           x11
+#define length      x12
+#define b_shr       x13
+
+#define depth       x18 //s2
+#define mask_vec    x19 //s3
+#define _32         x20 //s4
+#define pow_init    x21 //s5
+#define tmp         x24 //s8
+#define spare       x23 //s7
+
+.text
+.p2align 2
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    xm.stdsp  s7,s8,0                        
+    {addi spare,sp, (STACK_VEC_TMP2)*4             ;   nop}
+
+    sw a4, (STACK_DEPTH)*4(sp)
+// Set VPU mode to 32-bit
+// (length << 1) is the length of the vector in bytes.
+{   li _32, 32                             ; nop                                           }
+{   slli t3, _32, 3                         ;  nop }
+{   slli length, length, 1                   ;   xm.vsetc t3}
+    la t3, vpu_vec_0x4000
+    //la t3, vpu_vec_0x7FFF
+{   mv pow_init, t3                       ; nop                                           }
+
+
+// Maximum supported depth is 15
+{   li tmp, 15                             ;   lw t3, (STACK_DEPTH)*4                (sp)}
+{   xm.assert t3                              ; nop                                           }
+{   xm.sltu t3, tmp, t3                       ; nop                                           }
+{   li _32, 32                             ;   xm.brff t3, .L_vect_loop_top          }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   sw tmp, (STACK_DEPTH)*4                (sp)}
+
+
+.L_vect_loop_top:
+
+    // mask_vec is a byte mask for the elements of a[] that we're currently working on.
+    // using VSTRPV with mask_vec prevents us from corrupting the headroom register.
+    // depth is the number of MSBs that we're solving for
+    {   xm.mkmsk mask_vec, length                  ;   lw depth, (STACK_DEPTH)*4              (sp)}
+
+    // First initialize the target vector using b[]
+    // (Doing this first allows this function to operate in-place on b[] if desired)
+    // @todo If we wanted to, we could do a VSIGN + VLMUL here to take an absolute value of each b[k],
+    //       since this function will not work for any negative b[k].
+        xm.vlashr b, b_shr
+    {   addi t3,sp, (STACK_VEC_TARGET)*4          ;   add b, b, _32                           }
+        xm.vstrpv t3, mask_vec
+
+    // Initialize the result (a[]) with 0's
+    {   mv t3, pow_init                       ;   xm.vclrdr                                  }
+        xm.vstrpv a, mask_vec
+
+    // VEC_POW[] is the bit we're currently solving for. Initialize to the first non-sign bit.
+    // (The VSTD is to zero out the VEC_POW[] elements that are going to be masked out, because
+    //  we're going to use VEC_POW[] later to update the headroom register)
+    {   addi t3,sp, (STACK_VEC_POW)*4             ;   xm.vldr t3}
+    {   li tmp, 1                              ;   xm.vstd t3}
+        xm.vstrpv t3, mask_vec 
+
+    // This saves us a few cycles on the first iteration (because of loop alignment, we'd need a 
+    // 'bu .L_sqrt_loop_top' here even if we didn't want to skip ahead). It's necessary because 
+    // we don't want to right-shift VEC_POW[] on the first iteration (it's already 2^15), and we 
+    // can't fix that by initializing VEC_POW[] to 0x8000 above because that's negative and 
+    // VLASHR is an arithmetic shift.
+    {   addi t3,sp, (STACK_VEC_TARGET)*4          ;   xm.bu .L_first_iter                        }
+
+    // Inner loop. Iteratively solving for the square root bit-by-bit
+    // 12 instructions + 1 FNOP
+    .p2align 4
+    .L_sqrt_loop_top:
+        // Load the next power of 2 and store it back to VEC_POW[]
+            xm.vlashr t3, tmp
+            xm.vstrpv t3, mask_vec
+
+        // Add the current power of 2 to each a[] to get the next value to be tested.
+        // test[k] <-- a[k] + VEC_POW
+        {   addi t3,sp, (STACK_VEC_TMP)*4             ;   xm.vladd a}
+
+         
+
+        // vR[] contains the values we're testing. Store it and square it
+        // vR[k] <-- ( test[k] * test[k] ) >> 14
+            xm.vstrpv t3, mask_vec
+        {  nop     ;   xm.vlmul0 t3}
+        xm.vlmul1 t3
+        xm.vstrpv spare, mask_vec
+        {xm.vladd spare;nop}
+
+        { addi t3,sp, (STACK_VEC_TARGET)*4     ; nop}
+        .L_first_iter:
+
+        // Subtract the squared test values from the target vector   
+        // vR[k] <-- target[k] - (( test[k] * test[k] ) >> 30)
+        { nop                                           ;   xm.vlsub t3}
+
+        // If vR[k] is negative, the test value was too large, so we don't want to update those a[k]
+        // for which vR[k] is negative.
+
+        //  vR[k] = a[k] + MAX( signum( vR[k] ), 0 ) * VEC_POW[k]
+
+        {   addi depth, depth, -1                     ;   xm.vsign                                   }
+        {   addi t3,sp, (STACK_VEC_POW)*4             ;   xm.vpos                                    }
+        
+        xm.vstrpv spare, mask_vec
+        {xm.vladd spare;nop}
+                        
+        {   li tmp, 1                                ;   xm.vlmul0 t3}
+        xm.vlmul1 t3
+        { nop                                           ;   xm.vladd a}
+
+        // Store the updated results in a[]
+            xm.vstrpv a, mask_vec
+        { nop                                           ;   xm.bt depth, .L_sqrt_loop_top              }
+    .L_sqrt_loop_bot:
+
+    // a[] now contains the results, but we haven't updated the headroom register because we've only
+    // been using VSTRPV. So, update the headroom register
+    // @todo Do we need to update the headroom register? Aren't we more or less guaranteed there's no
+    // headroom, because we got rid of the headroom of b[]? Should work out the math on this later.
+    
+    // We used mask_vec when initializing VEC_POW[], so we can use that here to avoid corrupting
+    // the headroom register with data that comes after a[]. x28 is already pointing at VEC_POW[].
+        xm.vstrpv t3, mask_vec
+    {   sub length, length, _32                 ;   xm.vldr t3}
+
+    // If (length - 32) < 1 we're done.
+    {   xm.slt tmp, length, tmp                    ;   xm.vstr t3}
+    {   add a, a, _32                           ;   nop            }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    beqz tmp, .L_vect_loop_top
+.L_vect_loop_bot:
+
+.L_finish:
+
+    xm.lddsp  s3,s2,8
+    xm.lddsp  s5,s4,16
+    xm.lddsp  s7,s8,0
+{   li a0, 15                              ;   xm.vgetc t3}
+{   xm.zexti t3, 5                             ;   nop}
+{   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                   } 
+
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_sum.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_sum.S
new file mode 100644
index 00000000..3b63e845
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_sum.S
@@ -0,0 +1,113 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+
+/*  
+    int32_t vect_s16_sum(
+        const int16_t b[],
+        const unsigned length);
+*/
+
+
+#include "../asm_helper.h"
+
+
+#define FUNCTION_NAME   vect_s16_sum
+#define NSTACKWORDS     (24+8+4)
+
+
+#define STACK_VEC_TMP       (NSTACKWORDS-24-4)
+#define STACK_VEC_TMP2       (NSTACKWORDS-8-2)
+
+#define b           x10
+#define N           x11
+#define tail        x12
+
+
+.text
+.p2align 2
+
+
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        li t3, 0x0100
+        xm.stdsp  s3,s2,0
+        xm.stdsp  s5,s4,8
+     {   addi s4,sp, (STACK_VEC_TMP2)*4        ;   nop                                  }
+         addi s5, s4, (-30)
+
+    {   slli tail, N, SIZEOF_LOG2_S16          ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli tail, N, SIZEOF_LOG2_S16            \nMessage: The shift amount is not 32" */
+    {   xm.zexti tail, 5                       ;   xm.vclrdr                                  }
+    {   srli N, N, EPV_LOG2_S16                ;   xm.brff tail, .L_tail_dealt_with         }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri N, N, EPV_LOG2_S16                  \nMessage: The shift amount is not 32", 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+        la t3, vpu_vec_0x0001
+    {   addi s2,sp, (STACK_VEC_TMP)*4          ;   xm.vldr t3}
+    { nop                                      ;   xm.vstd s2}
+    {   xm.mkmsk tail, tail                    ;   slli N, N, 3                             }
+        xm.vstrpv s2, tail
+    sh2add s3, N, b  
+    { nop                                      ;   xm.vldc s2}
+    { nop                                      ;   xm.vclrdr                                  }
+    {   srli N, N, 3                           ;   xm.vlmaccr0 s3}
+    xm.vlmaccr1 s3    
+
+    {nop ; xm.vstd s4}    
+    {nop ; xm.vldd s5}
+    {nop ; xm.vstr s4}
+    {nop ; xm.vldr s5}
+
+    {   li t3, 32                              ;   xm.vldc t3}
+
+.L_tail_dealt_with:
+    { nop                                      ;   xm.brff N, .L_loop_bot                       }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        la t3, vpu_vec_0x0001 
+    {   li t3, 32                              ;   xm.vldc t3}
+  
+.L_loop_top:
+        {   addi N, N, -1                      ;   xm.vlmaccr0 b}
+        xm.vlmaccr1 b
+
+    {nop ; xm.vstd s4}    
+    {nop ; xm.vldd s5}
+    {nop ; xm.vstr s4}
+    {nop ; xm.vldr s5}
+
+        {   add b, b, t3                       ;   xm.bt N, .L_loop_top                       }
+.L_loop_bot:
+
+.L_finish:
+
+
+
+    {   addi a1,sp, (STACK_VEC_TMP)*4          ;   nop    /* adddr*/                              }
+    
+    addi s4, a1, 32-2
+
+    { nop                                      ;   xm.vstd a1}
+    { nop                                      ;   lw a0, 0(s4)}
+    { slli a0, a0, 16                          ;   xm.vstr a1}
+    { nop                                      ;   lw a1, 0(s4)}
+
+        xm.lddsp  s3,s2,0
+        xm.lddsp  s5,s4,8
+    { xm.zexti a1, 16                          ;   nop}
+    {   or a0, a0, a1                          ;   xm.retsp (NSTACKWORDS)*4                       }
+
+.L_fend: 
+
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_fend - FUNCTION_NAME
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_to_s32.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_to_s32.S
new file mode 100644
index 00000000..0044623a
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_to_s32.S
@@ -0,0 +1,111 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+headroom_t vect_s16_to_vect_s32(
+    int16_t a[],
+    const int32_t b[],
+    const unsigned length);
+*/
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+#define NSTACKWORDS     (4)
+
+#define FUNCTION_NAME   vect_s16_to_vect_s32
+
+#define a               x10
+#define b               x11
+#define len             x12
+#define _16             x13
+#define tail            x18
+#define constsA         x28
+#define constsB         x28
+
+
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        li t3, 0x0200
+    {   xm.ldap t3, .L_vlmacc_consts_A            ;   xm.vsetc t3}
+
+        xm.stdsp  s3,s2,0
+    {   slli tail, len, SIZEOF_LOG2_S32          ; nop                                           }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli tail, len, SIZEOF_LOG2_S32          \nMessage: The shift amount is not 32" */
+    {   srli len, len, EPV_LOG2_S32              ;   xm.zexti tail, 5                            }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri len, len, EPV_LOG2_S32              \nMessage: The shift amount is not 32" */
+    {   xm.mkmsk tail, tail                        ;   xm.brff len, .L_loop_bot                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.bu .L_loop_top                          }
+
+.p2align 4
+
+.L_vlmacc_consts_A:
+.byte 0x7F, 0x00, 0x7F, 0x00, 0x7F, 0x00, 0x7F, 0x00, 0x7F, 0x00, 0x7F, 0x00, 0x7F, 0x00, 0x7F, 0x00
+.L_vlmacc_consts_B:
+.byte 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01
+
+/*
+    This function relies on some seriously horrifying deep magic. Try not to stare directly at it.
+
+    We're converting 16-bit values to 32-bit values with the VPU in 8-bit mode. Because if we deal with a 16-bit value
+    as two separate 8-bit values, we end up getting 2 adjacent accumulators, which is 32 adjacent bits in vR. It's the
+    only way to expand values to a higher bit-depth inside the VPU.
+
+    Function also relies on an understanding of the endianness of the system. A 16-bit value 0x1234 is stored as bytes 
+    (in order) [0x34, 0x12]. To avoid changing the value represented, we need these two bytes to also be adjacent in 
+    the output value. But, because these will end up in separate accumulators (the lower 16-bits of each being in vR 
+    with the rest in vD), in order to ensure this, we need one to end up in the upper byte of the 16 bits and the other 
+    to end up in the lower byte, which means our only option is to multiply by 2^8.
+
+    Ultimately what we need in the first 4 bytes of vR (given the value above) is [0x00, 0x34, 0x12, 0x00] which when
+    written to memory and interpreted as an int32 will be 0x00123400.
+
+    So, the 0x34 just gets MACCed by (0+0+1), leaving it in the lower bits. The 0x12 gets MACCed by (0x7F + 0x7F + 0x02 
+    = 0x100), pushing it into the high bits of the half word in vR.
+
+*/
+
+.L_loop_top: //All in 8-bit mode
+        {   addi len, len, -1                         ;   xm.vclrdr                                  } 
+        {   li _16, 16                             ;   xm.vldc b}
+        {   add b, b, _16                           ;   xm.vlmacc0 t3}
+        xm.vlmacc1 t3
+        {   add t3, t3, _16                       ;   xm.vlmacc0 t3}
+        xm.vlmacc1 t3
+        {   sub t3, t3, _16                       ;   xm.vlmacc0 t3}
+        xm.vlmacc1 t3
+        {   add a, a, _16                           ;   xm.vstr a}
+        {   add a, a, _16                           ;   xm.bt len, .L_loop_top                     }
+.L_loop_bot:
+
+    {   li _16, 16                             ;   xm.brff tail, .L_finish                      }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vldc b}
+    { nop                                           ;   xm.vlmacc0 t3}
+    xm.vlmacc1 t3
+    {   add t3, t3, _16                       ;   xm.vlmacc0 t3}
+    xm.vlmacc1 t3
+    { nop                                           ;   xm.vlmacc0 t3}
+    xm.vlmacc1 t3
+    xm.vstrpv a, tail
+
+.L_finish:
+        xm.lddsp  s3,s2,0
+        xm.retsp (NSTACKWORDS)*4       /* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS       \nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/s32_to_chunk_s32.S b/lib_xcore_math/src/arch/vx4b/vect_s32/s32_to_chunk_s32.S
new file mode 100644
index 00000000..f8a312ae
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/s32_to_chunk_s32.S
@@ -0,0 +1,49 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+void s32_to_chunk_s32(
+    int32_t a[8],
+    int32_t b);
+*/
+
+
+#define NSTACKWORDS     (0)
+
+#define FUNCTION_NAME   s32_to_chunk_s32
+
+.text
+.p2align 4
+
+
+#define a   x10
+#define b   x11
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in entsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdi  b,b, 0(a)
+  xm.stdi  b,b, 8(a)
+  xm.stdi  b,b, 16(a)
+  xm.stdi  b,b, 24(a)
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_abs.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_abs.S
new file mode 100644
index 00000000..5ab41616
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_abs.S
@@ -0,0 +1,106 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+#define NSTACKWORDS     (12)
+#define STACK_TMP_VEC       0
+
+#define a           x10
+#define b           x11
+#define len         x12
+#define tail        x13
+
+
+/*
+headroom_t vect_s32_abs(
+    int32_t a[],
+    const int32_t b[],
+    const unsigned length);
+*/
+
+vect_s32_abs:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    {   li t3, 0                              ;   slli tail, len, SIZEOF_LOG2_S32          }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli tail, len, SIZEOF_LOG2_S32          \nMessage: The shift amount is not 32" */
+    {   srli len, len, EPV_LOG2_S32              ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri len, len, EPV_LOG2_S32              \nMessage: The shift amount is not 32" */
+    {   xm.zexti tail, 5                            ;   xm.bu .L_apply_op                          }
+
+.L_func_end_s32:
+
+
+
+
+#undef a
+#undef b
+#undef len
+
+/*
+    When branching here:
+        *   a --> x10
+        *   b --> x11
+        *   loop_count --> x12
+        *   tail --> x13
+        *   VPU mode must already be set.
+*/
+
+#define a           x10
+#define b           x11
+#define loop_count  x12
+#define tail        x13
+
+.type .L_apply_op,@function
+
+.L_apply_op:
+
+    {   xm.mkmsk tail, tail                        ; nop                                           }
+    {   mv t3, b                              ;   xm.brff loop_count, .L_loop_bot              }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    {   li a1, 32                              ;   xm.bu .L_loop_top                          }
+.p2align 4
+.L_loop_top:
+        {   addi loop_count, loop_count, -1           ;   xm.vldr t3}
+        { nop                                           ;   xm.vsign                                   }
+        { nop                                           ;   xm.vlmul0 t3} //TODO this is wrong for 16 bit
+        {   add a, a, a1                            ;   xm.vstr a}
+        {   add t3, t3, a1                        ;   xm.bt loop_count, .L_loop_top              }
+.L_loop_bot:
+
+    { nop                                           ;   xm.brff tail, .L_finish                      }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.vclrdr                                  }
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vsign                                   }
+    {   addi t3,sp, (STACK_TMP_VEC)*4             ;   xm.vlmul0 t3} //TODO this is wrong for 16 bit
+    { nop                                           ;   xm.vstd t3}
+    { nop                                           ;   xm.vpos                                    }
+        xm.vstrpv t3, tail
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vstr t3}
+        xm.vstrpv a, tail
+
+.L_finish:
+    {   li a0, 32                              ;   xm.vgetc t3}
+    {   srli a1, t3, 8                          ; nop                                           }
+    {   xm.zexti t3, 5                             ;   xm.shr a0, a0, a1                          }
+    {   addi t3, t3, 1                         ; nop                                           }
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       } 
+
+.L_end_apply_op: 
+.size .L_apply_op, .L_end_apply_op - .L_apply_op
+
+.global vect_s32_abs
+.type vect_s32_abs,@function
+.set vect_s32_abs.nstackwords,NSTACKWORDS;  .global vect_s32_abs.nstackwords /* Translation error on this line: unexpected token at position 41. */ 
+.set vect_s32_abs.maxcores,1;               .global vect_s32_abs.maxcores /* Translation error on this line: unexpected token at position 28. */ 
+.set vect_s32_abs.maxtimers,0;              .global vect_s32_abs.maxtimers /* Translation error on this line: unexpected token at position 29. */ 
+.set vect_s32_abs.maxchanends,0;            .global vect_s32_abs.maxchanends /* Translation error on this line: unexpected token at position 31. */ 
+.size vect_s32_abs, .L_func_end_s32 - vect_s32_abs
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_abs_sum.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_abs_sum.S
new file mode 100644
index 00000000..49441ade
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_abs_sum.S
@@ -0,0 +1,109 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+
+int64_t vect_s32_abs_sum(
+    const int32_t b[],
+    const unsigned length);
+*/
+
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (2)
+#define NSTACKWORDS     (8 + 8*NSTACKVECS+4)
+
+#define FUNCTION_NAME       vect_s32_abs_sum
+
+#define STACK_VEC_TMP       (NSTACKWORDS-8-4)
+#define STACK_VEC_VR        (NSTACKWORDS-16-4)
+
+#define b           x10      // ![0x%08X]
+#define N           x11      // ![%d]
+#define tail        x12      // ![0x%X]
+#define _32         x13      // ![%d]
+#define tmp         x18      // ![%d]
+#define mask        x19      // ![0x%X]
+
+
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.align 16; /* Translation error on this line: unexpected token at position 9. */ 
+
+FUNCTION_NAME:
+
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,0
+
+    {   li t3, 0                              ;   slli tail, N, SIZEOF_LOG2_S32            }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli tail, N, SIZEOF_LOG2_S32            \nMessage: The shift amount is not 32" */
+    {   addi t3,sp, (STACK_VEC_VR)*4              ;   xm.vsetc t3}
+    {   xm.zexti tail, 5                            ;   xm.vclrdr                                  }
+    {   srli N, N, EPV_LOG2_S32                  ;   xm.brff tail, .L_tail_dealt_with             }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri N, N, EPV_LOG2_S32                  \nMessage: The shift amount is not 32", 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    {   addi tmp,sp, (STACK_VEC_TMP)*4             ;   mv t3, b                              }
+    {   xm.mkmsk mask, tail                        ;   xm.vstd tmp}
+    {   addi t3,sp, (STACK_VEC_VR)*4              ;   xm.vldr t3}
+    { nop                                           ;   xm.vsign                                   }
+        xm.vstrpv tmp, mask
+    { nop                                           ;   xm.vldc tmp}
+    { nop                                           ;   xm.vclrdr                                  }
+    {   add b, b, tail                          ;   xm.vlmacc0 b}
+.L_tail_dealt_with:
+    {   li _32, 32                             ;   xm.brff N, .L_loop_bot_s32                   }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+.L_loop_top_s32:
+        {   mv t3, b                              ;   xm.vstr t3}
+        {   addi N, N, -1                             ;   xm.vldr t3}
+        {   addi t3,sp, (STACK_VEC_TMP)*4             ;   xm.vsign                                   }
+        { nop                                           ;   xm.vstr t3}
+        {   addi t3,sp, (STACK_VEC_VR)*4              ;   xm.vldc t3}
+        { nop                                           ;   xm.vldr t3}
+        {   add b, b, _32                           ;   xm.vlmacc0 b}
+        { nop                                           ;   xm.bt N, .L_loop_top_s32                   }
+.L_loop_bot_s32:
+
+.L_finish_s32:
+
+    { nop                                           ;   xm.vstr t3}
+lui t3, %hi(vpu_vec_0x40000000)
+        addi t3,t3, %lo(vpu_vec_0x40000000)
+    { nop                                           ;   xm.vldc t3}
+lui t3, %hi(vpu_vec_0x80000000)        
+        addi t3,t3, %lo(vpu_vec_0x80000000)        
+    { nop                                           ;   xm.vlmacc0 t3}
+lui t3, %hi(vpu_vec_zero)
+        addi t3,t3, %lo(vpu_vec_zero)
+    {   addi t3,sp, (STACK_VEC_VR)*4              ;   xm.vldr t3}
+    { nop                                           ;   xm.vlmaccr0 t3}
+    { nop                                           ;   xm.vstd t3}
+    { nop                                           ;   xm.vlmaccr0 t3}
+    { nop                                           ;   xm.vstr t3}
+    { nop                                           ;   lw a1,0                          ( t3)}
+    {   addi a1, a1, 8                           ;   lw a0,4                          ( t3)}
+
+        xm.lddsp  s3,s2,0
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+
+//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+.L_end: 
+    .size FUNCTION_NAME, .L_end - FUNCTION_NAME
+
+
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_argmax.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_argmax.S
new file mode 100644
index 00000000..74fee67f
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_argmax.S
@@ -0,0 +1,154 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+
+unsigned vect_s32_argmax(
+    const int32_t b[],
+    const unsigned length);
+*/
+
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (3)
+#define NSTACKWORDS     (8 + 8*NSTACKVECS+4)
+
+#define FUNCTION_NAME       vect_s32_argmax
+
+#define STACK_VEC_MAX_DEX   (NSTACKWORDS-8-4)
+#define STACK_VEC_CUR_MAX   (NSTACKWORDS-16-4)
+#define STACK_VEC_CUR_DEX   (NSTACKWORDS-24-4)
+
+#define STACK_N     6
+
+#define b           x10      // ![0x%08X]
+#define N           x11      // ![%d]
+#define vec_8s      x12      // ![0x%X]
+#define tmp         x13      // ![%d]
+#define tmz         x18      // ![%d]
+#define cur_max     x19      // ![0x%08X]
+#define mask_0xF    x20      // ![0x%04X]
+
+
+
+FUNCTION_NAME:
+
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+
+{   li t3, 0                              ;   sw N, (STACK_N)*4                      (sp)}
+{   srli N, N, 3                             ;   xm.vsetc t3}
+
+// cur_max[i] = -0x80000000
+    la t3, vpu_vec_0x80000000
+{   addi t3,sp, (STACK_VEC_CUR_MAX)*4         ;   xm.vldr t3}
+{   xm.mkmski mask_0xF, 4                       ;   xm.vstr t3}
+
+// cur_dex[i] = i
+{   addi tmp,sp, (STACK_VEC_CUR_DEX)*4         ;   li t3, 7                             }
+.L_setup_cur_dex:
+xm.stw t3, t3(tmp)  
+    {   addi t3, t3, -1                     ;   xm.bt t3, .L_setup_cur_dex                    }
+
+// max_dex[i] = -1
+    la t3, vpu_vec_neg_1
+{   addi t3,sp, (STACK_VEC_MAX_DEX)*4         ;   xm.vldr t3}
+{   addi cur_max,sp, (STACK_VEC_CUR_MAX)*4     ;   xm.vstr t3}
+
+    la t3, vpu_vec_0x00000008
+{   mv vec_8s, t3                         ;   xm.vclrdr                                  }
+{   mv t3, b                              ;   xm.brff N, .L_loop_bot                       }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+.L_loop_top:
+    {   mv b, t3                              ;   xm.vldr t3}
+    {   addi N, N, -1                             ;   xm.vlsub cur_max}
+    {   addi t3,sp, 0                         ;   xm.vdepth1                                 }
+        xm.vstrpv t3, mask_0xF
+    {   mv t3, b                              ;   lw tmp, 0                          (sp)}
+    {   mv tmz, tmp                            ; nop                                           }
+xm.zip tmz, tmp, 0
+    {   mv tmz, tmp                            ; nop                                           }
+xm.zip tmz, tmp, 0
+    {   addi t3,sp, (STACK_VEC_CUR_DEX)*4         ;   xm.vldr t3}
+        xm.vstrpv cur_max, tmp
+    {   addi tmz,sp, (STACK_VEC_MAX_DEX)*4         ;   xm.vldr t3}
+        xm.vstrpv tmz, tmp      
+    { nop                                           ;   xm.vladd vec_8s}
+    {   li t3, 32                             ;   xm.vstr t3}
+    {   add t3, b, t3                         ;   xm.bt N, .L_loop_top                       }
+.L_loop_bot:
+
+{ nop                                           ;   lw N, (STACK_N)*4                      (sp)}
+{   xm.zexti N, 3                               ; nop                                           }
+{   xm.mkmsk N, N                              ;   xm.brff N, .L_no_tail                        }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+{   mv b, t3                              ;   xm.vldr t3}
+{   addi t3,sp, 0                         ;   xm.vlsub cur_max}
+{ nop                                           ;   xm.vdepth1                                 }
+    xm.vstrpv t3, mask_0xF
+{   mv t3, b                              ;   lw tmp, 0                          (sp)}
+{   and tmp, tmp, N                         ;   and tmz, tmp, N                         }
+xm.zip tmz, tmp, 0
+{   mv tmz, tmp                            ; nop                                           }
+xm.zip tmz, tmp, 0
+{   addi t3,sp, (STACK_VEC_CUR_DEX)*4         ;   xm.vldr t3}
+    xm.vstrpv cur_max, tmp
+{   addi tmz,sp, (STACK_VEC_MAX_DEX)*4         ;   xm.vldr t3}
+    xm.vstrpv tmz, tmp      
+
+.L_no_tail:
+
+#undef cur_max
+#undef vec_8s
+#undef mask_0xF
+
+#define cur_max     x19  // ![%d]
+#define max_dex     x12  // ![0x%08X]
+
+{   addi t3,sp, (STACK_VEC_CUR_MAX)*4         ;   li N, 7                                }
+{   addi s4,sp, (STACK_VEC_MAX_DEX)*4          ;   xm.ldw cur_max,N            ( t3)}
+{   addi N, N, -1                             ;   xm.ldw max_dex,N                      ( s4)}
+.L_loop2_top:
+    { nop                                           ;   xm.ldw a0,N                         ( t3)}
+    {   xm.slt tmp, a0, cur_max                    ;   xm.ldw s5,N                        ( s4)}
+  xm.eq tmz, a0, cur_max                     
+{ nop ;   xm.bt tmp, .L_less_than                    }
+    .L_greater_or_equal:
+        {    xm.slt tmp, s5, max_dex                   ;   xm.brff tmz, .L_greater                      }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        .L_equal:
+            { nop                                           ;   xm.brff tmp, .L_less_than                    }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        .L_greater:
+            {   mv cur_max, a0                         ;   mv max_dex, s5                         }
+
+    .L_less_than:
+    {   addi N, N, -1                             ;   xm.bt N, .L_loop2_top                      }
+
+    xm.lddsp  s5,s4,16
+    xm.lddsp  s3,s2,8
+{   mv a0, max_dex                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+
+.L_end: 
+
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_end - FUNCTION_NAME
+
+
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_argmin.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_argmin.S
new file mode 100644
index 00000000..9e441893
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_argmin.S
@@ -0,0 +1,161 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+
+unsigned vect_s32_argmin(
+    const int32_t b[],
+    const unsigned length);
+
+
+*/
+
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (3)
+#define NSTACKWORDS     (8 + 8*NSTACKVECS+4)
+
+#define FUNCTION_NAME       vect_s32_argmin
+
+#define STACK_VEC_MAX_DEX   (NSTACKWORDS-8-4)
+#define STACK_VEC_CUR_MAX   (NSTACKWORDS-16-4)
+#define STACK_VEC_CUR_DEX   (NSTACKWORDS-24-4)
+
+#define STACK_N     6
+
+#define b           x10      // ![0x%08X]
+#define N           x11      // ![%d]
+#define vec_8s      x12      // ![0x%X]
+#define tmp         x13      // ![%d]
+#define tmz         x18      // ![%d]
+#define cur_min     x19      // ![0x%08X]
+#define mask_0xF    x20      // ![0x%04X]
+#define vec_ones    x21      // ![0x%08X]
+
+
+
+FUNCTION_NAME:
+
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+
+{   li t3, 0                              ;   sw N, (STACK_N)*4                      (sp)}
+{   srli N, N, 3                             ;   xm.vsetc t3}
+
+// cur_min[i] = 0x7FFFFFFF
+    la t3, vpu_vec_0x7FFFFFFF
+{   addi t3,sp, (STACK_VEC_CUR_MAX)*4         ;   xm.vldr t3}
+{   xm.mkmski mask_0xF, 4                       ;   xm.vstr t3}
+
+// cur_dex[i] = i
+{   addi tmp,sp, (STACK_VEC_CUR_DEX)*4         ;   li t3, 7                             }
+.L_setup_cur_dex:
+xm.stw t3, t3(tmp)
+    {   addi t3, t3, -1                     ;   xm.bt t3, .L_setup_cur_dex                    }
+
+// max_dex[i] = -1
+lui t3, %hi(vpu_vec_neg_1)
+    addi t3,t3, %lo(vpu_vec_neg_1)
+{   addi t3,sp, (STACK_VEC_MAX_DEX)*4         ;   xm.vldr t3}
+{   addi cur_min,sp, (STACK_VEC_CUR_MAX)*4     ;   xm.vstr t3}
+
+    la t3, vpu_vec_0x00000008
+{   mv vec_8s, t3                         ;   xm.vclrdr                                  }
+    la t3, vpu_vec_0x00000001
+{   mv vec_ones, t3                       ; nop                                           }
+{   mv t3, b                              ;   xm.brff N, .L_loop_bot                       }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+.L_loop_top:
+    {   mv b, t3                              ;   xm.vldr t3}
+    { nop                                           ;   xm.vladd vec_ones}
+    {   addi N, N, -1                             ;   xm.vlsub cur_min}
+    {   addi t3,sp, 0                         ;   xm.vdepth1                                 }
+        xm.vstrpv t3, mask_0xF
+    {   mv t3, b                              ;   lw tmp, 0                          (sp)}
+    {   xm.not tmp, tmp                            ;   xm.not tmz, tmp                            }
+xm.zip tmz, tmp, 0
+    {   mv tmz, tmp                            ; nop                                           }
+xm.zip tmz, tmp, 0
+    {   addi t3,sp, (STACK_VEC_CUR_DEX)*4         ;   xm.vldr t3}
+        xm.vstrpv cur_min, tmp
+    {   addi tmz,sp, (STACK_VEC_MAX_DEX)*4         ;   xm.vldr t3}
+        xm.vstrpv tmz, tmp      
+    { nop                                           ;   xm.vladd vec_8s}
+    {   li t3, 32                             ;   xm.vstr t3}
+    {   add t3, b, t3                         ;   xm.bt N, .L_loop_top                       }
+.L_loop_bot:
+
+{ nop                                           ;   lw N, (STACK_N)*4                      (sp)}
+{   xm.zexti N, 3                               ; nop                                           }
+{   xm.mkmsk N, N                              ;   xm.brff N, .L_no_tail                        }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+{   mv b, t3                              ;   xm.vldr t3}
+{ nop                                           ;   xm.vladd vec_ones}
+{   addi t3,sp, 0                         ;   xm.vlsub cur_min}
+{ nop                                           ;   xm.vdepth1                                 }
+    xm.vstrpv t3, mask_0xF
+{   mv t3, b                              ;   lw tmp, 0                          (sp)}
+{   xm.not tmp, tmp                            ; nop                                           }
+{   and tmp, tmp, N                         ;   and tmz, tmp, N                         }
+xm.zip tmz, tmp, 0
+{   mv tmz, tmp                            ; nop                                           }
+xm.zip tmz, tmp, 0
+{   addi t3,sp, (STACK_VEC_CUR_DEX)*4         ;   xm.vldr t3}
+    xm.vstrpv cur_min, tmp
+{   addi tmz,sp, (STACK_VEC_MAX_DEX)*4         ;   xm.vldr t3}
+    xm.vstrpv tmz, tmp      
+
+.L_no_tail:
+
+#undef cur_min
+#undef vec_16s
+#undef mask_0xF
+
+#define cur_min     x19  // ![%d]
+#define min_dex     x12  // ![0x%08X]
+
+{   addi t3,sp, (STACK_VEC_CUR_MAX)*4         ;   li N, 7                                }
+{   addi s4,sp, (STACK_VEC_MAX_DEX)*4          ;   xm.ldw cur_min,N                     ( t3)}
+{   addi N, N, -1                             ;   xm.ldw min_dex,N                      ( s4)}
+.L_loop2_top:
+    { nop                                           ;   xm.ldw a0,N                          ( t3)}
+    {   xm.slt tmp, cur_min, a0                    ;   xm.ldw s5,N                           ( s4)}
+    xm.eq tmz, cur_min, a0
+{   nop                                     ;     xm.bt tmp, .L_greater_than                 }
+    .L_less_or_equal:
+        {    xm.slt tmp, s5, min_dex                   ;   xm.brff tmz, .L_less                         }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        .L_equal:
+            { nop                                           ;   xm.brff tmp, .L_greater_than                 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        .L_less:
+            {   mv cur_min, a0                         ;   mv min_dex, s5                         }
+
+    .L_greater_than:
+    {   addi N, N, -1                             ;   xm.bt N, .L_loop2_top                      }
+
+    xm.lddsp  s5,s4,16
+    xm.lddsp  s3,s2,8
+{   mv a0, min_dex                         ;   xm.retsp (NSTACKWORDS)*4                       }
+
+.L_end: 
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_end - FUNCTION_NAME
+
+
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_clip.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_clip.S
new file mode 100644
index 00000000..6c8ae9ec
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_clip.S
@@ -0,0 +1,330 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+
+headroom_t vect_s32_clip(
+    int32_t a[],
+    const int32_t b[],
+    const unsigned length,
+    const int32_t lower_bound,
+    const int32_t upper_bound,
+    const int b_shr);
+*/
+
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+
+#define NSTACKVECS      (6)
+#define NSTACKWORDS     (8 + 8*(NSTACKVECS)+8)
+
+#define FUNCTION_NAME   vect_s32_clip
+
+#define STACK_VEC(K)    (NSTACKWORDS - (8*((K)+1))-8)
+
+#define a           x10
+#define b           x11
+#define N           x12
+#define lower       x13
+#define upper       x18
+#define b_shr       x19
+#define tail        x20
+#define tmp1        x21
+#define tmp2        x22
+#define int_max     x23
+#define int_min     x24
+
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,0
+        xm.stdsp  s5,s4,8
+        li t3, 0x0
+        xm.stdsp  s7,s6,16
+    {   slli tail, N, SIZEOF_LOG2_S32            ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli tail, N, SIZEOF_LOG2_S32            \nMessage: The shift amount is not 32" */
+    {   xm.zexti tail, 5                            ;   sw s8, 24                          (sp)}
+
+    {   li tmp1, 31                            ;   srli N, N, EPV_LOG2_S32                  }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri N, N, EPV_LOG2_S32                  \nMessage: The shift amount is not 32" */
+    {   xm.mkmsk int_max, tmp1                     ;   xm.vclrdr    
+                                  }  
+    {   xm.addi int_min, int_max, 1                ;   xm.mkmsk tail, tail                        }
+
+    // If upper >= 0  and lower <= 0, we can do this more efficiently.
+    mv upper, a4
+    {   li tmp1, 0                             ;   nop}
+    mv b_shr, a5
+    {   xm.slt tmp2, upper, tmp1                   ;   nop  }
+        bnez tmp2, .L_lower_nice 
+    {   xm.slt tmp1, tmp1, lower                   ;   nop  }
+        bnez tmp1, .L_upper_nice 
+
+
+    // Otherwise, we have the nice situation.
+.L_nice:
+
+    //In the nice situation, the upper bound is no more than 1 VLADD away from the positive  saturation 
+    //  point of the VPU, and the lower bound is no more than 1 VLADD away from the negative saturation
+    //  point of the VPU. 
+
+    {   addi t3,sp, (STACK_VEC(0))*4              ;   sub upper, int_max, upper               }
+    { nop                                         ;   sub lower, int_min, lower               }
+
+
+    xm.stdi  upper,upper, 0(t3)
+    xm.stdi  upper,upper, 8(t3)
+    xm.stdi  upper,upper, 16(t3)
+    xm.stdi  upper,upper, 24(t3)
+
+    { nop                                           ;   xm.neg upper, upper                        }
+    { addi t3,sp, (STACK_VEC(1))*4                  ;   nop                                           }
+    xm.stdi  lower,lower, 0(t3)
+    xm.stdi  lower,lower, 8(t3)
+    xm.stdi  lower,lower, 16(t3)
+    xm.stdi  lower,lower, 24(t3)
+
+    { nop                                           ;   xm.neg lower, lower                        }
+    { nop                                           ;   addi t3,sp, (STACK_VEC(2))*4              }
+    
+    xm.stdi  upper,upper, 0(t3)
+    xm.stdi  upper,upper, 8(t3)
+    xm.stdi  upper,upper, 16(t3)
+    xm.stdi  upper,upper, 24(t3)
+
+    { nop                                           ;   addi t3,sp, (STACK_VEC(3))*4              }
+
+    xm.stdi  lower,lower, 0(t3)
+    xm.stdi  lower,lower, 8(t3)
+    xm.stdi  lower,lower, 16(t3)
+    xm.stdi  lower,lower, 24(t3)
+
+#define vec_upper   upper
+#define vec_lower   lower
+#define vec_nupper  tmp1
+#define vec_nlower  tmp2
+#define _32         int_min
+
+    {addi vec_upper,sp, (STACK_VEC(0))*4 ; nop}
+    {addi vec_lower,sp, (STACK_VEC(1))*4 ; nop}
+    {addi vec_nupper,sp, (STACK_VEC(2))*4 ; nop}
+    {addi vec_nlower,sp, (STACK_VEC(3))*4 ; nop}
+    {   li _32, 32                             ;   xm.brff N, .L_nice_loop_bot                  }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_nice_loop_top:
+            xm.vlashr b, b_shr
+        {   add b, b, _32                       ;   xm.vladd vec_upper}
+        {   addi N, N, -1                       ;   xm.vladd vec_nupper}
+        { nop                                   ;   xm.vladd vec_lower}
+        { nop                                   ;   xm.vladd vec_nlower}
+        {   add a, a, _32                       ;   xm.vstr a}
+        { nop                                   ;   xm.bt N, .L_nice_loop_top                  }
+    .L_nice_loop_bot:
+    
+      beqz tail, .L_finish                  
+    
+      xm.vlashr b, b_shr
+    { nop                                       ;   xm.vladd vec_upper}
+    { nop                                       ;   xm.vladd vec_nupper}
+    { nop                                       ;   xm.vladd vec_lower}
+    { nop                                       ;   xm.vladd vec_nlower}
+     j .L_finishish 
+
+/*
+    C logic:
+
+    void clip16(int16_t output[], int16_t input[], int16_t lower, int16_t upper, unsigned length, int input_shr)
+    {
+        if(upper >= 0 && lower <= 0){
+
+            int16_t up_thing = VPU_INT16_MAX - upper;
+            int16_t lo_thing = VPU_INT16_MIN - lower;
+
+            // 7 instructions required
+            for(unsigned int i = 0; i < length; i++){
+
+                int16_t tmp = input[i] >> input_shr;
+                tmp = SATURATING_ADD(tmp, up_thing);
+                tmp = tmp - up_thing;
+                tmp = SATURATING_ADD(tmp, lo_thing);
+                tmp = tmp - lo_thing
+
+                output[i] = tmp;
+            }
+        } else {
+
+            int16_t one, two, three;
+
+            if(upper >= 0){
+                one = VPU_INT16_MAX - upper;
+                two = VPU_INT16_MIN;
+                three = VPU_INT16_MIN - (lower - upper);
+            } else {
+                one = VPU_INT16_MIN - lower;
+                two = VPU_INT16_MAX;
+                three = VPU_INT16_MAX - (upper - lower);
+            }
+
+            // 9 instructions required
+            for(unsigned int i = 0; i < length; i++){
+
+                int16_t tmp = input[i] >> input_shr;
+                tmp = SATURATING_ADD(tmp, one);
+                tmp = tmp - one;
+                tmp = tmp + two;
+                tmp = SATURATING_ADD(tmp, three);
+                tmp = tmp - three;
+                tmp = tmp - two;
+
+                output[i] = tmp;
+            }
+        }
+    }
+
+*/
+
+
+
+#undef vec_upper 
+#undef vec_lower 
+#undef vec_nupper
+#undef vec_nlower
+#undef _32       
+
+#define vec_one     upper
+#define vec_two     lower
+#define vec_three   tmp1
+
+#define vec_none    tmp2
+#define vec_ntwo    int_max
+#define vec_nthree  int_min
+
+    // The nice thing about the not nice scenario is that at least one of the two bounds is
+    //  guaranteed to be within one VLADD of the relevant saturation point.
+
+.L_upper_nice:
+
+    {   sub vec_one, int_max, upper             ;   xm.neg vec_three, lower                    }
+    addi vec_three, vec_three, -1
+    {   addi vec_two, int_min, 1                    ;   xm.bu .L_not_nice_thing                    }
+    
+.L_lower_nice:
+    {   sub vec_one, int_min, lower             ;   xm.neg vec_three, upper                    }
+    {   mv vec_two, int_max                    ; nop                                           }
+
+
+.L_not_nice_thing:
+
+    {   addi t3,sp, (STACK_VEC(0))*4              ; nop                                           }
+        xm.stdi  vec_one,vec_one, 0(t3)
+        xm.stdi  vec_one,vec_one, 8(t3)
+        xm.stdi  vec_one,vec_one, 16(t3)
+        xm.stdi  vec_one,vec_one, 24(t3)
+    
+    {   addi t3,sp, (STACK_VEC(1))*4              ; nop                                           }
+        xm.stdi  vec_two,vec_two, 0(t3)
+        xm.stdi  vec_two,vec_two, 8(t3)
+        xm.stdi  vec_two,vec_two, 16(t3)
+        xm.stdi  vec_two,vec_two, 24(t3)
+
+
+ lui     t3, 0x80000
+ bne t3, vec_one, .skip1
+    addi vec_one, vec_one, 1
+.skip1:
+ bne t3, vec_two, .skip2
+    addi vec_two, vec_two, 1
+.skip2:
+{   neg vec_one, vec_one                    ;  nop }
+{   nop;neg vec_two, vec_two                    }
+
+    {   addi t3,sp, (STACK_VEC(3))*4              ; nop                                           }
+        xm.stdi  vec_one,vec_one, 0(t3)
+        xm.stdi  vec_one,vec_one, 8(t3)
+        xm.stdi  vec_one,vec_one, 16(t3)
+        xm.stdi  vec_one,vec_one, 24(t3)
+        
+    {   addi t3,sp, (STACK_VEC(2))*4              ; nop                                           }
+        xm.stdi  vec_two,vec_two, 0(t3)
+        xm.stdi  vec_two,vec_two, 8(t3)
+        xm.stdi  vec_two,vec_two, 16(t3)
+        xm.stdi  vec_two,vec_two, 24(t3)
+
+    {   xm.neg vec_two, vec_three                  ; nop                                           }
+    {   addi t3,sp, (STACK_VEC(4))*4              ; nop                                           }
+        xm.stdi  vec_three,vec_three, 0(t3)
+        xm.stdi  vec_three,vec_three, 8(t3)
+        xm.stdi  vec_three,vec_three, 16(t3)
+        xm.stdi  vec_three,vec_three, 24(t3)
+        
+    {   addi t3,sp, (STACK_VEC(5))*4              ; nop                                           }
+        xm.stdi  vec_two,vec_two, 0(t3)
+        xm.stdi  vec_two,vec_two, 8(t3)
+        xm.stdi  vec_two,vec_two, 16(t3)
+        xm.stdi  vec_two,vec_two, 24(t3)
+
+    {addi vec_one,sp, (STACK_VEC(0))*4 ; nop}
+    {addi vec_none,sp, (STACK_VEC(3))*4 ; nop}
+    {addi vec_two,sp, (STACK_VEC(1))*4 ; nop}
+    {addi vec_ntwo,sp, (STACK_VEC(2))*4 ; nop}
+    {addi vec_three,sp, (STACK_VEC(4))*4 ; nop}
+    {addi vec_nthree,sp, (STACK_VEC(5))*4 ; nop}
+
+    {   li t3, 32                             ;   xm.brff N, .L_not_nice_loop_bot              }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+.L_not_nice_loop_top:
+            xm.vlashr b, b_shr
+        { add b, b, t3                              ;   xm.vladd vec_one}
+        { addi N, N, -1                             ;   xm.vladd vec_none}
+        { nop                                       ;   xm.vladd vec_two}
+        { nop                                       ;   xm.vladd vec_three}
+        { nop                                       ;   xm.vladd vec_nthree}
+        { nop                                       ;   xm.vladd vec_ntwo}
+        { add a, a, t3                              ;   xm.vstr a}
+        { nop                                       ;   xm.bt N, .L_not_nice_loop_top              }
+.L_not_nice_loop_bot:
+    
+    { nop                                       ;   xm.brff tail, .L_finish                      }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        xm.vlashr b, b_shr
+    { nop                                       ;   xm.vladd vec_one}
+    { nop                                       ;   xm.vladd vec_none}
+    { nop                                       ;   xm.vladd vec_two}
+    { nop                                       ;   xm.vladd vec_three}
+    { nop                                       ;   xm.vladd vec_nthree}
+    { nop                                       ;   xm.vladd vec_ntwo}
+
+
+.L_finishish:
+    { nop                                       ;   xm.vstd tmp1}
+    xm.vstrpv a, tail
+    xm.vstrpv tmp1, tail
+    { nop                                       ;   xm.vldd tmp1}
+    { nop                                       ;   xm.vstd tmp1}
+
+.L_finish:
+    {   li a0, 31                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ;   lw s8, 24                          (sp)}
+        xm.lddsp  s3,s2,0
+        xm.lddsp  s5,s4,8
+        xm.lddsp  s7,s6,16
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       } 
+
+.L_func_end:
+
+//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_dot.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_dot.S
new file mode 100644
index 00000000..b1697f5f
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_dot.S
@@ -0,0 +1,118 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+int64_t vect_s32_dot(
+    const int32_t b[],
+    const int32_t c[],
+    const unsigned length,
+    const int b_shr,
+    const int c_shr);
+*/
+
+#include "../asm_helper.h"
+
+#define NSTACKWORDS     (8 + 24+4)
+
+#define FUNCTION_NAME   vect_s32_dot
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+#define STACK_VEC_VR    (NSTACKWORDS-16-4)
+#define STACK_VEC_VD    (NSTACKWORDS-24-4)
+
+#define b           x10
+#define c           x11
+#define N           x12
+#define b_shr       x13
+#define c_shr       x18
+#define tail        x19
+#define vec_vd      x20
+#define vec_vr      x21
+#define vec_tmp     x22
+#define _32         x23
+
+
+
+.text; .issue_mode dual /* Translation error on this line: unexpected token at position 5. */ 
+.p2align 2
+
+
+FUNCTION_NAME:
+
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,0
+        xm.stdsp  s5,s4,8
+        xm.stdsp  s7,s6,16
+
+    {   li t3, 0                              ; nop                                           }
+    {   slli tail, N, SIZEOF_LOG2_S32            ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli tail, N, SIZEOF_LOG2_S32            \nMessage: The shift amount is not 32" */
+    {   xm.zexti tail, 5                            ;   xm.vclrdr                                  }
+    {   srli N, N, EPV_LOG2_S32                  ;   addi vec_tmp,sp, (STACK_VEC_TMP)*4         }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri N, N, EPV_LOG2_S32                  \nMessage: The shift amount is not 32" */
+    mv c_shr, a4
+
+{   xm.ldawsp vec_vr, (STACK_VEC_VD + 1)*4       ;   nop}
+{   xm.ldawsp vec_vd, (STACK_VEC_VD    )*4       ;   nop }
+    {   li _32, 32                             ;   xm.brff N, .L_loop_bot_s32                   }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+.L_loop_top_s32:
+        {   add vec_vd, vec_vd, _32                 ;   xm.vstd vec_vd}
+        {   sub vec_vd, vec_vd, _32                 ;   xm.vstr vec_vd}
+            xm.vlashr b, b_shr
+        {   add b, b, _32                           ;   xm.vstr vec_tmp}
+        { nop                                           ;   xm.vldc vec_tmp}
+            xm.vlashr c, c_shr
+        {   mv t3, vec_vr                         ;   xm.vstr vec_tmp}
+        {   sub vec_vr, vec_vr, _32                 ;   xm.vldr t3}
+        {   add vec_vr, vec_vr, _32                 ;   xm.vldd vec_vr}  
+        {   addi N, N, -1                             ;   xm.vlmaccr0 vec_tmp}
+        {   add c, c, _32                           ;   xm.bt N, .L_loop_top_s32                   }
+.L_loop_bot_s32:
+    {   xm.mkmsk tail, tail                        ;   xm.brff tail, .L_finish_s32                  }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    {   add vec_vd, vec_vd, _32                 ;   xm.vstd vec_vd}
+    {   sub vec_vd, vec_vd, _32                 ;   xm.vstr vec_vd}
+    { nop                                           ;   xm.vclrdr                                  }
+        xm.vlashr b, b_shr
+    {   add b, b, _32                           ;   xm.vstd vec_tmp}
+        xm.vstrpv vec_tmp, tail
+    { nop                                           ;   xm.vldc vec_tmp}
+        xm.vlashr c, c_shr
+    {   mv t3, vec_vr                         ;   xm.vstr vec_tmp}
+    {   sub vec_vr, vec_vr, _32                 ;   xm.vldr t3}
+    { nop                                           ;   xm.vldd vec_vr}  
+    { nop                                           ;   xm.vlmaccr0 vec_tmp}
+
+.L_finish_s32:
+
+    {   addi t3,sp, (STACK_VEC_TMP)*4             ; nop                                           }
+    // (vD:vR)[k] ==  ((int32_t)vD[k])*(2^32) + ((uint32_t)vR[k])
+    { nop                                           ;   xm.vstd t3}
+    { nop                                           ;   lw a1,0                          ( t3)}
+{   xm.sext a1, 8                              ;   nop}
+    {nop; xm.vstr t3}
+    { nop                                           ;   lw a0,0                          ( t3)}
+    
+        xm.lddsp  s3,s2,0
+        xm.lddsp  s5,s4,8
+        xm.lddsp  s7,s6,16
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_energy.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_energy.S
new file mode 100644
index 00000000..1109e9a8
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_energy.S
@@ -0,0 +1,111 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+int64_t vect_s32_energy(
+    const int32_t b[],
+    const unsigned length,
+    const right_shift_t b_shr);
+*/
+
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (2)
+#define NSTACKWORDS     (8 + 8*NSTACKVECS+4)
+
+
+#define FUNCTION_NAME   vect_s32_energy
+
+#define STACK_VEC_TMP       (NSTACKWORDS-8-4)
+#define STACK_VEC_VR        (NSTACKWORDS-16-4)
+
+#define b           x10
+#define N           x11
+#define b_shr       x12
+#define vec_tmp     x13
+#define tail        x18
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.align 16; /* Translation error on this line: unexpected token at position 9. */ 
+
+FUNCTION_NAME:
+
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,0
+
+    {   li t3, 0                              ;   addi vec_tmp,sp, (STACK_VEC_TMP)*4         }
+    {   slli tail, N, SIZEOF_LOG2_S32            ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli tail, N, SIZEOF_LOG2_S32            \nMessage: The shift amount is not 32" */
+    {   xm.zexti tail, 5                            ;   xm.vclrdr                                  }
+    {   srli N, N, EPV_LOG2_S32                  ;   xm.brff tail, .L_tail_dealt_with_s32         }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri N, N, EPV_LOG2_S32                  \nMessage: The shift amount is not 32", 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    { nop                                           ;   slli N, N, 5                             }
+    {   add t3, b, N                           ;   xm.vstd vec_tmp}
+    {   xm.mkmsk tail, tail                        ; nop                                           }
+        xm.vlashr t3, b_shr
+        xm.vstrpv vec_tmp, tail
+#undef tail
+
+    { nop                                           ;   xm.vldc vec_tmp}
+    { nop                                           ;   xm.vclrdr                                  }
+    {   srli N, N, 5                             ;   xm.vlmacc0 vec_tmp}
+
+.L_tail_dealt_with_s32:
+    {   addi t3,sp, (STACK_VEC_VR)*4              ;   xm.brff N, .L_loop_bot_s32                   }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+
+.L_loop_top_s32:
+        {   li t3, 32                             ;   xm.vstr t3}
+            xm.vlashr b, b_shr
+        {   add b, b, t3                           ;   xm.vstr vec_tmp}
+        {   addi t3,sp, (STACK_VEC_VR)*4              ;   xm.vldc vec_tmp}
+        { nop                                           ;   xm.vldr t3}
+        {   addi N, N, -1                             ;   xm.vlmacc0 vec_tmp}
+        { nop                                           ;   xm.bt N, .L_loop_top_s32                   }
+.L_loop_bot_s32:
+
+.L_finish_s32:
+
+
+lui t3, %hi(vpu_vec_0x40000000)
+        addi t3,t3, %lo(vpu_vec_0x40000000)
+    {   addi a2,sp, (STACK_VEC_TMP)*4              ;   xm.vldc t3}
+lui t3, %hi(vpu_vec_0x80000000)        
+        addi t3,t3, %lo(vpu_vec_0x80000000)        
+    { nop                                           ;   xm.vstr a2}
+    { nop                                           ;   xm.vlmacc0 t3}
+lui t3, %hi(vpu_vec_zero)        
+        addi t3,t3, %lo(vpu_vec_zero)        
+    {   addi t3,sp, (STACK_VEC_TMP)*4             ;   xm.vldr t3}
+    { nop                                           ;   xm.vlmaccr0 t3}
+    { nop                                           ;   xm.vstd t3}
+    { nop                                           ;   xm.vlmaccr0 t3}
+    { nop                                           ;   xm.vstr t3}
+    { nop                                           ;   lw a1,0                          ( t3)}
+    {   addi a1, a1, 8                           ;   lw a0,4                          ( t3)}
+
+        xm.lddsp  s3,s2,0
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+
+//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+.L_end: 
+    .size FUNCTION_NAME, .L_end - FUNCTION_NAME
+
+
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_inverse.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_inverse.S
new file mode 100644
index 00000000..0524478b
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_inverse.S
@@ -0,0 +1,113 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_s32_inverse(
+    int32_t a[],
+    const int32_t b[],
+    const unsigned length,
+    const unsigned scale);
+
+*/
+
+
+#define NSTACKVECTS     (1)
+#define NSTACKWORDS     (8+8*(NSTACKVECTS)+4)
+
+#define FUNCTION_NAME   vect_s32_inverse
+
+
+#define STACK_VEC_TMP       (NSTACKWORDS-8-4)
+
+
+#define a               x10
+#define b               x11
+#define length          x12
+#define scale           x13
+#define div_hi          x13
+#define div_lo          x18
+#define v_mask          x19
+#define _32             x20
+#define val1            x21
+#define val2            x22
+#define vec_tmp         x23
+
+.text
+.p2align 2
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,0
+    xm.stdsp  s5,s4,8
+    xm.stdsp  s7,s6,16
+
+{   li t3, 0                              ;   sw s8, 24                          (sp)}
+{   slli length, length, 2                   ;   xm.vsetc t3}
+
+{   li _32, 32                             ; nop                                           }
+{   sub val2, scale, _32                    ;   li val1, 1                             }
+{   xm.shl div_hi, val1, val2                  ;   xm.shl div_lo, val1, scale                 }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli div_hi, val1, val2                  \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli div_lo, val1, scale                 \nMessage: The shift amount is not 32" */
+{   xm.vclrdr;    nop}
+{   addi vec_tmp,sp, (STACK_VEC_TMP)*4         ;   xm.brff length, .L_loop_bot                  }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+{xm.vldr b; nop}
+.p2align 4
+.L_loop_top:
+    // The masked out elements will  
+    {   xm.mkmsk v_mask, length                 ;   xm.vstd vec_tmp                            }
+        xm.vlashr b, v_mask
+    {   sub length, length, _32                 ;   xm.vsign                                   }
+
+    {   nop                                     ;   xm.vlmul0 b                                }
+        xm.vstrpv vec_tmp, v_mask
+        xm.vlashr b, v_mask
+    {   add b, b, _32                           ;   xm.vsign                                   }
+    {   mv val2, v_mask                         ;   nop                                        }
+    .L_div_loop_top:
+
+        {   srli val2, val2, 4                       ;   lw val1,0                    ( vec_tmp)}
+            xm.ldivu val1, s8, div_hi, div_lo, val1
+        {   addi vec_tmp, vec_tmp, 4                 ;   sw val1,0                    ( vec_tmp)}
+        {   nop                                      ;   xm.bt val2, .L_div_loop_top                }
+    .L_div_loop_bot:
+    {   addi vec_tmp,sp, (STACK_VEC_TMP)*4         ; nop                                           }
+
+
+    {   li val1, 1                             ;   xm.vlmul0 vec_tmp}
+        xm.vstrpv a, v_mask
+    {   xm.slt val1, length, val1                  ;   xm.vstr vec_tmp} // Headroom update
+    {   add a, a, _32                           ;   nop                  }
+    beqz val1, .L_loop_top 
+.L_loop_bot:
+
+.L_finish:
+    xm.lddsp  s3,s2,0
+    xm.lddsp  s5,s4,8
+    xm.lddsp  s7,s6,16
+{   li a0, 31                              ;   xm.vgetc t3}
+{   xm.zexti t3, 5                         ;   lw s8, 24                      (sp)}
+{   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                   } 
+
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_macc.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_macc.S
new file mode 100644
index 00000000..3b7aeec1
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_macc.S
@@ -0,0 +1,119 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// XMOS Public License: Version 1
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_s32_macc(
+    int32_t acc[],
+    const int32_t b[],
+    const int32_t c[],
+    const unsigned len,
+    const int acc_shr,
+    const int b_shr,
+    const int c_shr);
+*/
+
+
+#define NSTACKWORDS     (8+8+4)
+
+#define FUNCTION_NAME   vect_s32_macc
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+#define STACK_BYTEMASK  7
+
+#define acc         x10 
+#define b           x11 
+#define c           x12
+#define len         x13
+#define shr_b       x18
+#define shr_c       x19
+#define _32         x20
+#define tmp_vec     x21
+#define shr_acc     x22
+#define bytemask    len
+
+.text
+.p2align 2
+
+
+
+
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    {   li t3, 0                              ;   sw s6, 24                           (sp)}
+    {   slli t3, len, SIZEOF_LOG2_S32           ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli t3, len, SIZEOF_LOG2_S32           \nMessage: The shift amount is not 32" */
+    {   xm.zexti t3, 5                             ;   srli len, len, EPV_LOG2_S32              }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri len, len, EPV_LOG2_S32              \nMessage: The shift amount is not 32" */
+    mv shr_acc, a4
+    
+        xm.stdsp  s3,s2,8
+        xm.stdsp  s5,s4,16
+    {   li _32, 32                             ;   xm.vclrdr                                  }
+    mv shr_c, a6
+    {   xm.mkmsk t3, t3                          ;   nop}
+    mv shr_b, a5
+    {   addi tmp_vec,sp, (STACK_VEC_TMP)*4         ;   nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */
+    {   xm.mkmski t3, 32                           ;   sw t3, (STACK_BYTEMASK)*4             (sp)}
+    { nop                                           ;   xm.brff len, .L_loop_bot                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.bu .L_loop_top                          }
+
+.p2align 4
+.L_loop_top:
+            xm.vlashr acc, shr_acc
+            xm.vstrpv acc, t3
+            xm.vlashr b, shr_b
+            xm.vstrpv tmp_vec, t3
+            xm.vlashr c, shr_c
+        {   add b, b, _32                           ; nop                                           } 
+        {   add c, c, _32                           ;   xm.vlmul0 tmp_vec}
+        { nop                                           ;   xm.vladd acc}
+        {   addi len, len, -1                         ;   xm.vstr acc}
+        {   add acc, acc, _32                       ;   xm.bt len, .L_loop_top                     }
+.L_loop_bot:
+
+    { nop                                           ;   lw bytemask, (STACK_BYTEMASK)*4        (sp)}
+    { nop                                           ;   xm.brff bytemask, .L_finish                  }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        xm.vlashr acc, shr_acc
+        xm.vstrpv acc, bytemask
+        xm.vlashr b, shr_b
+        xm.vstrpv tmp_vec, t3 
+        xm.vlashr c, shr_c
+    {   mv t3, tmp_vec                        ;   xm.vlmul0 tmp_vec}
+    { nop                                           ;   xm.vladd acc}
+    { nop                                           ;   xm.vstd tmp_vec}
+        xm.vstrpv tmp_vec, bytemask
+        xm.vstrpv acc, bytemask
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vstr t3}
+
+.L_finish:
+    xm.lddsp  s3,s2,8
+    xm.lddsp  s5,s4,16
+
+
+    {   li a0, 31                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ;   lw s6, 24                           (sp)}
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       } 
+
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_max.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_max.S
new file mode 100644
index 00000000..c3e82220
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_max.S
@@ -0,0 +1,114 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+
+int32_t vect_s32_max(
+    const int32_t b[],
+    const unsigned length);
+
+
+*/
+
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (2)
+#define NSTACKWORDS     (8 + 8*NSTACKVECS+4)
+
+#define FUNCTION_NAME       vect_s32_max
+
+#define STACK_VEC_TMP       (NSTACKWORDS-8-4)
+#define STACK_VEC_CUR_MAX   (NSTACKWORDS-16-4)
+
+#define b           x10      // ![0x%08X]
+#define N           x11      // ![%d]
+#define tail        x12      // ![0x%X]
+#define tmp         x13      // ![%d]
+#define tmz         x18      // ![%d]
+
+
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.align 16; /* Translation error on this line: unexpected token at position 9. */ 
+
+FUNCTION_NAME:
+
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,8
+        xm.stdsp  s5,s4,16
+    
+    {   li t3, 0                              ;   slli tail, N, SIZEOF_LOG2_S32            }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli tail, N, SIZEOF_LOG2_S32            \nMessage: The shift amount is not 32" */
+    {   srli N, N, EPV_LOG2_S32                  ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri N, N, EPV_LOG2_S32                  \nMessage: The shift amount is not 32" */
+    {   xm.zexti tail, 5                            ;   slli tmp, N, 5                           }
+lui t3, %hi(vpu_vec_0x80000000)
+        addi t3,t3, %lo(vpu_vec_0x80000000)
+    {   addi t3,sp, (STACK_VEC_CUR_MAX)*4         ;   xm.vldr t3}
+    {   add t3, b, tmp                         ;   xm.vstr t3}
+    {   xm.mkmsk tail, tail                        ;   xm.vldr t3}
+    {   addi t3,sp, (STACK_VEC_CUR_MAX)*4         ; nop                                           }
+        xm.vstrpv t3, tail
+    
+    // Tail is fully accounted for in cur_max now.
+
+#undef tail
+#define cur_max     x12      // ![0x%08X]
+    
+    {   addi tmp,sp, (STACK_VEC_TMP)*4             ;   mv cur_max, t3                        }
+    { nop                                           ;   xm.vclrdr                                  }
+    {   mv t3, b                              ;   xm.brff N, .L_loop_bot                       }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_loop_top:
+        // cur_max[] saved in stack
+
+        {   mv b, t3                              ;   xm.vldr t3} //  vR[i] = b[i]
+        {   addi N, N, -1                             ;   xm.vlsub cur_max} //  vR[i] = cur_max[i] - b[i]
+        {   addi t3,sp, 0                         ;   xm.vdepth1                                 } //  vR[0] = [bitmask -- 1 where vR[i] < 0]  b[i] > cur_max[i]
+        {   xm.mkmski tmp, 1                            ; nop                                           }
+            xm.vstrpv t3, tmp
+        {   mv t3, b                              ;   lw tmp, 0                          (sp)}
+        {   mv tmz, tmp                            ;   xm.vldr t3}
+xm.zip tmz, tmp, 0
+        {   mv tmz, tmp                            ;   li t3, 32                             }
+xm.zip tmz, tmp, 0
+            xm.vstrpv cur_max, tmp
+        {   add t3, b, t3                         ;   xm.bt N, .L_loop_top                       }
+.L_loop_bot:
+
+
+    {   addi t3,sp, (STACK_VEC_CUR_MAX)*4         ;   lw cur_max, (STACK_VEC_CUR_MAX)*4      (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */
+    {   li N, 7                                ;   addi t3, t3, 4                         }
+    .L_loop2_top:
+        {   addi N, N, -1                             ;   lw a0,0                      ( t3)}
+        {   xm.slt tmp, a0, cur_max                    ; nop                                       }
+        {xm.shli tmp, tmp, 1; nop}
+        {   addi t3, t3, 4                         ;   xm.bru tmp                             }
+            {   mv cur_max, a0                         ; nop                                       }
+        {   mv a0, cur_max                         ;   xm.bt N, .L_loop2_top                  }
+
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s3,s2,8
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+
+//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+.L_end: 
+    .size FUNCTION_NAME, .L_end - FUNCTION_NAME
+
+
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_merge_accs.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_merge_accs.S
new file mode 100644
index 00000000..70f8e88d
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_merge_accs.S
@@ -0,0 +1,104 @@
+// Copyright 2021-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// XMOS Public License: Version 1
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+    void vect_s32_merge_accs(
+        int32_t a[],
+        split_acc_s32_t b[],
+        const unsigned length);
+*/
+
+
+#define NSTACKWORDS     (20)
+
+#define FUNCTION_NAME   vect_s32_merge_accs
+
+#define merged    x10
+#define split     x11
+#define len       x12
+#define _32       x13
+
+#define tmpR      x18
+#define tmpD      x19
+
+
+.text
+.p2align 2
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,64
+
+  { li t3, 15                               ; li _32, 32                               }
+  { add len, len, t3                         ; sub merged, merged, _32                   }
+  { srli len, len, 4                           ; addi t3,sp, 0                           }
+
+  .L_loop_top:
+
+    { add split, split, _32                     ; xm.vldd split}
+    { addi t3,sp, 32                           ; xm.vstd t3}
+    { add split, split, _32                     ; xm.vldd split}
+    { addi t3,sp, 0                           ; xm.vstd t3}
+    { addi len, len, -1                           ; lw tmpD, 0                           (sp)}
+    { add merged, merged, _32                   ; lw tmpR, 32                           (sp)}
+xm.unzip  tmpD, tmpR, 4
+      xm.stdi  tmpR,tmpD, 0(merged)
+    { nop                                           ; lw tmpD, 4                           (sp)}
+    { nop                                           ; lw tmpR, 36                           (sp)}
+xm.unzip  tmpD, tmpR, 4
+      xm.stdi  tmpR,tmpD, 8(merged)
+    { nop                                           ; lw tmpD, 8                           (sp)}
+    { nop                                           ; lw tmpR, 40                          (sp)}
+xm.unzip  tmpD, tmpR, 4
+      xm.stdi  tmpR,tmpD, 16(merged)
+    { nop                                           ; lw tmpD, 12                           (sp)}
+    { nop                                           ; lw tmpR, 44                          (sp)}
+xm.unzip  tmpD, tmpR, 4
+      xm.stdi  tmpR,tmpD, 24(merged)
+    { nop                                           ; lw tmpD, 16                           (sp)}
+    { nop                                           ; lw tmpR, 48                          (sp)}
+xm.unzip  tmpD, tmpR, 4
+      xm.stdi  tmpR,tmpD, 32(merged)
+    { nop                                           ; lw tmpD, 20                           (sp)}
+    { nop                                           ; lw tmpR, 52                          (sp)}
+xm.unzip  tmpD, tmpR, 4
+      xm.stdi  tmpR,tmpD, 40(merged)
+    { nop                                           ; lw tmpD, 24                           (sp)}
+    { nop                                           ; lw tmpR, 56                          (sp)}
+xm.unzip  tmpD, tmpR, 4
+      xm.stdi  tmpR,tmpD, 48(merged)
+    { nop                                           ; lw tmpD, 28                           (sp)}
+    { nop                                           ; lw tmpR, 60                          (sp)}
+xm.unzip  tmpD, tmpR, 4
+      xm.stdi  tmpR,tmpD, 56(merged)
+    { add merged, merged, _32                   ;nop                     }
+    bnez  len, .L_loop_top 
+
+  .L_finish:
+      xm.lddsp  s3,s2,64
+    { nop                                           ; xm.retsp (NSTACKWORDS)*4                         } 
+
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_min.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_min.S
new file mode 100644
index 00000000..d788983d
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_min.S
@@ -0,0 +1,114 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+
+int32_t vect_s32_min(
+    const int32_t b[],
+    const unsigned length);
+
+
+*/
+
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+#define NSTACKVECS      (2)
+#define NSTACKWORDS     (8 + 8*NSTACKVECS+4)
+
+#define FUNCTION_NAME       vect_s32_min
+
+#define STACK_VEC_TMP       (NSTACKWORDS-8-4)
+#define STACK_VEC_CUR_MIN   (NSTACKWORDS-16-4)
+
+#define b           x10      // ![0x%08X]
+#define N           x11      // ![%d]
+#define tail        x12      // ![0x%X]
+#define tmp         x13      // ![%d]
+#define tmz         x18      // ![%d]
+
+
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.align 16; /* Translation error on this line: unexpected token at position 9. */ 
+
+FUNCTION_NAME:
+
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,8
+        xm.stdsp  s5,s4,16
+    
+    {   li t3, 0                              ;   slli tail, N, SIZEOF_LOG2_S32            }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli tail, N, SIZEOF_LOG2_S32            \nMessage: The shift amount is not 32" */
+    {   srli N, N, EPV_LOG2_S32                  ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri N, N, EPV_LOG2_S32                  \nMessage: The shift amount is not 32" */
+    {   xm.zexti tail, 5                            ;   slli tmp, N, 5                           }
+        la t3, vpu_vec_0x7FFFFFFF
+    {   addi t3,sp, (STACK_VEC_CUR_MIN)*4         ;   xm.vldr t3}
+    {   add t3, b, tmp                         ;   xm.vstr t3}
+    {   xm.mkmsk tail, tail                        ;   xm.vldr t3}
+    {   addi t3,sp, (STACK_VEC_CUR_MIN)*4         ; nop                                           }
+        xm.vstrpv t3, tail
+    
+    // Tail is fully accounted for in cur_min now.
+
+#undef tail
+#define cur_min     x12      // ![0x%08X]
+    
+    {   addi tmp,sp, (STACK_VEC_TMP)*4             ;   mv cur_min, t3                        }
+    { nop                                           ;   xm.vclrdr                                  }
+    {   mv t3, b                              ;   xm.brff N, .L_loop_bot                       }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    .L_loop_top:
+        // cur_min[] saved in stack
+
+        {   mv b, t3                              ;   xm.vldr t3} //  vR[i] = b[i]
+        {   addi N, N, -1                             ;   xm.vlsub cur_min} //  vR[i] = cur_min[i] - b[i]
+        {   addi t3,sp, 0                         ;   xm.vdepth1                                 } //  vR[0] = [bitmask -- 1 where vR[i] < 0]  b[i] > cur_min[i]
+        {   xm.mkmski tmp, 1                            ; nop                                           }
+            xm.vstrpv t3, tmp
+        {   mv t3, b                              ;   lw tmp, 0                          (sp)}
+        {   mv tmz, tmp                            ;   xm.vldr t3}
+xm.zip tmz, tmp, 0
+        {   mv tmz, tmp                            ;   li t3, 32                             }
+xm.zip tmz, tmp, 0
+        {   xm.not tmp, tmp                            ; nop                                           }
+            xm.vstrpv cur_min, tmp
+        {   add t3, b, t3                         ;   xm.bt N, .L_loop_top                       }
+.L_loop_bot:
+
+
+    {   addi t3,sp, (STACK_VEC_CUR_MIN)*4         ;   lw cur_min, (STACK_VEC_CUR_MIN)*4      (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */
+    {   li N, 7                                ;   addi t3, t3, 4                         }
+    .L_loop2_top:
+        {   addi N, N, -1                             ;   lw a0,0                      ( t3)}
+        {   xm.slt tmp, cur_min, a0                    ; nop                                       }
+        {xm.shli tmp, tmp, 1; nop}
+        {   addi t3, t3, 4                         ;   xm.bru tmp                         }
+            {   mv cur_min, a0                         ; nop                                       }
+        {   mv a0, cur_min                         ;   xm.bt N, .L_loop2_top                  }
+
+        xm.lddsp  s5,s4,16
+        xm.lddsp  s3,s2,8
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+
+//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+.L_end: 
+    .size FUNCTION_NAME, .L_end - FUNCTION_NAME
+
+
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_mul.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_mul.S
new file mode 100644
index 00000000..f3342a09
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_mul.S
@@ -0,0 +1,109 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_s32_mul(
+    int32_t a[],
+    const int32_t b[],
+    const int32_t c[],
+    const unsigned len,
+    const int b_shr,
+    const int c_shr);
+*/
+
+
+#define NSTACKWORDS     (8+8)
+
+#define FUNCTION_NAME   vect_s32_mul
+
+#define STACK_VEC_TMP   0
+#define STACK_BYTEMASK  12
+
+#define a           x10 
+#define b           x11 
+#define c           x12
+#define len         x13
+#define shr_b       x18
+#define shr_c       x19
+#define _32         x20
+#define tmp_vec     x21
+#define bytemask    len
+
+.text
+.p2align 2
+
+
+
+
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    {   li t3, 0                              ; nop                                           }
+    {   slli t3, len, SIZEOF_LOG2_S32           ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli t3, len, SIZEOF_LOG2_S32           \nMessage: The shift amount is not 32" */
+    {   xm.zexti t3, 5                             ;   srli len, len, EPV_LOG2_S32              }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri len, len, EPV_LOG2_S32              \nMessage: The shift amount is not 32" */
+
+        xm.stdsp  s3,s2,32
+        xm.stdsp  s5,s4,40
+    {   li _32, 32                             ;   xm.vclrdr                              }
+    mv shr_c, a5
+    {   xm.mkmsk t3, t3                          ;   nop}
+    mv shr_b, a4
+    {   addi tmp_vec,sp, (STACK_VEC_TMP)*4         ;   nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */
+    {   xm.mkmski t3, 32                           ;   sw t3, (STACK_BYTEMASK)*4         (sp)}
+    { nop                                           ;   xm.brff len, .L_loop_bot                 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.bu .L_loop_top                      }
+
+.p2align 4
+.L_loop_top:
+            xm.vlashr b, shr_b
+            xm.vstrpv tmp_vec, t3
+            xm.vlashr c, shr_c
+        {   add b, b, _32                           ; nop                                       } 
+        {   add c, c, _32                           ;   xm.vlmul0 tmp_vec}  
+        {   addi len, len, -1                         ;   xm.vstr a}
+        {   add a, a, _32                           ;   xm.bt len, .L_loop_top                 }
+.L_loop_bot:
+
+    { nop                                           ;   lw bytemask, (STACK_BYTEMASK)*4    (sp)}
+    { nop                                           ;   xm.brff bytemask, .L_finish              }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        xm.vlashr b, shr_b
+        xm.vstrpv tmp_vec, t3 
+        xm.vlashr c, shr_c
+    {   mv t3, tmp_vec                        ;   xm.vlmul0 tmp_vec}
+    { nop                                           ;   xm.vstd tmp_vec}
+        xm.vstrpv tmp_vec, bytemask
+        xm.vstrpv a, bytemask
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vstr t3}
+
+.L_finish:
+    xm.lddsp  s3,s2,32
+    xm.lddsp  s5,s4,40
+
+
+    {   li a0, 31                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ; nop                                       }
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                   } 
+
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_nmacc.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_nmacc.S
new file mode 100644
index 00000000..8d1ac1e0
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_nmacc.S
@@ -0,0 +1,119 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// XMOS Public License: Version 1
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_s32_nmacc(
+    int32_t acc[],
+    const int32_t b[],
+    const int32_t c[],
+    const unsigned len,
+    const int acc_shr,
+    const int b_shr,
+    const int c_shr);
+*/
+
+
+#define NSTACKWORDS     (8+8+4)
+
+#define FUNCTION_NAME   vect_s32_nmacc
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+#define STACK_BYTEMASK  7
+
+#define acc         x10 
+#define b           x11 
+#define c           x12
+#define len         x13
+#define shr_b       x18
+#define shr_c       x19
+#define _32         x20
+#define tmp_vec     x21
+#define shr_acc     x22
+#define bytemask    len
+
+.text
+.p2align 2
+
+
+
+
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    {   li t3, 0                              ;   sw s6, 24                           (sp)}
+    {   slli t3, len, SIZEOF_LOG2_S32           ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli t3, len, SIZEOF_LOG2_S32           \nMessage: The shift amount is not 32" */
+    {   xm.zexti t3, 5                             ;   srli len, len, EPV_LOG2_S32              }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri len, len, EPV_LOG2_S32              \nMessage: The shift amount is not 32" */
+    mv shr_acc,a4
+    
+        xm.stdsp  s3,s2,8
+        xm.stdsp  s5,s4,16
+    {   li _32, 32                             ;   xm.vclrdr                                  }
+    mv shr_c, a6
+    {   xm.mkmsk t3, t3                          ;  nop}
+    mv shr_b, a5
+    {   addi tmp_vec,sp, (STACK_VEC_TMP)*4         ;   nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */
+    {   xm.mkmski t3, 32                           ;   sw t3, (STACK_BYTEMASK)*4             (sp)}
+    { nop                                           ;   xm.brff len, .L_loop_bot                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.bu .L_loop_top                          }
+
+.p2align 4
+.L_loop_top:
+            xm.vlashr acc, shr_acc
+            xm.vstrpv acc, t3
+            xm.vlashr b, shr_b
+            xm.vstrpv tmp_vec, t3
+            xm.vlashr c, shr_c
+        {   add b, b, _32                           ; nop                                           } 
+        {   add c, c, _32                           ;   xm.vlmul0 tmp_vec}
+        { nop                                           ;   xm.vlsub acc}
+        {   addi len, len, -1                         ;   xm.vstr acc}
+        {   add acc, acc, _32                       ;   xm.bt len, .L_loop_top                     }
+.L_loop_bot:
+
+    { nop                                           ;   lw bytemask, (STACK_BYTEMASK)*4        (sp)}
+    { nop                                           ;   xm.brff bytemask, .L_finish                  }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        xm.vlashr acc, shr_acc
+        xm.vstrpv acc, bytemask
+        xm.vlashr b, shr_b
+        xm.vstrpv tmp_vec, t3 
+        xm.vlashr c, shr_c
+    {   mv t3, tmp_vec                        ;   xm.vlmul0 tmp_vec}
+    { nop                                           ;   xm.vlsub acc}
+    { nop                                           ;   xm.vstd tmp_vec}
+        xm.vstrpv tmp_vec, bytemask
+        xm.vstrpv acc, bytemask
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vstr t3}
+
+.L_finish:
+    xm.lddsp  s3,s2,8
+    xm.lddsp  s5,s4,16
+
+
+    {   li a0, 31                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ;   lw s6, 24                           (sp)}
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       } 
+
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_scale.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_scale.S
new file mode 100644
index 00000000..9c511596
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_scale.S
@@ -0,0 +1,109 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+headroom_t vect_s32_scale(
+    int32_t a[],
+    const int32_t b[],
+    const unsigned len,
+    const int32_t c,
+    const right_shift_t b_shr,
+    const right_shift_t c_shr);
+*/
+
+
+#define NSTACKWORDS     (8+8)
+
+#define FUNCTION_NAME   vect_s32_scale
+
+#define STACK_VEC_TMP   0  
+#define STACK_BYTEMASK  12
+
+#define a           x10 
+#define b           x11 
+#define len         x12
+#define c           x13
+#define shr_b       x18
+#define _32         x19
+#define tmp_vec     x20
+#define bytemask    len
+
+.text
+.p2align 2
+
+
+
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,32
+        xm.stdsp  s5,s4,40
+    {   li t3, 0                              ; nop                                           }
+    {   slli t3, len, SIZEOF_LOG2_S32           ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli t3, len, SIZEOF_LOG2_S32           \nMessage: The shift amount is not 32" */
+    {   xm.zexti t3, 5                             ;   srli len, len, EPV_LOG2_S32              }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri len, len, EPV_LOG2_S32              \nMessage: The shift amount is not 32" */
+    {   li _32, 32                             ;   xm.vclrdr                                  }
+    mv shr_b, a4
+    {   xm.mkmsk t3, t3                          ;   nop}
+    {   xm.mkmski t3, 32                           ;   sw t3, (STACK_BYTEMASK)*4             (sp)}
+        xm.stdsp  c,c,((STACK_VEC_TMP/2)+0)*8
+        xm.stdsp  c,c,((STACK_VEC_TMP/2)+1)*8
+        xm.stdsp  c,c,((STACK_VEC_TMP/2)+2)*8
+        xm.stdsp  c,c,((STACK_VEC_TMP/2)+3)*8
+    mv tmp_vec, a5
+    {   addi c,sp, (STACK_VEC_TMP)*4               ;   nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */
+        xm.vlashr c, tmp_vec
+        xm.vstrpv c, t3
+
+    { nop                                           ;   xm.brff len, .L_loop_bot                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.bu .L_loop_top                          }
+
+.p2align 4
+.L_loop_top:
+            xm.vlashr b, shr_b
+        {   add b, b, _32                           ;   xm.vlmul0 c}  
+        {   addi len, len, -1                         ;   xm.vstr a}
+        {   add a, a, _32                           ;   xm.bt len, .L_loop_top                     }
+.L_loop_bot:
+
+    { nop                                           ;   lw bytemask, (STACK_BYTEMASK)*4        (sp)}
+    { nop                                           ;   xm.brff bytemask, .L_finish                  }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        xm.vlashr b, shr_b
+    {   mv t3, c                              ;   xm.vlmul0 c}
+    { nop                                           ;   xm.vstd t3}
+        xm.vstrpv t3, bytemask
+        xm.vstrpv a, bytemask
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vstr t3}
+
+.L_finish:
+    xm.lddsp  s3,s2,32
+    xm.lddsp  s5,s4,40
+
+    {   li a0, 32                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ;   srli a1, t3, 8                          }
+    {   xm.shr a0, a0, a1                          ;   addi t3, t3, 1                         }
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       } 
+
+.L_func_end:
+
+
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_split_accs.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_split_accs.S
new file mode 100644
index 00000000..71281fb8
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_split_accs.S
@@ -0,0 +1,114 @@
+// Copyright 2021-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// XMOS Public License: Version 1
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+    void vect_s32_split_accs(
+        split_acc_s32_t a[],
+        const int32_t b[],
+        const unsigned length);
+*/
+
+
+#define NSTACKWORDS     (20)
+
+#define FUNCTION_NAME   vect_s32_split_accs
+
+#define split     x10
+#define merged    x11
+#define len       x12
+#define _32       x13
+
+#define tmpR      x18
+#define tmpD      x19
+
+
+.text
+.p2align 2
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4
+    xm.stdsp  s3,s2,64
+
+  { li t3, 15                               ; li _32, 32                               }
+  { add len, len, t3                         ; nop                                           }
+  { srli len, len, 4                           ; addi t3,sp, 0                           }
+
+  .L_loop_top:
+
+      xm.lddi  tmpD,tmpR, 0(merged)
+xm.zip tmpR, tmpD, 4
+    { nop                                           ; sw tmpR, 0                           (sp)}
+    { nop                                           ; sw tmpD, 32                           (sp)}
+    
+      xm.lddi  tmpD,tmpR, 8(merged)
+xm.zip tmpR, tmpD, 4
+    { nop                                           ; sw tmpR, 4                           (sp)}
+    { nop                                           ; sw tmpD, 36                           (sp)}
+    
+      xm.lddi  tmpD,tmpR, 16(merged)
+xm.zip tmpR, tmpD, 4
+    { nop                                           ; sw tmpR, 8                           (sp)}
+    { nop                                           ; sw tmpD, 40                          (sp)}
+    
+      xm.lddi  tmpD,tmpR, 24(merged)
+xm.zip tmpR, tmpD, 4
+    { add merged, merged, _32                   ; sw tmpR, 12                           (sp)}
+    { nop                                           ; sw tmpD, 44                          (sp)}
+    
+      xm.lddi  tmpD,tmpR, 0(merged)
+xm.zip tmpR, tmpD, 4
+    { nop                                           ; sw tmpR, 16                           (sp)}
+    { nop                                           ; sw tmpD, 48                          (sp)}
+    
+      xm.lddi  tmpD,tmpR, 8(merged)
+xm.zip tmpR, tmpD, 4
+    { nop                                           ; sw tmpR, 20                           (sp)}
+    { nop                                           ; sw tmpD, 52                          (sp)}
+    
+      xm.lddi  tmpD,tmpR, 16(merged)
+xm.zip tmpR, tmpD, 4
+    { nop                                           ; sw tmpR, 24                           (sp)}
+    { nop                                           ; sw tmpD, 56                          (sp)}
+    
+      xm.lddi  tmpD,tmpR, 24(merged)
+xm.zip tmpR, tmpD, 4
+    { nop                                           ; sw tmpR, 28                           (sp)}
+    { addi len, len, -1                           ; sw tmpD, 60                          (sp)}
+
+
+    { addi t3,sp, 32                           ; xm.vldd t3}
+    { add split, split, _32                     ; xm.vstd split}
+    { addi t3,sp, 0                           ; xm.vldd t3}
+    { add split, split, _32                     ; xm.vstd split}
+
+    { add merged, merged, _32                   ; nop                     }
+     bnez  len, .L_loop_top 
+
+  .L_finish:
+      xm.lddsp  s3,s2,64
+    { nop                                           ; xm.retsp (NSTACKWORDS)*4                         } 
+
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_sqrt.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_sqrt.S
new file mode 100644
index 00000000..e18d8670
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_sqrt.S
@@ -0,0 +1,185 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+
+headroom_t vect_s32_sqrt(
+    int32_t a[],
+    const int32_t b[],
+    const unsigned length,
+    const right_shift_t b_shr,
+    const unsigned depth);
+
+*/
+
+
+#define NSTACKVECTS     (3)
+#define NSTACKWORDS     (12+8*(NSTACKVECTS)+4)
+
+#define FUNCTION_NAME   vect_s32_sqrt
+
+// Temporary vector needed because there's no instruction to do vR[] * vR[]
+#define STACK_VEC_TMP       (NSTACKWORDS-8-4)
+// Holds the shifted values of b[] while we're solving it.
+#define STACK_VEC_TARGET    (NSTACKWORDS-16-4)
+// Holds the power of 2 that is currently being worked on inside hte inner loop.
+// @todo If we had an instruction that set each vR[k] to the value of a register, this wouldn't be needed.
+#define STACK_VEC_POW       (NSTACKWORDS-24-4)
+
+#define STACK_DEPTH     0
+
+#define a           x10
+#define b           x11
+#define length      x12
+#define b_shr       x13
+
+#define depth       x18
+#define mask_vec    x19
+#define _32         x20
+#define _1          x21
+#define tmp         x24
+
+.text
+.p2align 2
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    xm.stdsp  s7,s6,24
+    sw a4, (STACK_DEPTH)*4                (sp)
+// Set VPU mode to 32-bit
+// (length << 2) is the length of the vector in bytes.
+{   li t3, 0                              ;   sw s8, 4                          (sp)}
+{   slli length, length, 2                   ;   xm.vsetc t3}
+
+// Maximum supported depth is 31
+{   li tmp, 31                             ;   lw t3, (STACK_DEPTH)*4                (sp)}
+{   xm.assert t3                              ; nop                                           }
+{   xm.sltu t3, tmp, t3                       ;   li _1, 1                               }
+{   li _32, 32                             ;   xm.brff t3, .L_vect_loop_top                }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   sw tmp, (STACK_DEPTH)*4                (sp)}
+
+
+.L_vect_loop_top:
+
+    // mask_vec is a byte mask for the elements of a[] that we're currently working on.
+    // using VSTRPV with mask_vec prevents us from corrupting the headroom register.
+    // depth is the number of MSBs that we're solving for
+    {   xm.mkmsk mask_vec, length                  ;   lw depth, (STACK_DEPTH)*4              (sp)}
+
+    // First initialize the target vector using b[]
+    // (Doing this first allows this function to operate in-place on b[] if desired)
+    // @todo If we wanted to, we could do a VSIGN + VLMUL here to take an absolute value of each b[k],
+    //       since this function will not work for any negative b[k].
+        xm.vlashr b, b_shr
+    {   addi t3,sp, (STACK_VEC_TARGET)*4          ;   add b, b, _32                           }
+        xm.vstrpv t3, mask_vec
+
+    // Initialize the result (a[]) with 0's
+    { nop                                           ;   xm.vclrdr                                  }
+        xm.vstrpv a, mask_vec
+
+    // VEC_POW[] is the bit we're currently solving for. Initialize to the first non-sign bit.
+    // (The VSTD is to zero out the VEC_POW[] elements that are going to be masked out, because
+    //  we're going to use VEC_POW[] later to update the headroom register)
+    la t3, vpu_vec_0x40000000
+    {   addi t3,sp, (STACK_VEC_POW)*4             ;   xm.vldr t3}
+    { nop                                           ;   xm.vstd t3}
+        xm.vstrpv t3, mask_vec 
+
+    // This saves us a few cycles on the first iteration (because of loop alignment, we'd need a 
+    // 'bu .L_sqrt_loop_top' here even if we didn't want to skip ahead). It's necessary because 
+    // we don't want to right-shift VEC_POW[] on the first iteration (it's already 2^30), and we 
+    // can't fix that by initializing VEC_POW[] to 0x80000000 above because that's negative and 
+    // VLASHR is an arithmetic shift.
+    {   addi t3,sp, (STACK_VEC_TARGET)*4          ;   xm.bu .L_first_iter                        }
+
+    // Inner loop. Iteratively solving for the square root bit-by-bit
+    // 12 instructions + 1 FNOP
+    .p2align 4
+    .L_sqrt_loop_top:
+
+        // Load the next power of 2 and store it back to VEC_POW[]
+            xm.vlashr t3, _1
+            xm.vstrpv t3, mask_vec
+
+        // Add the current power of 2 to each a[] to get the next value to be tested.
+        // test[k] <-- a[k] + VEC_POW
+        {   addi t3,sp, (STACK_VEC_TMP)*4             ;   xm.vladd a}
+
+
+        // vR[] contains the values we're testing. Store it and square it
+        // vR[k] <-- ( test[k] * test[k] ) >> 30
+            xm.vstrpv t3, mask_vec
+        {   addi t3,sp, (STACK_VEC_TARGET)*4          ;   xm.vlmul0 t3}
+
+        .L_first_iter:
+
+        // Subtract the squared test values from the target vector   
+        // vR[k] <-- target[k] - (( test[k] * test[k] ) >> 30)
+        { nop                                           ;   xm.vlsub t3}
+
+        // If vR[k] is negative, the test value was too large, so we don't want to update those a[k]
+        // for which vR[k] is negative.
+
+        //  vR[k] = a[k] + MAX( signum( vR[k] ), 0 ) * VEC_POW[k]
+
+        {   addi depth, depth, -1                     ;   xm.vsign                                   }
+        {   addi t3,sp, (STACK_VEC_POW)*4             ;   xm.vpos                                    }
+        { nop                                           ;   xm.vlmul0 t3}
+        { nop                                           ;   xm.vladd a}
+
+        // Store the updated results in a[]
+            xm.vstrpv a, mask_vec
+        { nop                                           ;   xm.bt depth, .L_sqrt_loop_top              }
+    .L_sqrt_loop_bot:
+
+    // a[] now contains the results, but we haven't updated the headroom register because we've only
+    // been using VSTRPV. So, update the headroom register
+    // @todo Do we need to update the headroom register? Aren't we more or less guaranteed there's no
+    // headroom, because we got rid of the headroom of b[]? Should work out the math on this later.
+    
+    // We used mask_vec when initializing VEC_POW[], so we can use that here to avoid corrupting
+    // the headroom register with data that comes after a[]. x28 is already pointing at VEC_POW[].
+        xm.vstrpv t3, mask_vec
+    {   sub length, length, _32                 ;   xm.vldr t3}
+
+    // If (length - 32) < 1 we're done.
+    {   xm.slt tmp, length, _1                     ;   xm.vstr t3}
+    {   add a, a, _32                           ;   nop             }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    beqz tmp, .L_vect_loop_top 
+.L_vect_loop_bot:
+
+.L_finish:
+
+    xm.lddsp  s3,s2,8
+    xm.lddsp  s5,s4,16
+    xm.lddsp  s7,s6,24
+{   li a0, 31                              ;   xm.vgetc t3}
+{   xm.zexti t3, 5                             ;   lw s8, 4                      (sp)}
+{   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                   } 
+
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_sum.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_sum.S
new file mode 100644
index 00000000..381395b0
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_sum.S
@@ -0,0 +1,98 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+
+/*
+    int64_t vect_s32_sum(
+        const int32_t b[],
+        const unsigned length);
+*/
+
+
+#include "../asm_helper.h"
+
+
+#define FUNCTION_NAME   vect_s32_sum
+#define NSTACKWORDS     (16+4)
+
+
+#define STACK_VEC_TMP       (NSTACKWORDS-8-4)
+
+#define b           x10
+#define N           x11
+#define tail        x12
+
+
+.text
+.p2align 2
+
+
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,0
+
+    {   li t3, 0                              ; nop                                           }
+    {   slli tail, N, SIZEOF_LOG2_S32            ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli tail, N, SIZEOF_LOG2_S32            \nMessage: The shift amount is not 32" */
+    {   xm.zexti tail, 5                            ;   xm.vclrdr                                  }
+    {   srli N, N, EPV_LOG2_S32                  ;   xm.brff tail, .L_tail_dealt_with             }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri N, N, EPV_LOG2_S32                  \nMessage: The shift amount is not 32", 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+    la t3, vpu_vec_0x40000000 
+    {   addi s2,sp, (STACK_VEC_TMP)*4              ;   xm.vldr t3}
+    { nop                                           ;   xm.vstd s2}
+    {   xm.mkmsk tail, tail                        ;   slli N, N, 3                             }
+        xm.vstrpv s2, tail
+        sh2add s3, N, b              
+    { nop                                           ;   xm.vldc s2}
+    { nop                                           ;   xm.vclrdr                                  }
+    {   srli N, N, 3                             ;   xm.vlmacc0 s3}
+    {   li t3, 32                             ;   xm.vldc t3}
+
+.L_tail_dealt_with:
+    la t3, vpu_vec_0x40000000
+    {   li t3, 32                             ;   xm.vldc t3}
+    {   addi a2,sp, (STACK_VEC_TMP)*4              ;   xm.brff N, .L_loop_bot                       }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+.L_loop_top:
+        {   addi N, N, -1                             ;   xm.vlmacc0 b}
+        {   add b, b, t3                           ;   xm.bt N, .L_loop_top                       }
+.L_loop_bot:
+
+.L_finish:
+
+    // Requires vC to be filled with 0x40000000, which it already should be.
+  
+    la t3, vpu_vec_0x80000000   
+    { nop                                           ;   xm.vstr a2}
+    { nop                                           ;   xm.vlmacc0 t3}
+    
+    la t3, vpu_vec_zero  
+    {   addi t3,sp, (STACK_VEC_TMP)*4             ;   xm.vldr t3}
+    { nop                                           ;   xm.vlmaccr0 t3}
+    { nop                                           ;   xm.vstd t3}
+    { nop                                           ;   xm.vlmaccr0 t3}
+    { nop                                           ;   xm.vstr t3}
+    { nop                                           ;   lw a1,0                          ( t3)}
+    {   addi a1, a1, 8                           ;   lw a0,4                          ( t3)}
+
+        xm.lddsp  s3,s2,0
+        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+.L_fend: 
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_fend - FUNCTION_NAME
+
+
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_to_f32.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_to_f32.S
new file mode 100644
index 00000000..cee1739c
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_to_f32.S
@@ -0,0 +1,91 @@
+// Copyright 2022-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+    
+#if defined(__VX4B__)
+
+.text
+
+/*
+
+  void vect_s32_to_vect_f32(
+      float a[],
+      const int32_t b[], 
+      const unsigned length, 
+      const exponent_t b_exp);
+
+*/
+
+#define NSTACKWORDS 8
+#define FUNC_NAME vect_s32_to_vect_f32
+    
+.globl	FUNC_NAME
+.type	FUNC_NAME,@function
+
+#define a       x10
+#define b       x11
+#define len     x12
+#define b_exp   x13
+
+#define _0      x18
+#define tmp1    x19
+#define tmp0    x20
+
+.p2align 4
+FUNC_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,0
+  { li _0, 0                   ; li s3, 23                  }
+  { add b_exp, b_exp, s3        ; nop                             }
+    xm.stdsp  s5,s4,8
+
+  // handle tail first
+  { srli t3, len, 1             ; xm.zexti len, 1                 }
+  { mv len, t3                ; xm.brff len, .L_pre_loop         }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    
+    xm.ldd  tmp0,tmp1, len(b)
+    { xm.slt t3, tmp0, _0           ; slli tmp1, len, 1            }
+    beqz t3, .L_posT
+      xm.neg tmp0, tmp0
+    .L_posT:
+    xm.fmake tmp0, t3, b_exp, _0, tmp0
+    xm.stw tmp0,tmp1( a)/* XAT Warning: "Falling back on assumption: the int < 12 for the integer value of the item at position 2 in the instruction's operands in stwi tmp0, a,tmp1\nMessage: The offset can be encoded in s2rus immediate" */
+
+  .L_pre_loop:
+
+  { addi len, len, -1               ; xm.brff len, .L_loop_end           }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+  
+  .L_loop:
+    xm.ldd  tmp0,tmp1, len(b)
+  { xm.slt t3, tmp1, _0             ; nop                               }
+    beqz t3, .L_pos1
+      xm.neg tmp1, tmp1
+    .L_pos1:
+    xm.fmake tmp1, t3, b_exp, _0, tmp1
+    slt t3, tmp0, _0
+    beqz t3, .L_pos0
+      xm.neg tmp0, tmp0
+    .L_pos0:
+    xm.fmake tmp0, t3, b_exp, _0, tmp0
+    xm.std  tmp0,tmp1, len(a)
+    { addi len, len, -1             ; xm.bt len, .L_loop               }
+  .L_loop_end:
+    
+    xm.lddsp  s3,s2,0
+    xm.lddsp  s5,s4,8
+    xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+	
+	// RETURN_REG_HOLDER
+	.set	FUNC_NAME.nstackwords,NSTACKWORDS
+	.globl	FUNC_NAME.nstackwords
+	.set	FUNC_NAME.maxcores,1
+	.globl	FUNC_NAME.maxcores
+	.set	FUNC_NAME.maxtimers,0
+	.globl	FUNC_NAME.maxtimers
+	.set	FUNC_NAME.maxchanends,0
+	.globl	FUNC_NAME.maxchanends
+.Ltmp1:
+	.size	FUNC_NAME, .Ltmp1-FUNC_NAME
+
+        
+#endif
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_to_s16.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_to_s16.S
new file mode 100644
index 00000000..ecd35995
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_to_s16.S
@@ -0,0 +1,77 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+
+
+#if defined(__VX4B__)
+
+
+/*  
+void vect_s32_to_vect_s16(
+    int16_t a[],
+    const int32_t b[],
+    const unsigned length,
+    const int v_shr);
+*/
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+
+#define NSTACKWORDS     (8)
+
+#define FUNCTION_NAME   vect_s32_to_vect_s16
+
+#define a           x10
+#define b           x11
+#define len         x12
+#define b_shr       x13
+#define tail        x18
+#define _16         x19
+
+
+
+FUNCTION_NAME:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.stdsp  s3,s2,0
+    {   li t3, 0                              ;   li _16, 16                         }
+    {   sub b_shr, b_shr, _16                   ;   xm.vsetc t3}
+    {   srli len, len, EPV_LOG2_S32              ;   slli tail, len, SIZEOF_LOG2_S16      }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri len, len, EPV_LOG2_S32              \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli tail, len, SIZEOF_LOG2_S16      \nMessage: The shift amount is not 32" */
+    { nop                                           ;   xm.zexti tail, 4                        }
+    {   xm.mkmsk tail, tail                        ;   xm.brff len, .L_loop_bot                 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    {   xm.mkmski t3, 16                           ;   xm.bu .L_loop_top                      }
+
+.p2align 4
+.L_loop_top:
+        xm.vlashr b, b_shr
+    {   add b, b, _16                           ;   xm.vdepth16                            }
+    {   addi len, len, -1                         ;   add b, b, _16                       }
+        xm.vstrpv a, t3
+    {   add a, a, _16                           ;   xm.bt len, .L_loop_top                 }
+.L_loop_bot:
+
+    { nop                                           ;   xm.brff tail, .L_finish                  }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.vclrdr                              }
+        xm.vlashr b, b_shr
+    { nop                                           ;   xm.vdepth16                            }
+        xm.vstrpv a, tail
+
+.L_finish:
+        xm.lddsp  s3,s2,0
+    { nop                                           ;   xm.retsp (NSTACKWORDS)*4                   } 
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_unzip.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_unzip.S
new file mode 100644
index 00000000..66132482
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_unzip.S
@@ -0,0 +1,69 @@
+// Copyright 2021-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// XMOS Public License: Version 1
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+    void vect_s32_unzip(
+        int32_t a[],
+        int32_t b[],
+        const complex_s32_t c[],
+        const unsigned length);
+*/
+
+
+#define NSTACKWORDS     (4+4)
+
+#define FUNCTION_NAME   vect_s32_unzip
+
+
+
+#define a         x10
+#define b         x11
+#define c         x12
+#define len       x13
+
+#define tmpA      x18
+#define tmpB      x19
+
+.text
+.p2align 2
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,0
+
+  { addi len, len, -1                           ; xm.brff len, .L_finish                         }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+  .L_loop_top:
+      xm.ldd  tmpA,tmpB, len(c)
+    xm.stw tmpA,len                          ( a)
+    xm.stw tmpB,len                          ( b)
+    { addi len, len, -1                           ; xm.bt len, .L_loop_top                       }
+
+  .L_finish:
+      xm.lddsp  s3,s2,0
+    { nop                                           ; xm.retsp (NSTACKWORDS)*4                         } 
+
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_zip.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_zip.S
new file mode 100644
index 00000000..7f33d758
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_zip.S
@@ -0,0 +1,143 @@
+// Copyright 2021-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// XMOS Public License: Version 1
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+    void vect_s32_zip(
+        complex_s32_t a[],
+        const int32_t b[],
+        const int32_t c[],
+        const unsigned length,
+        const right_shift_t b_shr,
+        const right_shift_t c_shr);
+*/
+
+
+#define NSTACKWORDS     (8+2*8+4)
+
+#define FUNCTION_NAME   vect_s32_zip
+
+#define STACK_VEC_C     (NSTACKWORDS-8-4)
+#define STACK_VEC_B     (NSTACKWORDS-16-4)
+
+
+#define a         x10
+#define b         x11
+#define c         x12
+#define len       x13
+#define b_shr     x18
+#define c_shr     x19
+
+#define vec_B     x20
+#define vec_C     x21
+#define _28       x22
+#define _32       x23
+
+
+.text
+.p2align 2
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,0
+    xm.stdsp  s5,s4,8
+    xm.stdsp  s7,s6,16
+  { li t3, 0                                ; sw s8, 24                            (sp)}
+  { slli t3, len, SIZEOF_LOG2_S32             ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli t3, len, SIZEOF_LOG2_S32             \nMessage: The shift amount is not 32" */
+  { xm.zexti t3, 5                               ; srli len, len, EPV_LOG2_S32                }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shri len, len, EPV_LOG2_S32                \nMessage: The shift amount is not 32" */
+  { addi vec_B,sp, (STACK_VEC_B)*4               ; sw t3, 28                            (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */
+  { li t3, 2                                ; xm.vclrdr                                    }
+{ xm.bitrev t3, t3                           ; nop}
+xm.vstd vec_B
+  { addi vec_C,sp, (STACK_VEC_C)*4               ; sw t3,0                         ( vec_B)}
+  { li _32, 32                               ; xm.vldc vec_B}
+  mv b_shr, a4
+  { li _28, 28                               ; nop}
+
+  mv c_shr,a5
+  { nop                                           ; xm.brff len, .L_loop_bot                       }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+//  { nop                                           ; xm.bu .L_loop_top                            }
+
+  .p2align 4
+  .L_loop_top:
+      xm.vlashr b, b_shr
+    { add b, b, _32                             ; xm.vstr vec_B}
+      xm.vlashr c, c_shr
+    { add a, a, _32                             ; addi len, len, -1                           }
+
+    { add vec_C, vec_C, _28                     ; xm.vstr vec_C}
+    { add vec_B, vec_B, _28                     ; xm.vclrdr                                    }
+    { addi vec_C, vec_C, -4                       ; xm.vlmaccr0 vec_C}
+    { addi vec_B, vec_B, -4                       ; xm.vlmaccr0 vec_B}
+
+    { addi vec_C, vec_C, -4                       ; xm.vlmaccr0 vec_C}
+    { addi vec_B, vec_B, -4                       ; xm.vlmaccr0 vec_B}
+    { addi vec_C, vec_C, -4                       ; xm.vlmaccr0 vec_C}
+    { addi vec_B, vec_B, -4                       ; xm.vlmaccr0 vec_B}
+    //FNOP
+    { addi vec_C, vec_C, -4                       ; xm.vlmaccr0 vec_C}
+    { addi vec_B, vec_B, -4                       ; xm.vlmaccr0 vec_B}
+    { sub t3, a, _32                           ; xm.vstr a}
+    { add a, a, _32                             ; xm.vclrdr                                    }
+
+    { addi vec_C, vec_C, -4                       ; xm.vlmaccr0 vec_C}
+    { addi vec_B, vec_B, -4                       ; xm.vlmaccr0 vec_B}
+    { addi vec_C, vec_C, -4                       ; xm.vlmaccr0 vec_C}
+    { addi vec_B, vec_B, -4                       ; xm.vlmaccr0 vec_B}
+    //FNOP
+    { addi vec_C, vec_C, -4                       ; xm.vlmaccr0 vec_C}
+    { addi vec_B, vec_B, -4                       ; xm.vlmaccr0 vec_B}
+    { addi vec_C, vec_C, -4                       ; xm.vlmaccr0 vec_C}
+    { addi vec_B, vec_B, -4                       ; xm.vlmaccr0 vec_B}
+    
+    { add c, c, _32                             ; nop                                           }
+    { addi vec_B,sp, (STACK_VEC_B)*4               ; xm.vstr t3}
+    { addi vec_C,sp, (STACK_VEC_C)*4               ; xm.bt len, .L_loop_top                       }
+  .L_loop_bot:
+  
+  { nop                                           ; lw len, 28                              (sp)}
+  { srli len, len, SIZEOF_LOG2_S32             ; xm.brff len, .L_finish                           }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shri len, len, SIZEOF_LOG2_S32             \nMessage: The shift amount is not 32", 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    xm.vlashr b, b_shr
+  { nop                                           ; xm.vstr vec_B}
+    xm.vlashr c, c_shr
+  { addi len, len, -1                           ; xm.vstr vec_C}
+
+#define tmpB  x22
+#define tmpC  x23
+  .L_tail_loop_top:
+    { nop                                           ; xm.ldw tmpB,len                        ( vec_B)}
+    { nop                                           ; xm.ldw tmpC,len                        ( vec_C)}
+      xm.std  tmpB,tmpC, len(a)
+    { addi len, len, -1                           ; xm.bt len, .L_tail_loop_top                    }
+  .L_tail_loop_bot:
+
+  .L_finish:
+      xm.lddsp  s3,s2,0
+      xm.lddsp  s5,s4,8
+      xm.lddsp  s7,s6,16
+    { nop                                           ; lw s8, 24                            (sp)}
+    { nop                                           ; xm.retsp (NSTACKWORDS)*4                         } 
+
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_s8/vect_s8_is_negative.S b/lib_xcore_math/src/arch/vx4b/vect_s8/vect_s8_is_negative.S
new file mode 100644
index 00000000..bdf06501
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_s8/vect_s8_is_negative.S
@@ -0,0 +1,81 @@
+// Copyright 2021-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+void vect_s8_is_negative(
+    int8_t a[],
+    const int8_t b[],
+    const unsigned len);
+*/
+
+
+#include "../asm_helper.h"
+
+#define NSTACKWORDS     (8)
+
+#define FUNCTION_NAME   vect_s8_is_negative
+
+#define a           x10 
+#define b           x11 
+#define len         x12
+#define _32         x13
+#define vec_0xC1    x18
+#define tail        x19
+
+.text
+.p2align 2
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    li t3, 0x200
+    xm.stdsp  s3,s2,8
+  { mv t3, len                              ; xm.vsetc t3}
+  { xm.zexti t3, 5                               ; srli len, len, EPV_LOG2_S8                 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shri len, len, EPV_LOG2_S8                 \nMessage: The shift amount is not 32" */
+  { li _32, 32                               ; xm.mkmsk tail, t3                           }
+lui t3, %hi(vpu_vec_0xC1)
+    addi t3,t3, %lo(vpu_vec_0xC1)
+  { mv vec_0xC1, t3                         ; mv t3, b                                }
+  { nop                                           ; xm.brff len, .L_loop_bot                       }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+  .L_loop_top:
+    { add t3, t3, _32                         ; xm.vldr t3}
+    { nop                                           ; xm.vsign                                     }
+    { nop                                           ; xm.vlsub vec_0xC1}
+    { nop                                           ; xm.vpos                                      }
+    { addi len, len, -1                           ; xm.vstr a}
+    { add a, a, _32                             ; xm.bt len, .L_loop_top                       }
+  .L_loop_bot:
+
+  { nop                                           ; xm.brff tail, .L_finish                        }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+  { nop                                           ; xm.vldr t3}
+  { nop                                           ; xm.vsign                                     }
+  { nop                                           ; xm.vlsub vec_0xC1}
+  { nop                                           ; xm.vpos                                      }
+    xm.vstrpv a, tail
+
+    
+.L_finish:
+        xm.lddsp  s3,s2,8
+    { nop                                           ; xm.retsp (NSTACKWORDS)*4                     } 
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_add.S b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_add.S
new file mode 100644
index 00000000..1dee782c
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_add.S
@@ -0,0 +1,156 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+
+
+.text
+.p2align 2
+
+
+#define NSTACKWORDS     (32)
+#define STACK_VEC_TMP   (NSTACKWORDS-8-1)
+#define STACK_BYTEMASK  8
+
+#define a           x10 
+#define b           x11 
+#define c           x12
+#define len         x13
+
+#define shr_b       x18
+#define shr_c       x19
+#define _32         x20
+#define tmp_vec     x21
+#define bytemask    len
+
+
+
+
+
+/*  
+headroom_t vect_s16_add(
+    int16_t a[],
+    const int16_t b[],
+    const int16_t c[],
+    const unsigned len,
+    const int b_shr,
+    const int c_shr);
+*/
+vect_s16_add:
+FNAME_S16:
+        xm.entsp (NSTACKWORDS)*4
+        li t3, 0x100
+    {   slli t3, len, SIZEOF_LOG2_S16           ;   xm.vsetc t3}
+    {   xm.zexti t3, 5                             ;   srli len, len, EPV_LOG2_S16          }
+        { nop                                           ;   xm.bu .L_apply_op                          }
+.L_func_end_s16: 
+
+
+
+
+/*
+headroom_t vect_s32_add(
+    int32_t a[],
+    const int32_t b[],
+    const int32_t c[],
+    const unsigned len,
+    const int b_shr,
+    const int c_shr);
+*/
+vect_s32_add:
+FNAME_S32:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    {   li t3, 0                              ; nop                                           }
+    {   slli t3, len, SIZEOF_LOG2_S32           ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli t3, len, SIZEOF_LOG2_S32           \nMessage: The shift amount is not 32" */
+    {   xm.zexti t3, 5                             ;   srli len, len, EPV_LOG2_S32              }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri len, len, EPV_LOG2_S32              \nMessage: The shift amount is not 32" */
+    { nop                                           ;   xm.bu .L_apply_op                          }
+.L_func_end_s32: 
+
+
+
+
+
+
+/*
+    Code shared by all functions above
+*/
+.type .L_apply_op,@function
+.L_apply_op:
+
+        xm.stdsp  s3,s2,0
+        xm.stdsp  s5,s4,8
+    {   li _32, 32                             ;   xm.vclrdr                              }
+    mv shr_c, a5 
+    mv shr_b, a4 
+    {   xm.mkmsk t3, t3                          ;   nop   }
+    {   addi tmp_vec,sp, (NSTACKWORDS-8-1)*4         ;   nop}
+    {   xm.mkmski t3, 32                           ;   sw t3, (STACK_BYTEMASK)*4         (sp)}
+    { nop                                           ;   xm.brff len, .L_loop_bot                 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.bu .L_loop_top                      }
+
+.p2align 4
+.L_loop_top:
+            xm.vlashr b, shr_b
+            xm.vstrpv tmp_vec, t3
+            xm.vlashr c, shr_c
+        {   add b, b, _32                           ; nop                                       } 
+        {   add c, c, _32                           ;   xm.vladd tmp_vec}  
+        {   addi len, len, -1                         ;   xm.vstr a}
+        {   add a, a, _32                           ;   xm.bt len, .L_loop_top                 }
+.L_loop_bot:
+
+    { nop                                           ;   lw bytemask, (STACK_BYTEMASK)*4    (sp)}
+    { nop                                           ;   xm.brff bytemask, .L_finish              }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        xm.vlashr b, shr_b
+        xm.vstrpv tmp_vec, t3 
+        xm.vlashr c, shr_c
+    {   mv t3, tmp_vec                        ;   xm.vladd tmp_vec}
+    { nop                                           ;   xm.vstd tmp_vec}
+        xm.vstrpv tmp_vec, bytemask
+        xm.vstrpv a, bytemask
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vstr t3}
+
+.L_finish:
+    xm.lddsp  s3,s2,0
+    xm.lddsp  s5,s4,8
+
+    // Should work for both 16 and 32 bit modes
+    {   li a0, 32                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ;   srli a1, t3, 8                      }
+    {   xm.shr a0, a0, a1                          ;   addi t3, t3, 1                     }
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                   } 
+
+.L_end_apply_op: 
+    .size .L_apply_op, .L_end_apply_op - .L_apply_op
+
+
+
+.global vect_s16_add
+.type vect_s16_add,@function
+.set vect_s16_add.nstackwords,NSTACKWORDS;  .global vect_s16_add.nstackwords /* Translation error on this line: unexpected token at position 41. */ 
+.set vect_s16_add.maxcores,1;               .global vect_s16_add.maxcores /* Translation error on this line: unexpected token at position 28. */ 
+.set vect_s16_add.maxtimers,0;              .global vect_s16_add.maxtimers /* Translation error on this line: unexpected token at position 29. */ 
+.set vect_s16_add.maxchanends,0;            .global vect_s16_add.maxchanends /* Translation error on this line: unexpected token at position 31. */ 
+.size vect_s16_add, .L_func_end_s16 - vect_s16_add
+
+.global vect_s32_add
+.type vect_s32_add,@function
+.set vect_s32_add.nstackwords,NSTACKWORDS;  .global vect_s32_add.nstackwords /* Translation error on this line: unexpected token at position 41. */ 
+.set vect_s32_add.maxcores,1;               .global vect_s32_add.maxcores /* Translation error on this line: unexpected token at position 28. */ 
+.set vect_s32_add.maxtimers,0;              .global vect_s32_add.maxtimers /* Translation error on this line: unexpected token at position 29. */ 
+.set vect_s32_add.maxchanends,0;            .global vect_s32_add.maxchanends /* Translation error on this line: unexpected token at position 31. */ 
+.size vect_s32_add, .L_func_end_s32 - vect_s32_add
+
+
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_headroom.S b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_headroom.S
new file mode 100644
index 00000000..3aeadfa4
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_headroom.S
@@ -0,0 +1,134 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+
+/*  
+headroom_t vect_s16_headroom(
+    const int16_t* v, 
+    const unsigned length);
+*/
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+#define NSTACKWORDS     12
+
+#define STACK_TMP_VEC   0
+
+#define arg_v       x10
+#define arg_len     x11
+
+
+#define FUNCTION_NAME vect_s16_headroom
+
+FUNCTION_NAME:
+    {   li t3, 32                             ;   xm.entsp (NSTACKWORDS)*4               }
+    {   slli t3, t3, 3                         ;   xm.vclrdr                              }
+    {   slli a2, arg_len, 1                      ;   xm.vsetc t3}
+    {   srli arg_len, arg_len, 4                 ;   mv t3, arg_v                      }
+
+    {   addi a0,sp, (STACK_TMP_VEC)*4              ; nop                                       }
+    {   li a3, 32                              ; nop                                       }
+    {   xm.zexti a2, 5                              ;   xm.brff arg_len, .L_loop_bot_s16         }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.bu .L_loop_top_s16                  }
+     
+    .p2align 4
+    .L_loop_top_s16:
+        {   add t3, t3, a3                        ;   xm.vldr t3}
+        {   addi arg_len, arg_len, -1                 ;   xm.vstr a0}
+        { nop                                           ;   xm.bt arg_len, .L_loop_top_s16         }
+
+.L_loop_bot_s16:
+    { nop                                           ;   xm.brff a2, .L_finish16                  }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    {   xm.mkmsk a2, a2                            ;   xm.vstd a0}
+    {   mv t3, a0                             ;   xm.vldr t3}
+    xm.vstrpv a0, a2
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vstr t3}
+
+.L_finish16:
+    {   li a0, 15                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ; nop                                       }
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                   }
+
+
+.L_func_end_s16:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end_s16 - FUNCTION_NAME
+
+
+#undef FUNCTION_NAME
+
+
+
+
+
+
+
+/*  
+headroom_t vect_s32_headroom(
+    const int32_t* v,
+    const unsigned length);
+*/
+
+#define FUNCTION_NAME vect_s32_headroom
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+
+    {   li t3, 0                              ;   slli a2, arg_len, 2                  }
+    {   srli arg_len, arg_len, 3                 ;   xm.vsetc t3}
+    {   addi a0,sp, (STACK_TMP_VEC)*4              ;   mv t3, arg_v                      }
+    {   li a3, 32                              ;   xm.vclrdr                              }
+    {   xm.zexti a2, 5                              ;   xm.brff arg_len, .L_loop_bot_s32         }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.bu .L_loop_top_s32                  }
+
+    .p2align 4
+    .L_loop_top_s32:
+        {   add t3, t3, a3                        ;   xm.vldr t3}
+        {   addi arg_len, arg_len, -1                 ;   xm.vstr a0}
+        { nop                                           ;   xm.bt arg_len, .L_loop_top_s32         }
+
+    .L_loop_bot_s32:
+    { nop                                           ;   xm.brff a2, .L_finish32                  }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    {   xm.mkmsk a2, a2                            ;   xm.vstd a0}
+    {   mv t3, a0                             ;   xm.vldr t3}
+    xm.vstrpv a0, a2
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vstr t3}
+
+    .L_finish32:
+    {   li a0, 31                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ; nop                                       }
+    {sub a0, a0, t3; nop}
+
+    {      nop                      ;   xm.retsp (NSTACKWORDS)*4                   }
+
+.L_func_end_s32:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end_s32 - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_rect.S b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_rect.S
new file mode 100644
index 00000000..16e5b664
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_rect.S
@@ -0,0 +1,138 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+
+#include "../asm_helper.h"
+
+.text
+.p2align 2
+
+#define NSTACKWORDS     (8+4)
+
+#define STACK_TMP_VEC       (NSTACKWORDS-8-4)
+
+#define a           x10
+#define b           x11
+#define len         x12
+#define tail        x13
+
+
+
+
+/*  
+headroom_t vect_s16_rect(
+    int16_t a[],
+    const int16_t b[],
+    const unsigned length);
+*/
+
+vect_s16_rect:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        li t3, 0x0100
+    {   slli tail, len, SIZEOF_LOG2_S16          ;   srli len, len, EPV_LOG2_S16              }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli tail, len, SIZEOF_LOG2_S16          \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri len, len, EPV_LOG2_S16              \nMessage: The shift amount is not 32" */
+    {   xm.zexti tail, 5                            ;   xm.vsetc t3}
+    { nop                                           ;   xm.bu .L_apply_op                          }
+
+.L_func_end_s16:
+
+
+
+
+
+/*  
+headroom_t vect_s32_rect(
+    int32_t a[],
+    const int32_t b[],
+    const unsigned length);
+*/
+
+vect_s32_rect:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    {   li t3, 0                              ;   slli tail, len, SIZEOF_LOG2_S32          }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli tail, len, SIZEOF_LOG2_S32          \nMessage: The shift amount is not 32" */
+    {   srli len, len, EPV_LOG2_S32              ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri len, len, EPV_LOG2_S32              \nMessage: The shift amount is not 32" */
+    {   xm.zexti tail, 5                            ;   xm.bu .L_apply_op                          }
+
+.L_func_end_s32:
+
+
+
+
+#undef a
+#undef b
+#undef len
+
+/*
+    When branching here:
+        *   a --> x10
+        *   b --> x11
+        *   loop_count --> x12
+        *   tail --> x13
+        *   VPU mode must already be set.
+*/
+
+#define a           x10
+#define b           x11
+#define loop_count  x12
+#define tail        x13
+
+.type .L_apply_op,@function
+
+.L_apply_op:
+
+    {   xm.mkmsk tail, tail                        ; nop                                           }
+    {   mv t3, b                              ;   xm.brff loop_count, .L_loop_bot              }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    {   li a1, 32                              ;   xm.bu .L_loop_top                          }
+.p2align 4
+.L_loop_top:
+        {   add t3, t3, a1                        ;   xm.vldr t3}
+        {   addi loop_count, loop_count, -1           ;   xm.vpos                                    }
+        {   add a, a, a1                            ;   xm.vstr a}
+        { nop                                           ;   xm.bt loop_count, .L_loop_top              }
+.L_loop_bot:
+
+    { nop                                           ;   xm.brff tail, .L_finish                      }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.vclrdr                                  }
+    {   addi t3,sp, (STACK_TMP_VEC)*4             ;   xm.vldr t3}
+    { nop                                           ;   xm.vstd t3}
+    { nop                                           ;   xm.vpos                                    }
+        xm.vstrpv t3, tail
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vstr t3}
+        xm.vstrpv a, tail
+
+.L_finish:
+    {   li a0, 32                              ;   xm.vgetc t3}
+    {   srli a1, t3, 8                          ; nop                                           }
+    {   xm.zexti t3, 5                             ;   xm.shr a0, a0, a1                          }
+    {   addi t3, t3, 1                         ; nop                                           }
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       } 
+
+.L_end_apply_op: 
+.size .L_apply_op, .L_end_apply_op - .L_apply_op
+
+
+
+
+
+.global vect_s16_rect
+.type vect_s16_rect,@function
+.set vect_s16_rect.nstackwords,NSTACKWORDS;  .global vect_s16_rect.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set vect_s16_rect.maxcores,1;               .global vect_s16_rect.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set vect_s16_rect.maxtimers,0;              .global vect_s16_rect.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set vect_s16_rect.maxchanends,0;            .global vect_s16_rect.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size vect_s16_rect, .L_func_end_s16 - vect_s16_rect
+
+.global vect_s32_rect
+.type vect_s32_rect,@function
+.set vect_s32_rect.nstackwords,NSTACKWORDS;  .global vect_s32_rect.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set vect_s32_rect.maxcores,1;               .global vect_s32_rect.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set vect_s32_rect.maxtimers,0;              .global vect_s32_rect.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set vect_s32_rect.maxchanends,0;            .global vect_s32_rect.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size vect_s32_rect, .L_func_end_s32 - vect_s32_rect
+
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_add_scalar.S b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_add_scalar.S
new file mode 100644
index 00000000..a1989a7a
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_add_scalar.S
@@ -0,0 +1,112 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*  
+
+unsigned vect_sXX_add_scalar(
+    int32_t a[],
+    const int32_t b[],
+    const unsigned length_bytes,
+    const int32_t c,
+    const int32_t d,
+    const right_shift_t b_shr,
+    const unsigned mode_bits);
+
+*/
+
+
+#define NSTACKVECTS     (1)
+#define NSTACKWORDS     (8+8*(NSTACKVECTS))
+
+#define FUNCTION_NAME   vect_sXX_add_scalar
+
+
+#define STACK_VEC_TEMP      (NSTACKWORDS-12)
+
+#define a           x10 
+#define b           x11 
+#define len         x12
+#define c           x13
+#define b_shr       x18
+#define _32         x19
+#define tail        x20
+
+
+.text
+.p2align 2
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp  s3,s2,0
+    xm.stdsp  s5,s4,8
+
+  mv t3, a4
+  { li _32, 32                               ; nop                      }
+  //{ li _32, 32                               ; lw t3, (STACK_D)*4                      (sp)}
+    xm.stdsp  c,t3,((STACK_VEC_TEMP/2)+0)*8
+    xm.stdsp  c,t3,((STACK_VEC_TEMP/2)+1)*8
+    xm.stdsp  c,t3,((STACK_VEC_TEMP/2)+2)*8
+    xm.stdsp  c,t3,((STACK_VEC_TEMP/2)+3)*8
+
+#undef  c   // no longer needed
+#define vec_tmp   x13
+
+  mv t3, a6
+  { addi vec_tmp,sp, (STACK_VEC_TEMP)*4          ; nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */
+  //{ addi vec_tmp,sp, (STACK_VEC_TEMP)*4          ; lw t3, (STACK_MODE_BITS)*4              (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */
+  { mv tail, len                             ; xm.vsetc t3}
+  mv b_shr, a5
+  { srli len, len, 5                           ; nop}
+  //{ srli len, len, 5                           ; lw b_shr, (STACK_B_SHR)*4                (sp)}
+  { xm.zexti tail, 5                              ; xm.brff len, .L_loop_bot                       }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+
+  .L_loop_top:
+      xm.vlashr b, b_shr
+    { addi len, len, -1                           ; xm.vladd vec_tmp}
+    { add b, b, _32                             ; xm.vstr a}
+    { add a, a, _32                             ; xm.bt len, .L_loop_top                       }
+  .L_loop_bot:
+
+  { xm.mkmsk tail, tail                          ; xm.brff tail, .L_finish                        }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+  { nop                                           ; xm.vclrdr                                    }
+    xm.vlashr b, b_shr
+  { nop                                           ; xm.vladd vec_tmp}
+  { nop                                           ; xm.vstd vec_tmp}
+    xm.vstrpv a, tail
+
+  // These three are because the headroom mask doesn't get updated by VSTRPV
+    xm.vstrpv vec_tmp, tail
+  { nop                                           ; xm.vldd vec_tmp}
+  { nop                                           ; xm.vstd vec_tmp}
+
+
+.L_finish:
+    xm.lddsp  s3,s2,0
+    xm.lddsp  s5,s4,8
+
+  { nop                                           ;   xm.vgetc t3}
+  {   xm.zexti t3, 5                             ; nop                                           }
+  {   mv a0, t3                             ;   xm.retsp (NSTACKWORDS)*4                       } 
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_max_elementwise.S b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_max_elementwise.S
new file mode 100644
index 00000000..55e62654
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_max_elementwise.S
@@ -0,0 +1,192 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+#define NSTACKWORDS     (8+8+4)
+
+#define len         x13
+
+.text
+.p2align 4
+
+
+/*  
+headroom_t vect_s32_max_elementwise(
+    int32_t a[],
+    const int32_t b[],
+    const int32_t c[],
+    const unsigned len,
+    const right_shift_t b_shr,
+    const right_shift_t c_shr);
+*/
+#define FUNC_NAME   vect_s32_max_elementwise
+FUNC_NAME:
+  { li t3, 0                              ; xm.entsp (NSTACKWORDS)*4                   }
+  { slli t3, len, SIZEOF_LOG2_S32           ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli t3, len, SIZEOF_LOG2_S32           \nMessage: The shift amount is not 32" */
+  { xm.zexti t3, 5                             ; srli len, len, EPV_LOG2_S32              }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shri len, len, EPV_LOG2_S32              \nMessage: The shift amount is not 32" */
+    call vect_sXX_max_elementwise
+  { li t3, 31                             ; nop                                         }
+  { sub a0, t3, a0                         ; xm.retsp (NSTACKWORDS)*4                       }
+.L_end_s32:
+
+.global FUNC_NAME
+.type FUNC_NAME,@function
+.set FUNC_NAME.nstackwords,NSTACKWORDS;  .global FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 38. */ 
+.set FUNC_NAME.maxcores,1;               .global FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 25. */ 
+.set FUNC_NAME.maxtimers,0;              .global FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 26. */ 
+.set FUNC_NAME.maxchanends,0;            .global FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 28. */ 
+.size FUNC_NAME, .L_end_s32 - FUNC_NAME
+#undef FUNC_NAME
+  
+
+
+/*  
+headroom_t vect_s16_max_elementwise(
+    int16_t a[],
+    const int16_t b[],
+    const int16_t c[],
+    const unsigned len,
+    const right_shift_t b_shr,
+    const right_shift_t c_shr);
+*/
+#define FUNC_NAME   vect_s16_max_elementwise
+FUNC_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    li t3, 0x0100
+  { slli t3, len, SIZEOF_LOG2_S16           ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli t3, len, SIZEOF_LOG2_S16           \nMessage: The shift amount is not 32" */
+  { xm.zexti t3, 5                             ; srli len, len, EPV_LOG2_S16              }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shri len, len, EPV_LOG2_S16              \nMessage: The shift amount is not 32" */
+    call vect_sXX_max_elementwise
+  { li t3, 15                             ; nop                                         }
+  { sub a0, t3, a0                         ; xm.retsp (NSTACKWORDS)*4                       }
+.L_end_s16:
+
+.global FUNC_NAME
+.type FUNC_NAME,@function
+.set FUNC_NAME.nstackwords,NSTACKWORDS;  .global FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 38. */ 
+.set FUNC_NAME.maxcores,1;               .global FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 25. */ 
+.set FUNC_NAME.maxtimers,0;              .global FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 26. */ 
+.set FUNC_NAME.maxchanends,0;            .global FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 28. */ 
+.size FUNC_NAME, .L_end_s16 - FUNC_NAME
+#undef FUNC_NAME
+  
+
+
+/*  
+headroom_t vect_s8_max_elementwise(
+    int8_t a[],
+    const int8_t b[],
+    const int8_t c[],
+    const unsigned len,
+    const right_shift_t b_shr,
+    const right_shift_t c_shr);
+*/
+#define FUNC_NAME   vect_s8_max_elementwise
+FUNC_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    li t3, 0x0200
+  { mv t3, len                            ; xm.vsetc t3}
+  { xm.zexti t3, 5                             ; srli len, len, EPV_LOG2_S8               }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shri len, len, EPV_LOG2_S8               \nMessage: The shift amount is not 32" */
+    call vect_sXX_max_elementwise
+  { li t3, 7                              ; nop                                         }
+  { sub a0, t3, a0                         ; xm.retsp (NSTACKWORDS)*4                       }
+.L_end_s8:
+
+.global FUNC_NAME
+.type FUNC_NAME,@function
+.set FUNC_NAME.nstackwords,NSTACKWORDS;  .global FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 38. */ 
+.set FUNC_NAME.maxcores,1;               .global FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 25. */ 
+.set FUNC_NAME.maxtimers,0;              .global FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 26. */ 
+.set FUNC_NAME.maxchanends,0;            .global FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 28. */ 
+.size FUNC_NAME, .L_end_s8 - FUNC_NAME
+#undef FUNC_NAME
+
+
+
+#undef len
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+#define STACK_BYTEMASK  6
+
+#define a           x10 
+#define b           x11 
+#define c           x12
+#define len         x13
+#define shr_b       x18
+#define shr_c       x19
+#define _32         x20
+#define tmp_vec     x21
+#define bytemask    len
+
+
+
+/**
+ * WARNING: This does _NOT_ use the standard ABI. It assumes x28 will contain
+ *          the length of the tail in bytes.
+ */
+
+vect_sXX_max_elementwise:
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+
+  { li _32, 32                             ; xm.vclrdr                                  }
+  mv shr_c, a5
+  { xm.mkmsk t3, t3                          ; nop}
+  mv shr_b, a4
+  { addi tmp_vec,sp, (STACK_VEC_TMP)*4         ; nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */
+  { xm.mkmski t3, 32                           ; sw t3, (STACK_BYTEMASK)*4             (sp)}
+  { nop                                         ; xm.brff len, .L_loop_bot                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+  { nop                                         ; xm.bu .L_loop_top                          }
+
+  // Deal with main vector body
+.p2align 4
+.L_loop_top:
+    // Here we need to assume shr_b and shr_c have been chosen to guarantee 1 
+    // bit of headroom in each so that c[k] - b[k] can't saturate. That means 
+    // this should be perfectly accurate if there's already at least 1 bit of 
+    // headroom in each input.
+      xm.vlashr c, shr_c
+      xm.vstrpv tmp_vec, t3
+      xm.vlashr b, shr_b
+      xm.vstrpv a, t3 
+    { addi len, len, -1                         ; xm.vlsub tmp_vec}
+    { add c, c, _32                           ; xm.vpos                                    }
+    { add b, b, _32                           ; xm.vladd a}
+    { add a, a, _32                           ; xm.vstr a}
+    { nop                                         ; xm.bt len, .L_loop_top                     }
+.L_loop_bot:
+
+    lw bytemask, (STACK_BYTEMASK)*4(sp)/* Multiple XAT warnings: 'LDWSP has unknown offset - this may need correction', "Falling back on assumption: the int < 64 for the integer value of the item at position 1 in the instruction's operands in ldwsp bytemask, STACK_BYTEMASK\nMessage: The offset can be encoded in sru6 immediate" */
+    beqz bytemask, .L_finish
+    xm.vlashr c, shr_c
+    xm.vstrpv tmp_vec, bytemask
+    xm.vlashr b, shr_b
+    xm.vstrpv a, bytemask
+    xm.vlsub tmp_vec
+    xm.vpos
+    mv t3, tmp_vec
+    xm.vladd a
+    xm.vstd tmp_vec
+    xm.vstrpv tmp_vec, bytemask
+    xm.vldr t3
+    xm.vstr tmp_vec
+    xm.vstrpv a, bytemask
+
+.L_finish:
+    xm.lddsp  s3,s2,8/* XAT Warning: "Not correcting LDDSP offset because it's not in the local frame range" */
+    xm.lddsp  s5,s4,16/* XAT Warning: "Not correcting LDDSP offset because it's not in the local frame range" */
+    xm.vgetc t3
+    xm.zexti t3, 5
+    mv a0, t3
+    ret 
+
+.L_end_sXX:
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_min_elementwise.S b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_min_elementwise.S
new file mode 100644
index 00000000..6e8467de
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_min_elementwise.S
@@ -0,0 +1,193 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+#define NSTACKWORDS     (8+8+4)
+
+#define len         x13
+
+.text
+.p2align 4
+
+
+/*  
+headroom_t vect_s32_min_elementwise(
+    int32_t a[],
+    const int32_t b[],
+    const int32_t c[],
+    const unsigned len,
+    const right_shift_t b_shr,
+    const right_shift_t c_shr);
+*/
+#define FUNC_NAME   vect_s32_min_elementwise
+FUNC_NAME:
+  { li t3, 0                              ; xm.entsp (NSTACKWORDS)*4                   }
+  { slli t3, len, SIZEOF_LOG2_S32           ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli t3, len, SIZEOF_LOG2_S32           \nMessage: The shift amount is not 32" */
+  { xm.zexti t3, 5                             ; srli len, len, EPV_LOG2_S32              }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shri len, len, EPV_LOG2_S32              \nMessage: The shift amount is not 32" */
+    call vect_sXX_min_elementwise
+  { li t3, 31                             ; nop                                         }
+  { sub a0, t3, a0                         ; xm.retsp (NSTACKWORDS)*4                       }
+.L_end_s32:
+
+.global FUNC_NAME
+.type FUNC_NAME,@function
+.set FUNC_NAME.nstackwords,NSTACKWORDS;  .global FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 38. */ 
+.set FUNC_NAME.maxcores,1;               .global FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 25. */ 
+.set FUNC_NAME.maxtimers,0;              .global FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 26. */ 
+.set FUNC_NAME.maxchanends,0;            .global FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 28. */ 
+.size FUNC_NAME, .L_end_s32 - FUNC_NAME
+#undef FUNC_NAME
+  
+
+
+/*  
+headroom_t vect_s16_min_elementwise(
+    int16_t a[],
+    const int16_t b[],
+    const int16_t c[],
+    const unsigned len,
+    const right_shift_t b_shr,
+    const right_shift_t c_shr);
+*/
+#define FUNC_NAME   vect_s16_min_elementwise
+FUNC_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    li t3, 0x0100
+  { slli t3, len, SIZEOF_LOG2_S16           ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shli t3, len, SIZEOF_LOG2_S16           \nMessage: The shift amount is not 32" */
+  { xm.zexti t3, 5                             ; srli len, len, EPV_LOG2_S16              }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shri len, len, EPV_LOG2_S16              \nMessage: The shift amount is not 32" */
+    call vect_sXX_min_elementwise
+  { li t3, 15                             ; nop                                         }
+  { sub a0, t3, a0                         ; xm.retsp (NSTACKWORDS)*4                       }
+.L_end_s16:
+
+.global FUNC_NAME
+.type FUNC_NAME,@function
+.set FUNC_NAME.nstackwords,NSTACKWORDS;  .global FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 38. */ 
+.set FUNC_NAME.maxcores,1;               .global FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 25. */ 
+.set FUNC_NAME.maxtimers,0;              .global FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 26. */ 
+.set FUNC_NAME.maxchanends,0;            .global FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 28. */ 
+.size FUNC_NAME, .L_end_s16 - FUNC_NAME
+#undef FUNC_NAME
+  
+
+
+/*  
+headroom_t vect_s8_min_elementwise(
+    int8_t a[],
+    const int8_t b[],
+    const int8_t c[],
+    const unsigned len,
+    const right_shift_t b_shr,
+    const right_shift_t c_shr);
+*/
+#define FUNC_NAME   vect_s8_min_elementwise
+FUNC_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    li t3, 0x0200
+  { mv t3, len                            ; xm.vsetc t3}
+  { xm.zexti t3, 5                             ; srli len, len, EPV_LOG2_S8               }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in  shri len, len, EPV_LOG2_S8               \nMessage: The shift amount is not 32" */
+    call vect_sXX_min_elementwise
+  { li t3, 7                              ; nop                                         }
+  { sub a0, t3, a0                         ; xm.retsp (NSTACKWORDS)*4                       }
+.L_end_s8:
+
+.global FUNC_NAME
+.type FUNC_NAME,@function
+.set FUNC_NAME.nstackwords,NSTACKWORDS;  .global FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 38. */ 
+.set FUNC_NAME.maxcores,1;               .global FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 25. */ 
+.set FUNC_NAME.maxtimers,0;              .global FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 26. */ 
+.set FUNC_NAME.maxchanends,0;            .global FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 28. */ 
+.size FUNC_NAME, .L_end_s8 - FUNC_NAME
+#undef FUNC_NAME
+
+
+
+#undef len
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+#define STACK_BYTEMASK  6
+
+#define a           x10 
+#define b           x11 
+#define c           x12
+#define len         x13
+#define shr_b       x18
+#define shr_c       x19
+#define _32         x20
+#define tmp_vec     x21
+#define bytemask    len
+
+
+
+/**
+ * WARNING: This does _NOT_ use the standard ABI. It assumes x28 will contain
+ *          the length of the tail in bytes.
+ */
+
+vect_sXX_min_elementwise:
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+
+  { li _32, 32                             ; xm.vclrdr                                  }
+  mv shr_c, a5
+  { xm.mkmsk t3, t3                          ; nop}
+  mv shr_b, a4
+  { addi tmp_vec,sp, (STACK_VEC_TMP)*4         ; nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */
+  { xm.mkmski t3, 32                           ; sw t3, (STACK_BYTEMASK)*4             (sp)}
+  { nop                                         ; xm.brff len, .L_loop_bot                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+  { nop                                         ; xm.bu .L_loop_top                          }
+
+  // Deal with main vector body
+.p2align 4
+.L_loop_top:
+    // Here we need to assume shr_b and shr_c have been chosen to guarantee 1 
+    // bit of headroom in each so that c[k] - b[k] can't saturate. That means 
+    // this should be perfectly accurate if there's already at least 1 bit of 
+    // headroom in each input.
+      xm.vlashr c, shr_c
+      xm.vstrpv tmp_vec, t3
+      xm.vlashr b, shr_b
+      xm.vstrpv a, t3 
+    { addi len, len, -1                         ; xm.vlsub tmp_vec}
+    { add c, c, _32                           ; xm.vpos                                    }
+    { add b, b, _32                           ; xm.vlsub tmp_vec}
+    { add a, a, _32                           ; xm.vstr a}
+    { nop                                         ; xm.bt len, .L_loop_top                     }
+.L_loop_bot:
+
+    lw bytemask, (STACK_BYTEMASK)*4(sp)/* Multiple XAT warnings: 'LDWSP has unknown offset - this may need correction', "Falling back on assumption: the int < 64 for the integer value of the item at position 1 in the instruction's operands in ldwsp bytemask, STACK_BYTEMASK\nMessage: The offset can be encoded in sru6 immediate" */
+    beqz bytemask, .L_finish
+    xm.vlashr c, shr_c
+    xm.vstrpv tmp_vec, bytemask
+    xm.vlashr b, shr_b
+    xm.vstrpv a, bytemask
+    xm.vlsub tmp_vec
+    xm.vpos
+    mv t3, tmp_vec
+    xm.vlsub tmp_vec
+    xm.vstd tmp_vec
+    xm.vstrpv tmp_vec, bytemask
+    xm.vldr t3
+    xm.vstr tmp_vec
+    xm.vstrpv a, bytemask
+
+.L_finish:
+    xm.lddsp  s3,s2,8/* XAT Warning: "Not correcting LDDSP offset because it's not in the local frame range" */
+    xm.lddsp  s5,s4,16/* XAT Warning: "Not correcting LDDSP offset because it's not in the local frame range" */
+    xm.vgetc t3
+    xm.zexti t3, 5
+    mv a0, t3
+    ret 
+
+
+.L_end_sXX:
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_set.S b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_set.S
new file mode 100644
index 00000000..9f2b5f38
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_set.S
@@ -0,0 +1,133 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+
+.text
+.p2align 2
+
+#define NSTACKWORDS     32
+
+#define STACK_TMP_VEC       (NSTACKWORDS-16)
+#define STACK_TMP_VEC_DBL   ((STACK_TMP_VEC)/2)
+
+#define data    x10
+#define value   x11
+#define length  x12
+
+
+/*  
+void vect_s16_set(
+    int16_t data[],
+    const int16_t value,
+    const unsigned length);
+*/
+vect_s16_set:
+        xm.entsp (NSTACKWORDS)*4
+    {   slli t3, value, 16                    ;   slli a3, length, SIZEOF_LOG2_S16     }
+    {   xm.zexti value, 16                    ;   xm.zexti a3, 5                       }
+    {   or t3, t3, value                      ;   srli length, length, EPV_LOG2_S16    }
+    {   mv value, t3                          ;   xm.bu .L_set_bytes                   }
+.L_size_end_vect_s16_set: 
+    .size vect_s16_set, .L_size_end_vect_s16_set - vect_s16_set
+
+
+/*  
+void vect_s32_set(
+    int32_t data[],
+    const int32_t value,
+    const unsigned length);
+*/
+vect_s32_set:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    {   mv t3, value                          ;   slli a3, length, SIZEOF_LOG2_S32     }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli a3, length, SIZEOF_LOG2_S32     \nMessage: The shift amount is not 32" */
+    {   xm.zexti a3, 5                              ;   srli length, length, EPV_LOG2_S32    }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri length, length, EPV_LOG2_S32    \nMessage: The shift amount is not 32" */
+    { nop                                           ;   xm.bu .L_set_bytes                     }
+.L_size_end_vect_s32_set: 
+    .size vect_s32_set, .L_size_end_vect_s32_set - vect_s32_set
+
+#undef value
+#undef length
+#define real x11
+#define imag x12
+#define length x13
+
+/*  
+void vect_complex_s32_set(
+    complex_s32_t data[],
+    const int32_t real_part,
+    const int32_t imag_part,
+    const unsigned length);
+*/
+vect_complex_s32_set:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    {   mv t3, imag                           ;   slli a3, length, SIZEOF_LOG2_C32     }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli a3, length, SIZEOF_LOG2_C32     \nMessage: The shift amount is not 32" */
+    {   srli a2, length, 5                       ;   xm.zexti a3, 5                          }
+    { nop                                           ;   xm.bu .L_set_bytes                     }
+.L_size_end_vect_complex_s32_set: 
+    .size vect_complex_s32_set, .L_size_end_vect_complex_s32_set - vect_complex_s32_set
+
+
+
+
+#undef real 
+#undef imag 
+#undef length 
+#define value   x11
+#define length  x12
+
+/*
+    Code shared by all functions above.
+*/
+.type .L_set_bytes,@function
+.L_set_bytes:
+        xm.stdsp  value,t3,(STACK_TMP_VEC_DBL+0)*8
+        xm.stdsp  value,t3,(STACK_TMP_VEC_DBL+1)*8
+        xm.stdsp  value,t3,(STACK_TMP_VEC_DBL+2)*8
+        xm.stdsp  value,t3,(STACK_TMP_VEC_DBL+3)*8
+    { nop                                           ;   addi t3,sp, (STACK_TMP_VEC)*4         }
+    {   xm.mkmsk t3, a3                           ;   xm.vldr t3}  
+    {   li a3, 32                              ;   xm.brff length, .L_loop_bot              }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.bu .L_loop_top                      }
+.p2align 3
+    .L_loop_top:
+        {   addi length, length, -1                   ;   xm.vstr data}
+        {   add data, data, a3                      ;   xm.bt length, .L_loop_top              }
+.L_loop_bot:
+    xm.vstrpv data, t3
+    xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+.L_end_set_bytes: 
+    .size .L_set_bytes, .L_end_set_bytes - .L_set_bytes
+
+
+
+
+
+.globl vect_s16_set
+.type vect_s16_set,@function
+.set vect_s16_set.nstackwords,NSTACKWORDS;  .global vect_s16_set.nstackwords;  /* Translation error on this line: unexpected token at position 41. */ 
+.set vect_s16_set.maxcores,1;               .global vect_s16_set.maxcores;  /* Translation error on this line: unexpected token at position 28. */ 
+.set vect_s16_set.maxtimers,0;              .global vect_s16_set.maxtimers;  /* Translation error on this line: unexpected token at position 29. */ 
+.set vect_s16_set.maxchanends,0;            .global vect_s16_set.maxchanends;  /* Translation error on this line: unexpected token at position 31. */ 
+
+.globl vect_s32_set
+.type vect_s32_set,@function
+.set vect_s32_set.nstackwords,NSTACKWORDS;  .global vect_s32_set.nstackwords;  /* Translation error on this line: unexpected token at position 41. */ 
+.set vect_s32_set.maxcores,1;               .global vect_s32_set.maxcores;  /* Translation error on this line: unexpected token at position 28. */ 
+.set vect_s32_set.maxtimers,0;              .global vect_s32_set.maxtimers;  /* Translation error on this line: unexpected token at position 29. */ 
+.set vect_s32_set.maxchanends,0;            .global vect_s32_set.maxchanends;  /* Translation error on this line: unexpected token at position 31. */ 
+
+.globl vect_complex_s32_set
+.type vect_complex_s32_set,@function
+.set vect_complex_s32_set.nstackwords,NSTACKWORDS;  .global vect_complex_s32_set.nstackwords;  /* Translation error on this line: unexpected token at position 49. */ 
+.set vect_complex_s32_set.maxcores,1;               .global vect_complex_s32_set.maxcores;  /* Translation error on this line: unexpected token at position 36. */ 
+.set vect_complex_s32_set.maxtimers,0;              .global vect_complex_s32_set.maxtimers;  /* Translation error on this line: unexpected token at position 37. */ 
+.set vect_complex_s32_set.maxchanends,0;            .global vect_complex_s32_set.maxchanends;  /* Translation error on this line: unexpected token at position 39. */ 
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_shl.S b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_shl.S
new file mode 100644
index 00000000..2f9b013c
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_shl.S
@@ -0,0 +1,169 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+/*  
+headroom_t vect_s16_shl(
+    int16_t a[],
+    const int16_t b[],
+    const unsigned length,
+    const int shl);
+
+headroom_t vect_s32_shl(
+    int32_t a[],,
+    const int32_t b[],
+    const unsigned length,
+    const int shl);
+*/
+
+#include "../asm_helper.h"
+
+
+
+#define NSTACKWORDS     (8+2+2+4)
+
+#define FUNCTION_NAME   shl_vect
+#define FNAME_S16       CAT(FUNCTION_NAME, _s16)
+#define FNAME_S32       CAT(FUNCTION_NAME, _s32)
+
+#define STACK_TMP_VEC       (NSTACKWORDS-10)
+
+#define a           x10
+#define b           x11
+#define len         x12
+#define b_shl       x13
+
+
+.text
+.p2align 2
+
+
+
+
+vect_s16_shl:
+
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        li t3, 0x0100
+    {   slli s2, len, SIZEOF_LOG2_S16            ;   sw s2, 4                       (sp)}/* Multiple XAT warnings: 'STWSP outside of known frame - offset may need correction', "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli s2, len, SIZEOF_LOG2_S16            \nMessage: The shift amount is not 32" */
+    {   srli len, len, EPV_LOG2_S16              ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri len, len, EPV_LOG2_S16              \nMessage: The shift amount is not 32" */
+    {   xm.zexti s2, 5                              ;   xm.bu .L_apply_op                      }
+
+
+.L_size_end_vect_s16_shl: 
+    .size vect_s16_shl, .L_size_end_vect_s16_shl - vect_s16_shl
+
+
+
+
+
+
+
+vect_s32_shl:
+
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    {   li t3, 0                              ;   sw s2, 4                       (sp)}
+    {   slli s2, len, SIZEOF_LOG2_S32            ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli s2, len, SIZEOF_LOG2_S32            \nMessage: The shift amount is not 32" */
+    {   srli len, len, EPV_LOG2_S32              ;   xm.zexti s2, 5                          }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri len, len, EPV_LOG2_S32              \nMessage: The shift amount is not 32" */
+    { nop                                           ;   xm.bu .L_apply_op                      }
+    
+
+.L_size_end_vect_s32_shl: 
+    .size vect_s32_shl, .L_size_end_vect_s32_shl - vect_s32_shl
+    
+#undef a
+#undef b
+#undef len
+#undef b_shl
+
+
+
+
+
+
+/*
+    When branching here:
+        *   a --> x10
+        *   b --> x11
+        *   loop_count --> x12
+        *   shl --> x13
+        *   tail --> x18
+        *   VPU mode must already be set.
+*/
+
+#define a           x10
+#define b           x11
+#define loop_count  x12
+#define b_shl       x13
+#define b_shr         b_shl
+#define tail        x18
+
+
+.type .L_apply_op,@function;  /* Translation error on this line: unexpected token at position 27. */ 
+
+.L_apply_op:
+
+    {   xm.neg b_shr, b_shl                        ;   xm.zexti s2, 5                          }
+    {   xm.mkmsk tail, tail                        ;   xm.brff loop_count, .L_loop_bot          }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    {   li t3, 32                             ;   xm.bu .L_loop_top                      }
+    { nop                                           ;   xm.bu .L_loop_top_inplace              }
+
+    .p2align 4
+    .L_loop_top_inplace:
+            xm.vlashr b, b_shr
+        {   addi loop_count, loop_count, -1           ;   xm.vstr b}
+        {   add b, b, t3                           ;   xm.bt loop_count, .L_loop_top_inplace  }
+    {   mv a, b                                ;   xm.bu .L_loop_bot                      }
+
+    .p2align 4
+    .L_loop_top:
+            xm.vlashr b, b_shr
+        {   add b, b, t3                           ;   xm.vstr a}
+        {   addi loop_count, loop_count, -1           ; nop                                       }
+        {   add a, a, t3                           ;   xm.bt loop_count, .L_loop_top          }
+    
+.L_loop_bot:
+
+    {   addi t3,sp, (STACK_TMP_VEC)*4             ;   xm.brff tail, .L_finish                  }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.vclrdr                              }
+        xm.vlashr b, b_shr
+    { nop                                           ;   xm.vstd t3}
+        xm.vstrpv t3, tail
+    {   li a2, 32                              ;   xm.vldr t3}
+    {   slli a2, a2, 3                           ;   xm.vstr t3}
+        xm.vstrpv a, tail
+
+.L_finish:
+    {   li a0, 32                              ;   xm.vgetc t3}
+    {   srli a1, t3, 8                          ;   lw s2, 4                       (sp)}
+    {   xm.zexti t3, 5                             ;   xm.shr a0, a0, a1                      }
+    {   addi t3, t3, 1                         ; nop                                       }
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                   } 
+
+
+.L_end_apply_op: 
+    .size .L_apply_op, .L_end_apply_op - .L_apply_op
+
+
+
+
+
+
+
+.globl vect_s16_shl
+.type vect_s16_shl,@function
+.set vect_s16_shl.nstackwords,NSTACKWORDS;  .global vect_s16_shl.nstackwords /* Translation error on this line: unexpected token at position 41. */ 
+.set vect_s16_shl.maxcores,1;               .global vect_s16_shl.maxcores /* Translation error on this line: unexpected token at position 28. */ 
+.set vect_s16_shl.maxtimers,0;              .global vect_s16_shl.maxtimers /* Translation error on this line: unexpected token at position 29. */ 
+.set vect_s16_shl.maxchanends,0;            .global vect_s16_shl.maxchanends /* Translation error on this line: unexpected token at position 31. */ 
+
+.globl vect_s32_shl
+.type vect_s32_shl,@function
+.set vect_s32_shl.nstackwords,NSTACKWORDS;  .global vect_s32_shl.nstackwords /* Translation error on this line: unexpected token at position 41. */ 
+.set vect_s32_shl.maxcores,1;               .global vect_s32_shl.maxcores /* Translation error on this line: unexpected token at position 28. */ 
+.set vect_s32_shl.maxtimers,0;              .global vect_s32_shl.maxtimers /* Translation error on this line: unexpected token at position 29. */ 
+.set vect_s32_shl.maxchanends,0;            .global vect_s32_shl.maxchanends /* Translation error on this line: unexpected token at position 31. */ 
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sub.S b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sub.S
new file mode 100644
index 00000000..da356ec8
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sub.S
@@ -0,0 +1,151 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+
+#include "../asm_helper.h"
+
+
+.text
+.p2align 2
+
+
+#define NSTACKWORDS     (32)
+
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-1)
+#define STACK_BYTEMASK  8
+
+
+#define a           x10 
+#define b           x11 
+#define c           x12
+#define len         x13
+#define shr_b       x18
+#define shr_c       x19
+#define _32         x20
+#define tmp_vec     x21
+#define bytemask    len
+
+
+
+
+/*  
+headroom_t vect_s16_sub(
+    int16_t a[],
+    const int16_t b[],
+    const int16_t c[],
+    const unsigned len,
+    const int b_shr,
+    const int c_shr);
+*/
+vect_s16_sub:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        li t3, 0x100
+    {   slli t3, len, SIZEOF_LOG2_S16           ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli t3, len, SIZEOF_LOG2_S16           \nMessage: The shift amount is not 32" */
+    {   xm.zexti t3, 5                             ;   srli len, len, EPV_LOG2_S16          }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri len, len, EPV_LOG2_S16          \nMessage: The shift amount is not 32" */
+    { nop                                           ;   xm.bu .L_apply_op                      }
+.L_func_end_s16: 
+
+
+
+/*
+headroom_t vect_s32_sub(
+    int32_t a[],
+    const int32_t b[],
+    const int32_t c[],
+    const unsigned len,
+    const int b_shr,
+    const int c_shr);
+*/    
+vect_s32_sub:
+        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    {   li t3, 0                              ; nop                                           }
+    {   slli t3, len, SIZEOF_LOG2_S32           ;   xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shli t3, len, SIZEOF_LOG2_S32           \nMessage: The shift amount is not 32" */
+    {   xm.zexti t3, 5                             ;   srli len, len, EPV_LOG2_S32              }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in    shri len, len, EPV_LOG2_S32              \nMessage: The shift amount is not 32" */
+    { nop                                           ;   xm.bu .L_apply_op                          }
+.L_func_end_s32:
+
+
+
+
+
+
+/*
+    Code shared by all functions above
+*/
+.type .L_apply_op,@function
+.L_apply_op:
+
+        xm.stdsp  s3,s2,0
+        xm.stdsp  s5,s4,8
+    {   li _32, 32                             ;   xm.vclrdr                              }
+    mv shr_c, a5
+    mv shr_b, a4
+    {   xm.mkmsk t3, t3                          ;   nop          }
+    {   addi tmp_vec,sp, (NSTACKWORDS-8-1)*4         ;   nop          }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */
+    {   xm.mkmski t3, 32                           ;   sw t3, (STACK_BYTEMASK)*4         (sp)}
+    { nop                                           ;   xm.brff len, .L_loop_bot                 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                           ;   xm.bu .L_loop_top                      }
+
+.p2align 4
+.L_loop_top:
+            xm.vlashr b, shr_b
+            xm.vstrpv tmp_vec, t3
+            xm.vlashr c, shr_c
+        {   add b, b, _32                           ; nop                                       } 
+        {   add c, c, _32                           ;   xm.vlsub tmp_vec}  
+        {   addi len, len, -1                         ;   xm.vstr a}
+        {   add a, a, _32                           ;   xm.bt len, .L_loop_top                 }
+.L_loop_bot:
+
+    { nop                                           ;   lw bytemask, (STACK_BYTEMASK)*4    (sp)}
+    { nop                                           ;   xm.brff bytemask, .L_finish              }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        xm.vlashr b, shr_b
+        xm.vstrpv tmp_vec, t3 
+        xm.vlashr c, shr_c
+    {   mv t3, tmp_vec                        ;   xm.vlsub tmp_vec}
+    { nop                                           ;   xm.vstd tmp_vec}
+        xm.vstrpv tmp_vec, bytemask
+        xm.vstrpv a, bytemask
+    { nop                                           ;   xm.vldr t3}
+    { nop                                           ;   xm.vstr t3}
+
+.L_finish:
+    xm.lddsp  s3,s2,0
+    xm.lddsp  s5,s4,8
+
+    // Should work for both 16 and 32 bit modes
+    {   li a0, 32                              ;   xm.vgetc t3}
+    {   xm.zexti t3, 5                             ;   srli a1, t3, 8                      }
+    {   xm.shr a0, a0, a1                          ;   addi t3, t3, 1                     }
+    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                   } 
+
+.L_end_apply_op: 
+.size .L_apply_op, .L_end_apply_op - .L_apply_op
+
+
+
+
+.global vect_s16_sub
+.type vect_s16_sub,@function
+.set vect_s16_sub.nstackwords,NSTACKWORDS;  .global vect_s16_sub.nstackwords /* Translation error on this line: unexpected token at position 41. */ 
+.set vect_s16_sub.maxcores,1;               .global vect_s16_sub.maxcores /* Translation error on this line: unexpected token at position 28. */ 
+.set vect_s16_sub.maxtimers,0;              .global vect_s16_sub.maxtimers /* Translation error on this line: unexpected token at position 29. */ 
+.set vect_s16_sub.maxchanends,0;            .global vect_s16_sub.maxchanends /* Translation error on this line: unexpected token at position 31. */ 
+.size vect_s16_sub, .L_func_end_s16 - vect_s16_sub
+
+.global vect_s32_sub
+.type vect_s32_sub,@function
+.set vect_s32_sub.nstackwords,NSTACKWORDS;  .global vect_s32_sub.nstackwords /* Translation error on this line: unexpected token at position 41. */ 
+.set vect_s32_sub.maxcores,1;               .global vect_s32_sub.maxcores /* Translation error on this line: unexpected token at position 28. */ 
+.set vect_s32_sub.maxtimers,0;              .global vect_s32_sub.maxtimers /* Translation error on this line: unexpected token at position 29. */ 
+.set vect_s32_sub.maxchanends,0;            .global vect_s32_sub.maxchanends /* Translation error on this line: unexpected token at position 31. */ 
+.size vect_s32_sub, .L_func_end_s32 - vect_s32_sub
+
+    
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/xs3/asm_helper.h b/lib_xcore_math/src/arch/xs3/asm_helper.h
index 02e41768..43d5bfc6 100644
--- a/lib_xcore_math/src/arch/xs3/asm_helper.h
+++ b/lib_xcore_math/src/arch/xs3/asm_helper.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #ifndef ASM_HELPER_H_
diff --git a/lib_xcore_math/src/arch/xs3/chunk_s16/chunk_s16_accumulate.S b/lib_xcore_math/src/arch/xs3/chunk_s16/chunk_s16_accumulate.S
index 09bd0a06..f6da547d 100644
--- a/lib_xcore_math/src/arch/xs3/chunk_s16/chunk_s16_accumulate.S
+++ b/lib_xcore_math/src/arch/xs3/chunk_s16/chunk_s16_accumulate.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_dot.S b/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_dot.S
index efad32aa..30d3df59 100644
--- a/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_dot.S
+++ b/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_dot.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_log.S b/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_log.S
index 094164bc..f73cb86c 100644
--- a/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_log.S
+++ b/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_log.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_power_series.S b/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_power_series.S
index 64b382a0..0fe8002b 100644
--- a/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_power_series.S
+++ b/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_power_series.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_power_series_v2.S b/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_power_series_v2.S
index 2bb0c0e9..9ef5c7d7 100644
--- a/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_power_series_v2.S
+++ b/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_power_series_v2.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/dct12_s32.S b/lib_xcore_math/src/arch/xs3/dct/s32/dct12_s32.S
index 25e83438..b2ced0e6 100644
--- a/lib_xcore_math/src/arch/xs3/dct/s32/dct12_s32.S
+++ b/lib_xcore_math/src/arch/xs3/dct/s32/dct12_s32.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #if defined(__XS3A__)
 
diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/dct16_s32.S b/lib_xcore_math/src/arch/xs3/dct/s32/dct16_s32.S
index a9fc575c..adb15b7d 100644
--- a/lib_xcore_math/src/arch/xs3/dct/s32/dct16_s32.S
+++ b/lib_xcore_math/src/arch/xs3/dct/s32/dct16_s32.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #if defined(__XS3A__)
 
diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/dct24_s32.S b/lib_xcore_math/src/arch/xs3/dct/s32/dct24_s32.S
index eb0dcf07..d08b2465 100644
--- a/lib_xcore_math/src/arch/xs3/dct/s32/dct24_s32.S
+++ b/lib_xcore_math/src/arch/xs3/dct/s32/dct24_s32.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #if defined(__XS3A__)
 
diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/dct6_s32.S b/lib_xcore_math/src/arch/xs3/dct/s32/dct6_s32.S
index 2e4abdf0..546306c6 100644
--- a/lib_xcore_math/src/arch/xs3/dct/s32/dct6_s32.S
+++ b/lib_xcore_math/src/arch/xs3/dct/s32/dct6_s32.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #if defined(__XS3A__)
 
diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/dct8_s32.S b/lib_xcore_math/src/arch/xs3/dct/s32/dct8_s32.S
index 534555b6..c08bf7f1 100644
--- a/lib_xcore_math/src/arch/xs3/dct/s32/dct8_s32.S
+++ b/lib_xcore_math/src/arch/xs3/dct/s32/dct8_s32.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #if defined(__XS3A__)
 
diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/dct_adsb_s32.S b/lib_xcore_math/src/arch/xs3/dct/s32/dct_adsb_s32.S
index 94bced07..2853a02b 100644
--- a/lib_xcore_math/src/arch/xs3/dct/s32/dct_adsb_s32.S
+++ b/lib_xcore_math/src/arch/xs3/dct/s32/dct_adsb_s32.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #if defined(__XS3A__)
 
diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/dct_deconvolve_s32.S b/lib_xcore_math/src/arch/xs3/dct/s32/dct_deconvolve_s32.S
index d4e1deb2..ffc63444 100644
--- a/lib_xcore_math/src/arch/xs3/dct/s32/dct_deconvolve_s32.S
+++ b/lib_xcore_math/src/arch/xs3/dct/s32/dct_deconvolve_s32.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #if defined(__XS3A__)
 
diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/idct6_s32.S b/lib_xcore_math/src/arch/xs3/dct/s32/idct6_s32.S
index bcc2c426..de947ea7 100644
--- a/lib_xcore_math/src/arch/xs3/dct/s32/idct6_s32.S
+++ b/lib_xcore_math/src/arch/xs3/dct/s32/idct6_s32.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #if defined(__XS3A__)
 
diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/idct8_s32.S b/lib_xcore_math/src/arch/xs3/dct/s32/idct8_s32.S
index a865b14f..d609ce7b 100644
--- a/lib_xcore_math/src/arch/xs3/dct/s32/idct8_s32.S
+++ b/lib_xcore_math/src/arch/xs3/dct/s32/idct8_s32.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #if defined(__XS3A__)
 
diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/idct_adsb.S b/lib_xcore_math/src/arch/xs3/dct/s32/idct_adsb.S
index ba927a8e..b79242c0 100644
--- a/lib_xcore_math/src/arch/xs3/dct/s32/idct_adsb.S
+++ b/lib_xcore_math/src/arch/xs3/dct/s32/idct_adsb.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #if defined(__XS3A__)
 
diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/idct_convolve.S b/lib_xcore_math/src/arch/xs3/dct/s32/idct_convolve.S
index eb688940..d721a528 100644
--- a/lib_xcore_math/src/arch/xs3/dct/s32/idct_convolve.S
+++ b/lib_xcore_math/src/arch/xs3/dct/s32/idct_convolve.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #if defined(__XS3A__)
 
diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/idct_scale.S b/lib_xcore_math/src/arch/xs3/dct/s32/idct_scale.S
index a1ebcff0..c05efeb5 100644
--- a/lib_xcore_math/src/arch/xs3/dct/s32/idct_scale.S
+++ b/lib_xcore_math/src/arch/xs3/dct/s32/idct_scale.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #if defined(__XS3A__)
 
diff --git a/lib_xcore_math/src/arch/xs3/dct/s8/dct8x8_stageA.S b/lib_xcore_math/src/arch/xs3/dct/s8/dct8x8_stageA.S
index 029c8119..37379c0a 100644
--- a/lib_xcore_math/src/arch/xs3/dct/s8/dct8x8_stageA.S
+++ b/lib_xcore_math/src/arch/xs3/dct/s8/dct8x8_stageA.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #if defined(__XS3A__)
 
diff --git a/lib_xcore_math/src/arch/xs3/dct/s8/dct8x8_stageB.S b/lib_xcore_math/src/arch/xs3/dct/s8/dct8x8_stageB.S
index a6fcc330..f92cca7a 100644
--- a/lib_xcore_math/src/arch/xs3/dct/s8/dct8x8_stageB.S
+++ b/lib_xcore_math/src/arch/xs3/dct/s8/dct8x8_stageB.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #if defined(__XS3A__)
 
diff --git a/lib_xcore_math/src/arch/xs3/dct/vect_s32_flip.S b/lib_xcore_math/src/arch/xs3/dct/vect_s32_flip.S
index 0f2dd69d..81a610eb 100644
--- a/lib_xcore_math/src/arch/xs3/dct/vect_s32_flip.S
+++ b/lib_xcore_math/src/arch/xs3/dct/vect_s32_flip.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #if defined(__XS3A__)
 
diff --git a/lib_xcore_math/src/arch/xs3/fft/dif_fft.S b/lib_xcore_math/src/arch/xs3/fft/dif_fft.S
index 6c0b3c70..8f2f465c 100644
--- a/lib_xcore_math/src/arch/xs3/fft/dif_fft.S
+++ b/lib_xcore_math/src/arch/xs3/fft/dif_fft.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/fft/dit_fft.S b/lib_xcore_math/src/arch/xs3/fft/dit_fft.S
index 6e78d42a..7eb48e8e 100644
--- a/lib_xcore_math/src/arch/xs3/fft/dit_fft.S
+++ b/lib_xcore_math/src/arch/xs3/fft/dit_fft.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/fft/fft_hr_lut.S b/lib_xcore_math/src/arch/xs3/fft/fft_hr_lut.S
index 635f4b19..de8557ef 100644
--- a/lib_xcore_math/src/arch/xs3/fft/fft_hr_lut.S
+++ b/lib_xcore_math/src/arch/xs3/fft/fft_hr_lut.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/fft/fft_index_bit_reversal.S b/lib_xcore_math/src/arch/xs3/fft/fft_index_bit_reversal.S
index e5415e71..10f4fb1b 100644
--- a/lib_xcore_math/src/arch/xs3/fft/fft_index_bit_reversal.S
+++ b/lib_xcore_math/src/arch/xs3/fft/fft_index_bit_reversal.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #if defined(__XS3A__)
 
diff --git a/lib_xcore_math/src/arch/xs3/fft/fft_mono_adjust.S b/lib_xcore_math/src/arch/xs3/fft/fft_mono_adjust.S
index ab530a64..ece1b693 100644
--- a/lib_xcore_math/src/arch/xs3/fft/fft_mono_adjust.S
+++ b/lib_xcore_math/src/arch/xs3/fft/fft_mono_adjust.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/fft/fft_spectra_merge.S b/lib_xcore_math/src/arch/xs3/fft/fft_spectra_merge.S
index 03789067..32b89228 100644
--- a/lib_xcore_math/src/arch/xs3/fft/fft_spectra_merge.S
+++ b/lib_xcore_math/src/arch/xs3/fft/fft_spectra_merge.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/fft/fft_spectra_split.S b/lib_xcore_math/src/arch/xs3/fft/fft_spectra_split.S
index c7e334c0..2498ba0a 100644
--- a/lib_xcore_math/src/arch/xs3/fft/fft_spectra_split.S
+++ b/lib_xcore_math/src/arch/xs3/fft/fft_spectra_split.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/fft/tail_reverse_complex_s32.S b/lib_xcore_math/src/arch/xs3/fft/tail_reverse_complex_s32.S
index 2137c68e..0085c2f0 100644
--- a/lib_xcore_math/src/arch/xs3/fft/tail_reverse_complex_s32.S
+++ b/lib_xcore_math/src/arch/xs3/fft/tail_reverse_complex_s32.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/filter/filter_biquad_s32.S b/lib_xcore_math/src/arch/xs3/filter/filter_biquad_s32.S
index 46d6d01a..451f4933 100644
--- a/lib_xcore_math/src/arch/xs3/filter/filter_biquad_s32.S
+++ b/lib_xcore_math/src/arch/xs3/filter/filter_biquad_s32.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/lib_xcore_math/src/arch/xs3/filter/filter_biquad_sat_s32.S b/lib_xcore_math/src/arch/xs3/filter/filter_biquad_sat_s32.S
index fdebdc76..9baec71d 100644
--- a/lib_xcore_math/src/arch/xs3/filter/filter_biquad_sat_s32.S
+++ b/lib_xcore_math/src/arch/xs3/filter/filter_biquad_sat_s32.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/lib_xcore_math/src/arch/xs3/filter/filter_fir_s16.S b/lib_xcore_math/src/arch/xs3/filter/filter_fir_s16.S
index fe18d3d6..c964c376 100644
--- a/lib_xcore_math/src/arch/xs3/filter/filter_fir_s16.S
+++ b/lib_xcore_math/src/arch/xs3/filter/filter_fir_s16.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/lib_xcore_math/src/arch/xs3/filter/filter_fir_s32.S b/lib_xcore_math/src/arch/xs3/filter/filter_fir_s32.S
index 4fe9615b..15df9e4c 100644
--- a/lib_xcore_math/src/arch/xs3/filter/filter_fir_s32.S
+++ b/lib_xcore_math/src/arch/xs3/filter/filter_fir_s32.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/lib_xcore_math/src/arch/xs3/filter/push_sample_down_s16.S b/lib_xcore_math/src/arch/xs3/filter/push_sample_down_s16.S
index 17dd839d..bab4e4f3 100644
--- a/lib_xcore_math/src/arch/xs3/filter/push_sample_down_s16.S
+++ b/lib_xcore_math/src/arch/xs3/filter/push_sample_down_s16.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/lib_xcore_math/src/arch/xs3/filter/push_sample_up_s16.S b/lib_xcore_math/src/arch/xs3/filter/push_sample_up_s16.S
index 1085e9d4..71b71427 100644
--- a/lib_xcore_math/src/arch/xs3/filter/push_sample_up_s16.S
+++ b/lib_xcore_math/src/arch/xs3/filter/push_sample_up_s16.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/lib_xcore_math/src/arch/xs3/filter/vect_s32_convolve_valid.S b/lib_xcore_math/src/arch/xs3/filter/vect_s32_convolve_valid.S
index 246301e1..b47111db 100644
--- a/lib_xcore_math/src/arch/xs3/filter/vect_s32_convolve_valid.S
+++ b/lib_xcore_math/src/arch/xs3/filter/vect_s32_convolve_valid.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/matrix/mat_mul_s8_x_s8_yield_s32.S b/lib_xcore_math/src/arch/xs3/matrix/mat_mul_s8_x_s8_yield_s32.S
index 92574f50..2c61d51d 100644
--- a/lib_xcore_math/src/arch/xs3/matrix/mat_mul_s8_x_s8_yield_s32.S
+++ b/lib_xcore_math/src/arch/xs3/matrix/mat_mul_s8_x_s8_yield_s32.S
@@ -1,4 +1,4 @@
-// Copyright 2021-2022 XMOS LIMITED.
+// Copyright 2021-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
diff --git a/lib_xcore_math/src/arch/xs3/misc/chunk_float_s32_log.S b/lib_xcore_math/src/arch/xs3/misc/chunk_float_s32_log.S
index 8b6044f4..9398e73c 100644
--- a/lib_xcore_math/src/arch/xs3/misc/chunk_float_s32_log.S
+++ b/lib_xcore_math/src/arch/xs3/misc/chunk_float_s32_log.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/misc/util.S b/lib_xcore_math/src/arch/xs3/misc/util.S
index 63ed682c..9d0dd9bd 100644
--- a/lib_xcore_math/src/arch/xs3/misc/util.S
+++ b/lib_xcore_math/src/arch/xs3/misc/util.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/misc/vect_copy.S b/lib_xcore_math/src/arch/xs3/misc/vect_copy.S
index a503d6d4..93baf3b7 100644
--- a/lib_xcore_math/src/arch/xs3/misc/vect_copy.S
+++ b/lib_xcore_math/src/arch/xs3/misc/vect_copy.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/misc/vect_float_s32_ln_prepare.S b/lib_xcore_math/src/arch/xs3/misc/vect_float_s32_ln_prepare.S
index 64b700bb..f0679962 100644
--- a/lib_xcore_math/src/arch/xs3/misc/vect_float_s32_ln_prepare.S
+++ b/lib_xcore_math/src/arch/xs3/misc/vect_float_s32_ln_prepare.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/misc/xs3_memcpy.S b/lib_xcore_math/src/arch/xs3/misc/xs3_memcpy.S
index b0b511e1..6a5432c1 100644
--- a/lib_xcore_math/src/arch/xs3/misc/xs3_memcpy.S
+++ b/lib_xcore_math/src/arch/xs3/misc/xs3_memcpy.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/scalar/f32_log2.S b/lib_xcore_math/src/arch/xs3/scalar/f32_log2.S
index 35748b68..a9d88120 100644
--- a/lib_xcore_math/src/arch/xs3/scalar/f32_log2.S
+++ b/lib_xcore_math/src/arch/xs3/scalar/f32_log2.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/scalar/f32_norm.S b/lib_xcore_math/src/arch/xs3/scalar/f32_norm.S
index 6ebe81d9..9d0ace55 100644
--- a/lib_xcore_math/src/arch/xs3/scalar/f32_norm.S
+++ b/lib_xcore_math/src/arch/xs3/scalar/f32_norm.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/scalar/f32_power_series.S b/lib_xcore_math/src/arch/xs3/scalar/f32_power_series.S
index 156fca0a..d916a7a7 100644
--- a/lib_xcore_math/src/arch/xs3/scalar/f32_power_series.S
+++ b/lib_xcore_math/src/arch/xs3/scalar/f32_power_series.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/scalar/f32_sin.S b/lib_xcore_math/src/arch/xs3/scalar/f32_sin.S
index 3f15a749..c011540a 100644
--- a/lib_xcore_math/src/arch/xs3/scalar/f32_sin.S
+++ b/lib_xcore_math/src/arch/xs3/scalar/f32_sin.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/scalar/float_s32_exp.S b/lib_xcore_math/src/arch/xs3/scalar/float_s32_exp.S
index 3f709bf1..c62ed64e 100644
--- a/lib_xcore_math/src/arch/xs3/scalar/float_s32_exp.S
+++ b/lib_xcore_math/src/arch/xs3/scalar/float_s32_exp.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/scalar/q24_logistic_fast.S b/lib_xcore_math/src/arch/xs3/scalar/q24_logistic_fast.S
index 156d1f10..7aeb92b2 100644
--- a/lib_xcore_math/src/arch/xs3/scalar/q24_logistic_fast.S
+++ b/lib_xcore_math/src/arch/xs3/scalar/q24_logistic_fast.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/scalar/q30_exp_small.S b/lib_xcore_math/src/arch/xs3/scalar/q30_exp_small.S
index 599a6e8b..6c01c82c 100644
--- a/lib_xcore_math/src/arch/xs3/scalar/q30_exp_small.S
+++ b/lib_xcore_math/src/arch/xs3/scalar/q30_exp_small.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/scalar/q30_odd_powers.S b/lib_xcore_math/src/arch/xs3/scalar/q30_odd_powers.S
index 8ddb44ed..051513d7 100644
--- a/lib_xcore_math/src/arch/xs3/scalar/q30_odd_powers.S
+++ b/lib_xcore_math/src/arch/xs3/scalar/q30_odd_powers.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/scalar/q30_powers.S b/lib_xcore_math/src/arch/xs3/scalar/q30_powers.S
index b565539f..9de099a0 100644
--- a/lib_xcore_math/src/arch/xs3/scalar/q30_powers.S
+++ b/lib_xcore_math/src/arch/xs3/scalar/q30_powers.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/scalar/radians_to_sbrads.S b/lib_xcore_math/src/arch/xs3/scalar/radians_to_sbrads.S
index 78cd7c3f..d12c378e 100644
--- a/lib_xcore_math/src/arch/xs3/scalar/radians_to_sbrads.S
+++ b/lib_xcore_math/src/arch/xs3/scalar/radians_to_sbrads.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/scalar/sbrad_sin.S b/lib_xcore_math/src/arch/xs3/scalar/sbrad_sin.S
index 1df51572..ef4d73d2 100644
--- a/lib_xcore_math/src/arch/xs3/scalar/sbrad_sin.S
+++ b/lib_xcore_math/src/arch/xs3/scalar/sbrad_sin.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/scalar/sbrad_tan.S b/lib_xcore_math/src/arch/xs3/scalar/sbrad_tan.S
index 0bd61f5d..85b8ee2c 100644
--- a/lib_xcore_math/src/arch/xs3/scalar/sbrad_tan.S
+++ b/lib_xcore_math/src/arch/xs3/scalar/sbrad_tan.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s16.S b/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s16.S
index 5b830399..943bbe43 100644
--- a/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s16.S
+++ b/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s16.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s32.S b/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s32.S
index 3f7bf89f..be5bdb19 100644
--- a/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s32.S
+++ b/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s32.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s8.S b/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s8.S
index c689a974..f06692e1 100644
--- a/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s8.S
+++ b/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s8.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/scalar/sqrt_s32.S b/lib_xcore_math/src/arch/xs3/scalar/sqrt_s32.S
index b370e901..5597568d 100644
--- a/lib_xcore_math/src/arch/xs3/scalar/sqrt_s32.S
+++ b/lib_xcore_math/src/arch/xs3/scalar/sqrt_s32.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_complex_scale.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_complex_scale.S
index b06014d2..f7a422f7 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_complex_scale.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_complex_scale.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conj_macc.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conj_macc.S
index 05393044..8ca467fb 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conj_macc.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conj_macc.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conj_nmacc.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conj_nmacc.S
index 4509b2c6..6aa1447e 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conj_nmacc.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conj_nmacc.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conjugate_mul.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conjugate_mul.S
index 770c804d..91a8a58a 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conjugate_mul.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conjugate_mul.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_macc.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_macc.S
index db1fae4a..4731a8a2 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_macc.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_macc.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_mag.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_mag.S
index ab273f4d..0efd80d2 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_mag.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_mag.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_mul.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_mul.S
index f04d616a..03de0c0e 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_mul.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_mul.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_nmacc.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_nmacc.S
index e92155e8..0ae21976 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_nmacc.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_nmacc.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_real_mul.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_real_mul.S
index 6d716f40..f86e6741 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_real_mul.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_real_mul.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_squared_mag.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_squared_mag.S
index 5347f775..9825ab06 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_squared_mag.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_squared_mag.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_sum.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_sum.S
index 017dd28c..5f54a445 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_sum.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_sum.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_to_complex_s32.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_to_complex_s32.S
index 09bdab64..8fc9d480 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_to_complex_s32.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_to_complex_s32.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_complex_scale.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_complex_scale.S
index 34cb459a..355ac0b1 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_complex_scale.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_complex_scale.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conj_macc.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conj_macc.S
index 631dac76..8ec1da4d 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conj_macc.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conj_macc.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conj_nmacc.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conj_nmacc.S
index 420edaa9..dec7733b 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conj_nmacc.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conj_nmacc.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conjugate.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conjugate.S
index 32889824..c75a6251 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conjugate.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conjugate.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conjugate_mul.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conjugate_mul.S
index 256c1e66..98b2d2d6 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conjugate_mul.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conjugate_mul.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_macc.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_macc.S
index fbd84c11..8736659f 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_macc.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_macc.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_mag.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_mag.S
index f711133a..4d8c7a01 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_mag.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_mag.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_mul.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_mul.S
index 1e8d3b79..bff841b8 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_mul.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_mul.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_nmacc.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_nmacc.S
index f8508227..7c544727 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_nmacc.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_nmacc.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_real_mul.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_real_mul.S
index 9cf62f18..8f15bd3d 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_real_mul.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_real_mul.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_squared_mag.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_squared_mag.S
index fa69691c..c9826a8b 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_squared_mag.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_squared_mag.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_sum.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_sum.S
index 8e72e020..8d8c8c63 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_sum.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_sum.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_to_complex_s16.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_to_complex_s16.S
index c8efa1ed..4b4feea1 100644
--- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_to_complex_s16.S
+++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_to_complex_s16.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_conj_macc.S b/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_conj_macc.S
index 2c94046d..42b8e433 100644
--- a/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_conj_macc.S
+++ b/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_conj_macc.S
@@ -1,4 +1,4 @@
-// Copyright 2022-2023 XMOS LIMITED.
+// Copyright 2022-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
     
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_conj_mul.S b/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_conj_mul.S
index c31a504e..a0edc432 100644
--- a/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_conj_mul.S
+++ b/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_conj_mul.S
@@ -1,4 +1,4 @@
-// Copyright 2022-2023 XMOS LIMITED.
+// Copyright 2022-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
     
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_macc.S b/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_macc.S
index 1ffcd8b2..085d0c2f 100644
--- a/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_macc.S
+++ b/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_macc.S
@@ -1,4 +1,4 @@
-// Copyright 2022-2023 XMOS LIMITED.
+// Copyright 2022-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
     
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_mul.S b/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_mul.S
index 1159b8cf..cef37d6c 100644
--- a/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_mul.S
+++ b/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_mul.S
@@ -1,4 +1,4 @@
-// Copyright 2022-2023 XMOS LIMITED.
+// Copyright 2022-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
     
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_add.S b/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_add.S
index ab7600c0..df4444ad 100644
--- a/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_add.S
+++ b/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_add.S
@@ -1,4 +1,4 @@
-// Copyright 2022-2023 XMOS LIMITED.
+// Copyright 2022-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
     
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_dot.S b/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_dot.S
index a4ec16b1..e731ae0a 100644
--- a/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_dot.S
+++ b/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_dot.S
@@ -1,4 +1,4 @@
-// Copyright 2022-2023 XMOS LIMITED.
+// Copyright 2022-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
     
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_max_exponent.S b/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_max_exponent.S
index 14d63e55..cd0449e7 100644
--- a/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_max_exponent.S
+++ b/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_max_exponent.S
@@ -1,4 +1,4 @@
-// Copyright 2022-2023 XMOS LIMITED.
+// Copyright 2022-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
     
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_to_s32.S b/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_to_s32.S
index 77a8875e..1137dee0 100644
--- a/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_to_s32.S
+++ b/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_to_s32.S
@@ -1,4 +1,4 @@
-// Copyright 2022-2023 XMOS LIMITED.
+// Copyright 2022-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
     
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_abs_sum.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_abs_sum.S
index 47ee29a8..c6e6427e 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_abs_sum.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_abs_sum.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_argmax.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_argmax.S
index dc38a5fb..dfb1ca0e 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_argmax.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_argmax.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_argmin.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_argmin.S
index 76522edf..a02ca9a6 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_argmin.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_argmin.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_clip.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_clip.S
index c4089989..b4a31ea9 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_clip.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_clip.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_dot.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_dot.S
index 0e057b2b..026a5acb 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_dot.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_dot.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_energy.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_energy.S
index 85ca35f4..b7965417 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_energy.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_energy.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_extract_high_byte.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_extract_high_byte.S
index ade68b04..3300b280 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_extract_high_byte.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_extract_high_byte.S
@@ -1,4 +1,4 @@
-// Copyright 2021-2022 XMOS LIMITED.
+// Copyright 2021-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_extract_low_byte.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_extract_low_byte.S
index 39c6f53f..b5363b29 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_extract_low_byte.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_extract_low_byte.S
@@ -1,4 +1,4 @@
-// Copyright 2021-2022 XMOS LIMITED.
+// Copyright 2021-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_inverse.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_inverse.S
index 24f3583d..9496cbfc 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_inverse.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_inverse.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_macc.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_macc.S
index d65c5fc3..7fb8c9f0 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_macc.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_macc.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_max.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_max.S
index 6202bd04..4fdfd54e 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_max.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_max.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_min.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_min.S
index 001ccc6b..965f5dd1 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_min.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_min.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_mul.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_mul.S
index 898c58d2..c9c9c972 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_mul.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_mul.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_nmacc.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_nmacc.S
index d47eb187..6cf1fda2 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_nmacc.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_nmacc.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_scale.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_scale.S
index 2cfe099f..510e3dc5 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_scale.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_scale.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_sqrt.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_sqrt.S
index f79fe82c..1d1993f6 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_sqrt.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_sqrt.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_sum.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_sum.S
index f3cd0881..7a99d001 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_sum.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_sum.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_to_s32.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_to_s32.S
index 50aeaec1..be5b7c40 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_to_s32.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_to_s32.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/s32_to_chunk_s32.S b/lib_xcore_math/src/arch/xs3/vect_s32/s32_to_chunk_s32.S
index 23615dd3..9521c203 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s32/s32_to_chunk_s32.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s32/s32_to_chunk_s32.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_abs_sum.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_abs_sum.S
index a83b903d..578b82f3 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_abs_sum.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_abs_sum.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_argmax.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_argmax.S
index ee0b02dd..e505f711 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_argmax.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_argmax.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_argmin.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_argmin.S
index b766c200..92592d8c 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_argmin.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_argmin.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_clip.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_clip.S
index d1198108..12611520 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_clip.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_clip.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_dot.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_dot.S
index 6e11ebb6..0156a1fe 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_dot.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_dot.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_energy.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_energy.S
index ef1cb34b..287ed4ac 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_energy.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_energy.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_inverse.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_inverse.S
index 3d5973a0..418b1bdf 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_inverse.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_inverse.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_macc.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_macc.S
index 376b0f30..cb885d89 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_macc.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_macc.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_max.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_max.S
index d3355f13..a25fec67 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_max.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_max.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_merge_accs.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_merge_accs.S
index 54f2f0bc..e4128c73 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_merge_accs.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_merge_accs.S
@@ -1,4 +1,4 @@
-// Copyright 2021-2022 XMOS LIMITED.
+// Copyright 2021-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_min.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_min.S
index ee95926d..c58d9018 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_min.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_min.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_mul.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_mul.S
index b3b9f094..fdeaca07 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_mul.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_mul.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_nmacc.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_nmacc.S
index 1ff4e1eb..0f3acce6 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_nmacc.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_nmacc.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_scale.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_scale.S
index 43bd896e..e74e3b30 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_scale.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_scale.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_split_accs.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_split_accs.S
index 2702e90a..a41e8562 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_split_accs.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_split_accs.S
@@ -1,4 +1,4 @@
-// Copyright 2021-2022 XMOS LIMITED.
+// Copyright 2021-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_sqrt.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_sqrt.S
index 5b1f7a68..8e31813b 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_sqrt.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_sqrt.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_sum.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_sum.S
index c34c41d6..2d0d9177 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_sum.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_sum.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_to_f32.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_to_f32.S
index 37a2ccbd..07950b3a 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_to_f32.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_to_f32.S
@@ -1,4 +1,4 @@
-// Copyright 2022-2023 XMOS LIMITED.
+// Copyright 2022-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
     
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_to_s16.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_to_s16.S
index 4da673b4..f3fb3d22 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_to_s16.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_to_s16.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_unzip.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_unzip.S
index 7b090876..2d647c62 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_unzip.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_unzip.S
@@ -1,4 +1,4 @@
-// Copyright 2021-2022 XMOS LIMITED.
+// Copyright 2021-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_zip.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_zip.S
index dfb71846..a6770972 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_zip.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_zip.S
@@ -1,4 +1,4 @@
-// Copyright 2021-2022 XMOS LIMITED.
+// Copyright 2021-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/lib_xcore_math/src/arch/xs3/vect_s8/vect_s8_is_negative.S b/lib_xcore_math/src/arch/xs3/vect_s8/vect_s8_is_negative.S
index b1d7cf5f..b3469266 100644
--- a/lib_xcore_math/src/arch/xs3/vect_s8/vect_s8_is_negative.S
+++ b/lib_xcore_math/src/arch/xs3/vect_s8/vect_s8_is_negative.S
@@ -1,4 +1,4 @@
-// Copyright 2021-2022 XMOS LIMITED.
+// Copyright 2021-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
diff --git a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_abs.S b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_abs.S
index e5c8b891..ecf313aa 100644
--- a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_abs.S
+++ b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_abs.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_add.S b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_add.S
index b842f50d..765a07ba 100644
--- a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_add.S
+++ b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_add.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_headroom.S b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_headroom.S
index c6cd4d8c..a3504601 100644
--- a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_headroom.S
+++ b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_headroom.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_rect.S b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_rect.S
index b68dafff..d848fb72 100644
--- a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_rect.S
+++ b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_rect.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_add_scalar.S b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_add_scalar.S
index 6e808486..8be41f8c 100644
--- a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_add_scalar.S
+++ b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_add_scalar.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_max_elementwise.S b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_max_elementwise.S
index ff40923b..85155ba6 100644
--- a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_max_elementwise.S
+++ b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_max_elementwise.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_min_elementwise.S b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_min_elementwise.S
index 4fb2e8f6..27a8de50 100644
--- a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_min_elementwise.S
+++ b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_min_elementwise.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_set.S b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_set.S
index f7375317..38971029 100644
--- a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_set.S
+++ b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_set.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_shl.S b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_shl.S
index fa9741a7..03899b71 100644
--- a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_shl.S
+++ b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_shl.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sub.S b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sub.S
index 37cdb6bf..8081b6a9 100644
--- a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sub.S
+++ b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sub.S
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #if defined(__XS3A__)
diff --git a/lib_xcore_math/src/bfp/bfp_alloc.c b/lib_xcore_math/src/bfp/bfp_alloc.c
index 1271b3f3..ca9e2d84 100644
--- a/lib_xcore_math/src/bfp/bfp_alloc.c
+++ b/lib_xcore_math/src/bfp/bfp_alloc.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/lib_xcore_math/src/bfp/bfp_complex_s16.c b/lib_xcore_math/src/bfp/bfp_complex_s16.c
index b094254b..17844279 100644
--- a/lib_xcore_math/src/bfp/bfp_complex_s16.c
+++ b/lib_xcore_math/src/bfp/bfp_complex_s16.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/lib_xcore_math/src/bfp/bfp_complex_s32.c b/lib_xcore_math/src/bfp/bfp_complex_s32.c
index 00f3f652..c41f8c1c 100644
--- a/lib_xcore_math/src/bfp/bfp_complex_s32.c
+++ b/lib_xcore_math/src/bfp/bfp_complex_s32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <assert.h>
diff --git a/lib_xcore_math/src/bfp/bfp_init.c b/lib_xcore_math/src/bfp/bfp_init.c
index 72ec2557..c4fd17f8 100644
--- a/lib_xcore_math/src/bfp/bfp_init.c
+++ b/lib_xcore_math/src/bfp/bfp_init.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/lib_xcore_math/src/bfp/bfp_s16.c b/lib_xcore_math/src/bfp/bfp_s16.c
index bdc8ddc9..ba41ea49 100644
--- a/lib_xcore_math/src/bfp/bfp_s16.c
+++ b/lib_xcore_math/src/bfp/bfp_s16.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/lib_xcore_math/src/bfp/bfp_s32.c b/lib_xcore_math/src/bfp/bfp_s32.c
index 5887f7fd..83cc4854 100644
--- a/lib_xcore_math/src/bfp/bfp_s32.c
+++ b/lib_xcore_math/src/bfp/bfp_s32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2025 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/lib_xcore_math/src/bfp/misc/gradient_constraint.c b/lib_xcore_math/src/bfp/misc/gradient_constraint.c
index 676bb361..054e9a19 100644
--- a/lib_xcore_math/src/bfp/misc/gradient_constraint.c
+++ b/lib_xcore_math/src/bfp/misc/gradient_constraint.c
@@ -1,4 +1,4 @@
-// Copyright 2021-2023 XMOS LIMITED.
+// Copyright 2021-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/lib_xcore_math/src/dct/dct8x8.c b/lib_xcore_math/src/dct/dct8x8.c
index 2afec4d2..51fb5b81 100644
--- a/lib_xcore_math/src/dct/dct8x8.c
+++ b/lib_xcore_math/src/dct/dct8x8.c
@@ -1,4 +1,4 @@
-// Copyright 2022-2024 XMOS LIMITED.
+// Copyright 2022-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/dct/dct_forward.c b/lib_xcore_math/src/dct/dct_forward.c
index ca43f930..fbc6fdc5 100644
--- a/lib_xcore_math/src/dct/dct_forward.c
+++ b/lib_xcore_math/src/dct/dct_forward.c
@@ -1,4 +1,4 @@
-// Copyright 2022-2024 XMOS LIMITED.
+// Copyright 2022-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/dct/dct_inverse.c b/lib_xcore_math/src/dct/dct_inverse.c
index 4c3e47df..02978e8f 100644
--- a/lib_xcore_math/src/dct/dct_inverse.c
+++ b/lib_xcore_math/src/dct/dct_inverse.c
@@ -1,4 +1,4 @@
-// Copyright 2022-2024 XMOS LIMITED.
+// Copyright 2022-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/etc/xmath_fft_lut/xmath_fft_lut.c b/lib_xcore_math/src/etc/xmath_fft_lut/xmath_fft_lut.c
index 448c78ab..3d20ab1f 100644
--- a/lib_xcore_math/src/etc/xmath_fft_lut/xmath_fft_lut.c
+++ b/lib_xcore_math/src/etc/xmath_fft_lut/xmath_fft_lut.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #include "xmath_fft_lut.h"
 
diff --git a/lib_xcore_math/src/etc/xmath_fft_lut/xmath_fft_lut.h b/lib_xcore_math/src/etc/xmath_fft_lut/xmath_fft_lut.h
index 02c2e224..372c22d1 100644
--- a/lib_xcore_math/src/etc/xmath_fft_lut/xmath_fft_lut.h
+++ b/lib_xcore_math/src/etc/xmath_fft_lut/xmath_fft_lut.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/lib_xcore_math/src/fft/fft_bfp.c b/lib_xcore_math/src/fft/fft_bfp.c
index 1ffc2400..2bde08a7 100644
--- a/lib_xcore_math/src/fft/fft_bfp.c
+++ b/lib_xcore_math/src/fft/fft_bfp.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <assert.h>
diff --git a/lib_xcore_math/src/fft/fft_f32.c b/lib_xcore_math/src/fft/fft_f32.c
index 67444026..86669294 100644
--- a/lib_xcore_math/src/fft/fft_f32.c
+++ b/lib_xcore_math/src/fft/fft_f32.c
@@ -1,4 +1,4 @@
-// Copyright 2022 XMOS LIMITED.
+// Copyright 2022-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/filter/filters.c b/lib_xcore_math/src/filter/filters.c
index deceac66..9b71728b 100644
--- a/lib_xcore_math/src/filter/filters.c
+++ b/lib_xcore_math/src/filter/filters.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/lib_xcore_math/src/scalar/scalar_f32.c b/lib_xcore_math/src/scalar/scalar_f32.c
index 985978e9..36f6b5b1 100644
--- a/lib_xcore_math/src/scalar/scalar_f32.c
+++ b/lib_xcore_math/src/scalar/scalar_f32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/scalar/scalar_float_complex_sXX.c b/lib_xcore_math/src/scalar/scalar_float_complex_sXX.c
index aaf99313..e9fc9b33 100644
--- a/lib_xcore_math/src/scalar/scalar_float_complex_sXX.c
+++ b/lib_xcore_math/src/scalar/scalar_float_complex_sXX.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/scalar/scalar_float_s32.c b/lib_xcore_math/src/scalar/scalar_float_s32.c
index f2f18184..0047a0c5 100644
--- a/lib_xcore_math/src/scalar/scalar_float_s32.c
+++ b/lib_xcore_math/src/scalar/scalar_float_s32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/scalar/scalar_float_s64.c b/lib_xcore_math/src/scalar/scalar_float_s64.c
index 120687be..e5b027be 100644
--- a/lib_xcore_math/src/scalar/scalar_float_s64.c
+++ b/lib_xcore_math/src/scalar/scalar_float_s64.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/scalar/scalar_ops.c b/lib_xcore_math/src/scalar/scalar_ops.c
index fdbfa473..75e44483 100644
--- a/lib_xcore_math/src/scalar/scalar_ops.c
+++ b/lib_xcore_math/src/scalar/scalar_ops.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/scalar/scalar_qXX.c b/lib_xcore_math/src/scalar/scalar_qXX.c
index f6c7bcd5..b34758eb 100644
--- a/lib_xcore_math/src/scalar/scalar_qXX.c
+++ b/lib_xcore_math/src/scalar/scalar_qXX.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/vect/chunk_s32.c b/lib_xcore_math/src/vect/chunk_s32.c
index c49a1a76..9365630a 100644
--- a/lib_xcore_math/src/vect/chunk_s32.c
+++ b/lib_xcore_math/src/vect/chunk_s32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/vect/complex_prepare.c b/lib_xcore_math/src/vect/complex_prepare.c
index ce049270..9c8b8121 100644
--- a/lib_xcore_math/src/vect/complex_prepare.c
+++ b/lib_xcore_math/src/vect/complex_prepare.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/vect/convolve.c b/lib_xcore_math/src/vect/convolve.c
index 08451648..ff7080c1 100644
--- a/lib_xcore_math/src/vect/convolve.c
+++ b/lib_xcore_math/src/vect/convolve.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/lib_xcore_math/src/vect/mat_mul.c b/lib_xcore_math/src/vect/mat_mul.c
index 83b3d2cf..07a7efa2 100644
--- a/lib_xcore_math/src/vect/mat_mul.c
+++ b/lib_xcore_math/src/vect/mat_mul.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/vect/prepare.c b/lib_xcore_math/src/vect/prepare.c
index c31a14eb..2aec5656 100644
--- a/lib_xcore_math/src/vect/prepare.c
+++ b/lib_xcore_math/src/vect/prepare.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/vect/vect_complex_mag_rot_tables.c b/lib_xcore_math/src/vect/vect_complex_mag_rot_tables.c
index 59d003ab..8a70e0c2 100644
--- a/lib_xcore_math/src/vect/vect_complex_mag_rot_tables.c
+++ b/lib_xcore_math/src/vect/vect_complex_mag_rot_tables.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/vect/vect_complex_s16.c b/lib_xcore_math/src/vect/vect_complex_s16.c
index 010f26d8..65b35a64 100644
--- a/lib_xcore_math/src/vect/vect_complex_s16.c
+++ b/lib_xcore_math/src/vect/vect_complex_s16.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/vect/vect_complex_s32.c b/lib_xcore_math/src/vect/vect_complex_s32.c
index 70aa655b..6d4fe35b 100644
--- a/lib_xcore_math/src/vect/vect_complex_s32.c
+++ b/lib_xcore_math/src/vect/vect_complex_s32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/vect/vect_f32.c b/lib_xcore_math/src/vect/vect_f32.c
index 91517e9c..9abe6bf9 100644
--- a/lib_xcore_math/src/vect/vect_f32.c
+++ b/lib_xcore_math/src/vect/vect_f32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/vect/vect_float_s32.c b/lib_xcore_math/src/vect/vect_float_s32.c
index 9389e012..544e9b8a 100644
--- a/lib_xcore_math/src/vect/vect_float_s32.c
+++ b/lib_xcore_math/src/vect/vect_float_s32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/vect/vect_s16.c b/lib_xcore_math/src/vect/vect_s16.c
index d48ca756..179f7abe 100644
--- a/lib_xcore_math/src/vect/vect_s16.c
+++ b/lib_xcore_math/src/vect/vect_s16.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/vect/vect_s32.c b/lib_xcore_math/src/vect/vect_s32.c
index 9b6a232d..99c41f6e 100644
--- a/lib_xcore_math/src/vect/vect_s32.c
+++ b/lib_xcore_math/src/vect/vect_s32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/lib_xcore_math/src/vect/vpu_const_vects.c b/lib_xcore_math/src/vect/vpu_const_vects.c
index 56d20aa7..852fd652 100644
--- a/lib_xcore_math/src/vect/vpu_const_vects.c
+++ b/lib_xcore_math/src/vect/vpu_const_vects.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include "vpu_const_vects.h"
diff --git a/lib_xcore_math/src/vect/vpu_const_vects.h b/lib_xcore_math/src/vect/vpu_const_vects.h
index 54808415..f1c57e46 100644
--- a/lib_xcore_math/src/vect/vpu_const_vects.h
+++ b/lib_xcore_math/src/vect/vpu_const_vects.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2022 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #pragma once
 
diff --git a/lib_xcore_math/src/vect/vpu_helper.h b/lib_xcore_math/src/vect/vpu_helper.h
index cf2df0a1..ff9aee8b 100644
--- a/lib_xcore_math/src/vect/vpu_helper.h
+++ b/lib_xcore_math/src/vect/vpu_helper.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2023 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/tests/Makefile b/tests/Makefile
new file mode 100644
index 00000000..a807c36e
--- /dev/null
+++ b/tests/Makefile
@@ -0,0 +1,210 @@
+# Configurable variables (override on the make command line)
+# Prefer system clang on macOS to avoid picking up XMOS toolchain clang
+# UNAME_S := $(shell uname -s)
+# ifeq ($(UNAME_S),Darwin)
+# CC := /usr/bin/clang
+# else
+CC := clang
+# endif
+ROOT_DIR := $(shell pwd)/..
+ARCH ?= vx4b
+
+SRC_BFP_COMMON := \
+			$(wildcard $(ROOT_DIR)/tests/bfp_tests/src/misc/*.c) \
+			$(wildcard $(ROOT_DIR)/tests/bfp_tests/src/*.c) \
+			$(wildcard $(ROOT_DIR)/tests/bfp_tests/src/bfp/*/*/*.c) \
+			$(wildcard $(ROOT_DIR)/tests/bfp_tests/src/bfp/*/*.c) 
+
+# Source files (common + arch-specific)
+SRC_DCT_COMMON := \
+			$(wildcard $(ROOT_DIR)/tests/dct_tests/src/lib_dsp/*.c) \
+			$(wildcard $(ROOT_DIR)/tests/dct_tests/src/*.c)
+
+SRC_FFT_COMMON := \
+			$(wildcard $(ROOT_DIR)/tests/fft_tests/src/*.c)
+
+SRC_FILTER_COMMON := \
+			$(wildcard $(ROOT_DIR)/tests/dct_tests/src/filter/*.c) \
+			$(wildcard $(ROOT_DIR)/tests/dct_tests/src/*.c)
+
+SRC_FILTER_COMMON := \
+			$(wildcard $(ROOT_DIR)/tests/filter_tests/src/filter/*.c) \
+			$(wildcard $(ROOT_DIR)/tests/filter_tests/src/*.c)
+
+SRC_SCALAR_COMMON := \
+			$(wildcard $(ROOT_DIR)/tests/scalar_tests/src/basic/*.c) \
+			$(wildcard $(ROOT_DIR)/tests/scalar_tests/src/float/*.c) \
+			$(wildcard $(ROOT_DIR)/tests/scalar_tests/src/util/*.c) \
+			$(wildcard $(ROOT_DIR)/tests/scalar_tests/src/*.c)
+
+SRC_VECT_COMMON := \
+			$(wildcard $(ROOT_DIR)/tests/vect_tests/src/matrix/*.c) \
+			$(wildcard $(ROOT_DIR)/tests/vect_tests/src/vect/*.c) \
+			$(wildcard $(ROOT_DIR)/tests/vect_tests/src/vect/complex/*.c) \
+			$(wildcard $(ROOT_DIR)/tests/vect_tests/src/vect/float/*.c) \
+			$(wildcard $(ROOT_DIR)/tests/vect_tests/src/vect/stat/*.c) \
+			$(wildcard $(ROOT_DIR)/tests/vect_tests/src/*.c)
+
+SRC_ARCH_vx4b := \
+			$(wildcard $(ROOT_DIR)/../lib_unity/lib_unity/Unity/extras/*/src/*.c) \
+			$(wildcard $(ROOT_DIR)/../lib_unity/lib_unity/Unity/src/*.c) \
+			$(wildcard $(ROOT_DIR)/tests/shared/*/*.c) \
+			$(wildcard $(ROOT_DIR)/lib_xcore_math/src/vect/*.c) \
+			$(wildcard $(ROOT_DIR)/lib_xcore_math/src/scalar/*.c) \
+			$(wildcard $(ROOT_DIR)/lib_xcore_math/src/filter/*.c) \
+			$(wildcard $(ROOT_DIR)/lib_xcore_math/src/etc/xmath_fft_lut/*.c) \
+			$(wildcard $(ROOT_DIR)/lib_xcore_math/src/fft/*.c) \
+			$(wildcard $(ROOT_DIR)/lib_xcore_math/src/dct/*.c) \
+			$(wildcard $(ROOT_DIR)/lib_xcore_math/src/bfp/*.c) \
+			$(wildcard $(ROOT_DIR)/lib_xcore_math/src/bfp/*/*.c) \
+			$(wildcard $(ROOT_DIR)/lib_xcore_math/src/arch/vx4b/**/*.S) \
+			$(wildcard $(ROOT_DIR)/lib_xcore_math/src/arch/vx4b/*/*/*.S) \
+			$(wildcard $(ROOT_DIR)/lib_xcore_math/src/arch/vx4b/*.c) \
+			$(ROOT_DIR)/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_dot.c \
+			$(ROOT_DIR)/lib_xcore_math/src/arch/vx4b/chunk_s16/chunk_s16_accumulate.c \
+			$(ROOT_DIR)/lib_xcore_math/src/arch/vx4b/scalar/float_s32.c \
+			$(ROOT_DIR)/lib_xcore_math/src/arch/ref/vpu_scalar_ops.c
+
+
+# Exclude problematic assembly file from the arch source list
+# (keeps rest of the wildcard-based list intact but removes this one file)
+SRC_ARCH_vx4b := $(filter-out $(ROOT_DIR)/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s16.S,$(SRC_ARCH_vx4b))
+SRC_ARCH_vx4b := $(filter-out $(ROOT_DIR)/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s32.S,$(SRC_ARCH_vx4b))
+SRC_ARCH_vx4b := $(filter-out $(ROOT_DIR)/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s8.S,$(SRC_ARCH_vx4b))
+
+# SRC_ARCH_vx4b += $(ROOT_DIR)/lib_xcore_math/lib_xcore_math/src/arch/ref/vpu_scalar_ops.c
+
+# Default include paths (adjust as required)
+COMMON_INCLUDES := -I lib_nn/api \
+	     -I $(ROOT_DIR)/../lib_unity/lib_unity/Unity/extras/fixture/src \
+	     -I $(ROOT_DIR)/../lib_unity/lib_unity/Unity/src \
+	     -I $(ROOT_DIR)/../lib_unity/lib_unity/Unity/extras/memory/src \
+		 -I $(ROOT_DIR)/lib_xcore_math/api\
+		 -I $(ROOT_DIR)/tests/shared/pseudo_rand\
+		 -I $(ROOT_DIR)/tests/shared/testing\
+		 -I $(ROOT_DIR)/tests/shared/floating_fft\
+		 -I $(ROOT_DIR)/lib_xcore_math/src/etc/xmath_fft_lut \
+		 -I $(ROOT_DIR)/lib_xcore_math/src/vect
+
+# Arch-specific flags
+ARCH_FLAGS_vx4b := -mcpu=xmos-vx4b -D__VX4B__ -DSMOKE_TEST
+
+# XMOS toolchain (used for assembling/linking vx4 targets)
+XCC := /Applications/XMOS_XTC_0.2.0/riscv-toolchain/bin/clang
+
+# By default use CC for C compile. When building vx4/vx4b with the XMOS
+# toolchain available, use the XMOS clang for C compile, assembling .S
+# files and linking the final binaries so all object files target the
+# same architecture (prevents mixed-format objects).
+ASM_CC := $(CC)
+LINK_CC := $(CC)
+ifeq ($(findstring vx4,$(ARCH)),vx4)
+ifneq ($(wildcard $(XCC)),)
+CC := $(XCC)
+ASM_CC := $(XCC)
+LINK_CC := $(XCC)
+endif
+endif
+
+# Common compile flags
+COMMON_FLAGS := -Os
+
+# Enable section-level GC: compile into individual sections and ask the linker
+# to drop unused sections. These flags are safe for host builds but are
+# essential for cross-built vx4 binaries to reduce final size.
+COMMON_FLAGS += -ffunction-sections -fdata-sections -DUNITY_SUPPORT_64=1 -DUNITY_INCLUDE_DOUBLE
+
+# Linker flags (pass to the linker via the compiler driver)
+LDFLAGS := -Wl,--gc-sections
+
+CFLAGS := $(COMMON_FLAGS) $(ARCH_FLAGS_$(ARCH)) $(COMMON_INCLUDES)  -I $(ROOT_DIR)/tests/bfp_tests/src 
+
+# Build directories
+OBJDIR := $(ROOT_DIR)/build/$(ARCH)/obj
+BINDIR := $(ROOT_DIR)/build/$(ARCH)/bin
+
+# Ensure arch-specific variable names resolve
+SRC_ARCH := $(SRC_ARCH_$(ARCH))
+
+# Helper: convert source list to object list under OBJDIR
+define objs_from_src
+  $(patsubst %.c,%.o,$(patsubst %.S,%.o,$(patsubst $(ROOT_DIR)/%,$(OBJDIR)/%,$(1))))
+endef
+
+OBJ_BFP := $(call objs_from_src,$(SRC_BFP_COMMON) $(SRC_ARCH))
+OBJ_DCT := $(call objs_from_src,$(SRC_DCT_COMMON) $(SRC_ARCH))
+OBJ_FFT := $(call objs_from_src,$(SRC_FFT_COMMON) $(SRC_ARCH))
+OBJ_FILTER := $(call objs_from_src,$(SRC_FILTER_COMMON) $(SRC_ARCH))
+OBJ_SCALAR := $(call objs_from_src,$(SRC_SCALAR_COMMON) $(SRC_ARCH))
+OBJ_VECT := $(call objs_from_src,$(SRC_VECT_COMMON) $(SRC_ARCH))
+
+# Pattern rules to build objects
+$(OBJDIR)/%.o: $(ROOT_DIR)/%.c
+	@mkdir -p $(dir $@)
+	$(CC) -c $< -o $@ $(CFLAGS)
+
+$(OBJDIR)/%.o: $(ROOT_DIR)/%.S
+	@mkdir -p $(dir $@)
+	$(ASM_CC) -c $< -o $@ $(CFLAGS)
+
+# .PHONY: all build clean
+all: filter bfp dct fft scalar vect
+all: vect
+
+bfp: $(BINDIR)/bfp
+
+
+$(BINDIR)/bfp: $(OBJ_BFP)
+	@mkdir -p $(dir $@)
+	$(LINK_CC) $^ -o $@ $(CFLAGS) $(LDFLAGS)
+
+dct: $(BINDIR)/dct
+
+
+$(BINDIR)/dct: $(OBJ_DCT)
+	@mkdir -p $(dir $@)
+	$(LINK_CC) $^ -o $@ $(CFLAGS) $(LDFLAGS)
+
+fft: $(BINDIR)/fft
+
+
+$(BINDIR)/fft: $(OBJ_FFT)
+	@mkdir -p $(dir $@)
+	$(LINK_CC) $^ -o $@ $(CFLAGS) $(LDFLAGS)
+
+filter: $(BINDIR)/filter
+
+
+$(BINDIR)/filter: $(OBJ_FILTER)
+	@mkdir -p $(dir $@)
+	$(LINK_CC) $^ -o $@ $(CFLAGS) $(LDFLAGS)
+
+scalar: $(BINDIR)/scalar
+
+
+$(BINDIR)/scalar: $(OBJ_SCALAR)
+	@mkdir -p $(dir $@)
+	$(LINK_CC) $^ -o $@ $(CFLAGS) $(LDFLAGS)
+
+vect: $(BINDIR)/vect
+
+
+$(BINDIR)/vect: $(OBJ_VECT)
+	@mkdir -p $(dir $@)
+	$(LINK_CC) $^ -o $@ $(CFLAGS) $(LDFLAGS) -I $(ROOT_DIR)/lib_xcore_math/src/vect -I $(ROOT_DIR)/tests/vect_tests/src
+
+
+clean:
+	@echo "Cleaning $(ROOT_DIR)/build/$(ARCH)"
+	rm -rf $(ROOT_DIR)/build/$(ARCH)
+
+run: 
+# 	- xsim $(BINDIR)/bfp --config-file config.xml
+# 	- xsim $(BINDIR)/dct --config-file config.xml
+# 	- xsim $(BINDIR)/fft --config-file config.xml
+# 	- xsim $(BINDIR)/filter --config-file config.xml
+	- xsim $(BINDIR)/scalar --config-file config.xml 
+# 	- xsim $(BINDIR)/vect --config-file config.xml
+
+trace: 
+	- xsim $(BINDIR)/fft --config-file config.xml -t > trace.txt
\ No newline at end of file
diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_bitdepth_convert.c b/tests/bfp_tests/src/bfp/complex/test_bfp_bitdepth_convert.c
index df3a564f..65146431 100644
--- a/tests/bfp_tests/src/bfp/complex/test_bfp_bitdepth_convert.c
+++ b/tests/bfp_tests/src/bfp/complex/test_bfp_bitdepth_convert.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_add.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_add.c
index 6bd94b8e..9db4d19f 100644
--- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_add.c
+++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_add.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_add_scalar.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_add_scalar.c
index a9f1d536..6e03db99 100644
--- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_add_scalar.c
+++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_add_scalar.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conj_macc.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conj_macc.c
index d76312d6..36e4d60b 100644
--- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conj_macc.c
+++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conj_macc.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conjugate.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conjugate.c
index 390c1d7d..cebb341b 100644
--- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conjugate.c
+++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conjugate.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conjugate_mul.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conjugate_mul.c
index 8e71892c..1ef0814b 100644
--- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conjugate_mul.c
+++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conjugate_mul.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -91,8 +91,13 @@ TEST(bfp_complex_conj_mul, bfp_complex_s16_conj_mul)
 
         for(unsigned int i = 0; i < A.length; i++){
             // printf("! %d\t %d \t %d \t %e\n", i, expA.real[i], A.real[i], Af.real[0]);
-            TEST_ASSERT_INT16_WITHIN(1, expA.real[i], A.real[i]);
-            TEST_ASSERT_INT16_WITHIN(1, expA.imag[i], A.imag[i]);
+            #if defined(__VX4B__)
+                TEST_ASSERT_INT16_WITHIN(2, expA.real[i], A.real[i]);
+                TEST_ASSERT_INT16_WITHIN(2, expA.imag[i], A.imag[i]);
+            #else
+                TEST_ASSERT_INT16_WITHIN(1, expA.real[i], A.real[i]);
+                TEST_ASSERT_INT16_WITHIN(1, expA.imag[i], A.imag[i]);
+            #endif
         }
     }
 }
diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_energy.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_energy.c
index 926b6a67..320e6b0b 100644
--- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_energy.c
+++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_energy.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_macc.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_macc.c
index de317aab..719acf74 100644
--- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_macc.c
+++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_macc.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
@@ -94,8 +94,13 @@ TEST(bfp_complex_macc, bfp_complex_s16_macc)
         test_complex_s16_from_double(expA.real, expA.imag, Af.real, Af.imag, LEN, A.exp);
 
         for(unsigned int i = 0; i < A.length; i++){
-            TEST_ASSERT_INT16_WITHIN(2, expA.real[i], A.real[i]);
-            TEST_ASSERT_INT16_WITHIN(2, expA.imag[i], A.imag[i]);
+            #if defined(__VX4B__)
+                TEST_ASSERT_INT16_WITHIN(1<<12, expA.real[i], A.real[i]);
+                TEST_ASSERT_INT16_WITHIN(1<<12, expA.imag[i], A.imag[i]);
+            #else
+                TEST_ASSERT_INT16_WITHIN(2, expA.real[i], A.real[i]);
+                TEST_ASSERT_INT16_WITHIN(2, expA.imag[i], A.imag[i]);
+             #endif
         }
     }
 }
@@ -160,8 +165,13 @@ TEST(bfp_complex_macc, bfp_complex_s16_nmacc)
         test_complex_s16_from_double(expA.real, expA.imag, Af.real, Af.imag, LEN, A.exp);
 
         for(unsigned int i = 0; i < A.length; i++){
-            TEST_ASSERT_INT16_WITHIN(2, expA.real[i], A.real[i]);
-            TEST_ASSERT_INT16_WITHIN(2, expA.imag[i], A.imag[i]);
+            #if defined(__VX4B__)
+                TEST_ASSERT_INT16_WITHIN(1<<12, expA.real[i], A.real[i]);
+                TEST_ASSERT_INT16_WITHIN(1<<12, expA.imag[i], A.imag[i]);
+            #else
+                TEST_ASSERT_INT16_WITHIN(2, expA.real[i], A.real[i]);
+                TEST_ASSERT_INT16_WITHIN(2, expA.imag[i], A.imag[i]);
+             #endif
         }
     }
 }
diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_mag.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_mag.c
index 46001916..eaa6efb9 100644
--- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_mag.c
+++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_mag.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_make.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_make.c
index 867067ef..e3761900 100644
--- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_make.c
+++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_make.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_mul.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_mul.c
index 83e9e395..f202404d 100644
--- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_mul.c
+++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_mul.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -89,8 +89,13 @@ TEST(bfp_complex_mul, bfp_complex_s16_mul)
         test_complex_s16_from_double(expA.real, expA.imag, Af.real, Af.imag, MAX_LEN, A.exp);
 
         for(unsigned int i = 0; i < A.length; i++){
-            TEST_ASSERT_INT16_WITHIN(1, expA.real[i], A.real[i]);
-            TEST_ASSERT_INT16_WITHIN(1, expA.imag[i], A.imag[i]);
+            #if defined(__VX4B__)
+                TEST_ASSERT_INT16_WITHIN(2, expA.real[i], A.real[i]);
+                TEST_ASSERT_INT16_WITHIN(2, expA.imag[i], A.imag[i]);
+            #else
+                TEST_ASSERT_INT16_WITHIN(1, expA.real[i], A.real[i]);
+                TEST_ASSERT_INT16_WITHIN(1, expA.imag[i], A.imag[i]);
+            #endif
         }
     }
 }
diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_real_mul.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_real_mul.c
index 68758d16..ca19b148 100644
--- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_real_mul.c
+++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_real_mul.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_real_scale.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_real_scale.c
index c522d033..ba9101c0 100644
--- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_real_scale.c
+++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_real_scale.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_scale.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_scale.c
index 1907107b..d6c455d4 100644
--- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_scale.c
+++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_scale.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -98,9 +98,14 @@ TEST(bfp_complex_scale, bfp_complex_s16_scale)
         
         test_complex_s16_from_double(expA.real, expA.imag, Af.real, Af.imag, MAX_LEN, A.exp);
 
-        for(unsigned int i = 0; i < A.length; i++){
-            TEST_ASSERT_INT16_WITHIN(1, expA.real[i], A.real[i]);
-            TEST_ASSERT_INT16_WITHIN(1, expA.imag[i], A.imag[i]);
+        for(unsigned int i = 0; i < A.length; i++){          
+            #if defined(__VX4B__)
+                TEST_ASSERT_INT16_WITHIN(2, expA.real[i], A.real[i]);
+                TEST_ASSERT_INT16_WITHIN(2, expA.imag[i], A.imag[i]);
+            #else
+                TEST_ASSERT_INT16_WITHIN(1, expA.real[i], A.real[i]);
+                TEST_ASSERT_INT16_WITHIN(1, expA.imag[i], A.imag[i]);
+            #endif
         }
     }
 }
diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_squared_mag.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_squared_mag.c
index 459ba2e0..31e9e788 100644
--- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_squared_mag.c
+++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_squared_mag.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -80,8 +80,12 @@ TEST(bfp_complex_squared_mag, bfp_complex_s16_squared_mag)
 
         test_s16_from_double(expA, Af, MAX_LEN, A.exp);
 
-        for(unsigned int i = 0; i < A.length; i++){
-            TEST_ASSERT_INT16_WITHIN(1, expA[i], A.data[i]);
+        for(unsigned int i = 0; i < A.length; i++){           
+            #if defined(__VX4B__)
+                TEST_ASSERT_INT16_WITHIN(2, expA[i], A.data[i]); 
+            #else
+                TEST_ASSERT_INT16_WITHIN(1, expA[i], A.data[i]); 
+            #endif
         }
 
         
diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_sub.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_sub.c
index 7bc83ee7..118f03b2 100644
--- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_sub.c
+++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_sub.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_use_exponent.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_use_exponent.c
index fcddc778..6709977d 100644
--- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_use_exponent.c
+++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_use_exponent.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_sum_complex.c b/tests/bfp_tests/src/bfp/complex/test_bfp_sum_complex.c
index 935146db..aff1b48c 100644
--- a/tests/bfp_tests/src/bfp/complex/test_bfp_sum_complex.c
+++ b/tests/bfp_tests/src/bfp/complex/test_bfp_sum_complex.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -69,8 +69,13 @@ TEST(bfp_complex_sum, bfp_complex_s16_sum)
         float_complex_s32_t result = bfp_complex_s16_sum(&B);
 
         TEST_ASSERT_EQUAL(expected.exp, result.exp);
-        TEST_ASSERT_EQUAL_INT32(expected.mant.re, result.mant.re);
-        TEST_ASSERT_EQUAL_INT32(expected.mant.im, result.mant.im);
+        #if defined(__VX4B__)
+            TEST_ASSERT_INT32_WITHIN(8, expected.mant.re, result.mant.re);
+            TEST_ASSERT_INT32_WITHIN(8, expected.mant.im, result.mant.im);    
+        #else
+            TEST_ASSERT_EQUAL_INT32(expected.mant.re, result.mant.re);
+            TEST_ASSERT_EQUAL_INT32(expected.mant.im, result.mant.im);
+        #endif
     }
 }
 
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_abs.c b/tests/bfp_tests/src/bfp/real/test_bfp_abs.c
index d177350c..18df61d9 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_abs.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_abs.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -66,7 +66,11 @@ TEST(bfp_abs, bfp_s16_abs)
 
         for(unsigned int i = 0; i < A.length; i++){
             int16_t expected = abs(B.data[i]);
-            TEST_ASSERT_EQUAL(expected, A.data[i]);
+            #if defined(__VX4B__)
+                TEST_ASSERT_INT16_WITHIN(1, expected, A.data[i]);
+            #else
+                TEST_ASSERT_EQUAL(expected, A.data[i]);
+            #endif
         }
     }
 }
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_abs_sum.c b/tests/bfp_tests/src/bfp/real/test_bfp_abs_sum.c
index fcfc7cc6..9c70a507 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_abs_sum.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_abs_sum.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -67,8 +67,14 @@ TEST(bfp_abs_sum, bfp_s16_abs_sum)
         for(unsigned int i = 0; i < B.length; i++)
             expected.mant += abs(B.data[i]);
 
-        TEST_ASSERT_EQUAL(expected.exp, result.exp);
-        TEST_ASSERT_EQUAL_INT32(expected.mant, result.mant);
+        #if defined(__VX4B__)
+            TEST_ASSERT_INT32_WITHIN(1, expected.exp, result.exp);
+            TEST_ASSERT_INT32_WITHIN(12, expected.mant, result.mant);    
+        #else
+            TEST_ASSERT_EQUAL(expected.exp, result.exp);
+            TEST_ASSERT_EQUAL_INT32(expected.mant, result.mant);
+        #endif
+        
     }
 }
 
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_add.c b/tests/bfp_tests/src/bfp/real/test_bfp_add.c
index 2ed407e0..b05361e0 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_add.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_add.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_add_scalar.c b/tests/bfp_tests/src/bfp/real/test_bfp_add_scalar.c
index b60e24a6..cd728413 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_add_scalar.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_add_scalar.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_alloc.c b/tests/bfp_tests/src/bfp/real/test_bfp_alloc.c
index 93bf2e04..fde646fb 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_alloc.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_alloc.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_argmax.c b/tests/bfp_tests/src/bfp/real/test_bfp_argmax.c
index d9509c73..7f81e732 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_argmax.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_argmax.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_argmin.c b/tests/bfp_tests/src/bfp/real/test_bfp_argmin.c
index 91ebc904..465c9b59 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_argmin.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_argmin.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_clip.c b/tests/bfp_tests/src/bfp/real/test_bfp_clip.c
index 6e493465..66a08957 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_clip.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_clip.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_convolve.c b/tests/bfp_tests/src/bfp/real/test_bfp_convolve.c
index 5d62ef5c..381a9c60 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_convolve.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_convolve.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_dealloc.c b/tests/bfp_tests/src/bfp/real/test_bfp_dealloc.c
index 25da9381..1efcc2f6 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_dealloc.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_dealloc.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_depth_convert.c b/tests/bfp_tests/src/bfp/real/test_bfp_depth_convert.c
index 95c5bd3c..ab4b7b0f 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_depth_convert.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_depth_convert.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_dot.c b/tests/bfp_tests/src/bfp/real/test_bfp_dot.c
index 0b6a269a..39db0bf6 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_dot.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_dot.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_energy.c b/tests/bfp_tests/src/bfp/real/test_bfp_energy.c
index 5ad4604d..017ccbc6 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_energy.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_energy.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_headroom.c b/tests/bfp_tests/src/bfp/real/test_bfp_headroom.c
index fc5d24da..0cb1d453 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_headroom.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_headroom.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_init.c b/tests/bfp_tests/src/bfp/real/test_bfp_init.c
index 71888e46..bcc0a363 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_init.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_init.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_inverse.c b/tests/bfp_tests/src/bfp/real/test_bfp_inverse.c
index 78f8fe92..22f4b2f3 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_inverse.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_inverse.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_macc.c b/tests/bfp_tests/src/bfp/real/test_bfp_macc.c
index 217954d5..02c06837 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_macc.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_macc.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_max.c b/tests/bfp_tests/src/bfp/real/test_bfp_max.c
index 08c13c6b..b5bc07f5 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_max.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_max.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -56,7 +56,19 @@ TEST(bfp_max, bfp_s16_max)
 
         float result = bfp_s16_max(&B);
 
-        TEST_ASSERT_EQUAL_FLOAT(expected, result);
+        int16_t mantissa;
+        exponent_t exponent;
+        f32_unpack_s16(&mantissa, &exponent, result);
+        int16_t exp_mantissa;
+        exponent_t exp_exponent;
+        f32_unpack_s16(&exp_mantissa, &exp_exponent, expected);
+
+        #if defined(__VX4B__)
+            TEST_ASSERT_INT16_WITHIN(128, exp_mantissa, mantissa);
+        #else
+            TEST_ASSERT_INT16_WITHIN(1, exp_mantissa, mantissa);
+        #endif
+        TEST_ASSERT_EQUAL_INT16(exp_exponent, exponent);
     }
 }
 
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_max_elementwise.c b/tests/bfp_tests/src/bfp/real/test_bfp_max_elementwise.c
index 5b3e7140..0e4fb9fd 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_max_elementwise.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_max_elementwise.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_mean.c b/tests/bfp_tests/src/bfp/real/test_bfp_mean.c
index 49162206..2cc77bce 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_mean.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_mean.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -54,7 +54,12 @@ TEST(bfp_mean, bfp_s16_mean)
         double sum = 0;
 
         for(unsigned int i = 0; i < B.length; i++){
-            B.data[i] = pseudo_rand_int16(&seed) >> B.hr;
+            B.data[i] = (pseudo_rand_int16(&seed) >> B.hr);
+
+            //This is a simple way of bounding the error due to the new rounding mode in VX4B
+            #if defined(__VX4B__)
+                B.data[i] = B.data[i]&~1;
+            #endif
 
             sum += B.data[i];
         }
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_min.c b/tests/bfp_tests/src/bfp/real/test_bfp_min.c
index 05b5b8f8..4e68291f 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_min.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_min.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_min_elementwise.c b/tests/bfp_tests/src/bfp/real/test_bfp_min_elementwise.c
index e0a27b36..f2b96d2b 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_min_elementwise.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_min_elementwise.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_mul.c b/tests/bfp_tests/src/bfp/real/test_bfp_mul.c
index 8926c2bf..b7ab8d26 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_mul.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_mul.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_rect.c b/tests/bfp_tests/src/bfp/real/test_bfp_rect.c
index 96692e42..8048329f 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_rect.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_rect.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_rms.c b/tests/bfp_tests/src/bfp/real/test_bfp_rms.c
index 1a032b54..c029b710 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_rms.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_rms.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -73,7 +73,7 @@ TEST(bfp_rms, bfp_s16_rms)
         const double expectedF = sqrt(mean_energy);
 
         float_s32_t ideal_result = {
-            .mant = lround( expectedF / ldexp((double) 1,result.exp) ),
+            .mant = llround( expectedF / ldexp((double) 1,result.exp) ),
             .exp = (exponent_t) floor( log2(expectedF) ) - 30 };
 
 
@@ -123,7 +123,7 @@ TEST(bfp_rms, bfp_s32_rms)
         const double expectedF = sqrt(mean_energy);
 
         float_s32_t ideal_result = {
-            .mant = lround( expectedF / ldexp((double) 1,result.exp) ),
+            .mant = llround( expectedF / ldexp((double) 1,result.exp) ),
             .exp = (exponent_t) floor( log2(expectedF) ) - 30 };
 
         TEST_ASSERT_INT32_WITHIN(3, ideal_result.exp, result.exp);
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_s16_accumulate.c b/tests/bfp_tests/src/bfp/real/test_bfp_s16_accumulate.c
index 31f941e5..db9ff53a 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_s16_accumulate.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_s16_accumulate.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_scale.c b/tests/bfp_tests/src/bfp/real/test_bfp_scale.c
index 7b44b499..cf9e53dc 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_scale.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_scale.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_set.c b/tests/bfp_tests/src/bfp/real/test_bfp_set.c
index ff0c29fb..d4d27ef1 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_set.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_set.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_shl_vect.c b/tests/bfp_tests/src/bfp/real/test_bfp_shl_vect.c
index 8ecd0721..9617d381 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_shl_vect.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_shl_vect.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_sqrt_vect.c b/tests/bfp_tests/src/bfp/real/test_bfp_sqrt_vect.c
index 21929a66..d352fc2a 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_sqrt_vect.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_sqrt_vect.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_sub.c b/tests/bfp_tests/src/bfp/real/test_bfp_sub.c
index 39ead191..14c021e1 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_sub.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_sub.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_sum.c b/tests/bfp_tests/src/bfp/real/test_bfp_sum.c
index b7cb9d34..136425ed 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_sum.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_sum.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -55,8 +55,16 @@ TEST(bfp_sum, bfp_s16_sum)
         for(unsigned int i = 0; i < B.length; i++)
             expected.mant += B.data[i];
 
-        TEST_ASSERT_EQUAL(expected.exp, result.exp);
-        TEST_ASSERT_EQUAL_INT32(expected.mant, result.mant);
+            #if defined (__VX4B__)
+                // On VX, accumulation may differ by 1 due to different rounding behavior
+                TEST_ASSERT_INT32_WITHIN(1, expected.exp, result.exp);
+                TEST_ASSERT_INT32_WITHIN(12, expected.mant, result.mant);
+            #else
+                TEST_ASSERT_EQUAL(expected.exp, result.exp);
+                TEST_ASSERT_EQUAL_INT32(expected.mant, result.mant);
+            #endif
+        
+        
     }
 }
 
diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_use_exponent.c b/tests/bfp_tests/src/bfp/real/test_bfp_use_exponent.c
index 567b7e7e..56c05926 100644
--- a/tests/bfp_tests/src/bfp/real/test_bfp_use_exponent.c
+++ b/tests/bfp_tests/src/bfp/real/test_bfp_use_exponent.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/main.c b/tests/bfp_tests/src/main.c
index ae81cd6f..46ed3ff4 100644
--- a/tests/bfp_tests/src/main.c
+++ b/tests/bfp_tests/src/main.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdio.h>
diff --git a/tests/bfp_tests/src/misc/test_bfp_gradient_constraint.c b/tests/bfp_tests/src/misc/test_bfp_gradient_constraint.c
index bc6a2fbf..60690cb0 100644
--- a/tests/bfp_tests/src/misc/test_bfp_gradient_constraint.c
+++ b/tests/bfp_tests/src/misc/test_bfp_gradient_constraint.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/bfp_tests/src/tst_asserts.h b/tests/bfp_tests/src/tst_asserts.h
index afb0a68d..f037bb92 100644
--- a/tests/bfp_tests/src/tst_asserts.h
+++ b/tests/bfp_tests/src/tst_asserts.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/tests/bfp_tests/src/tst_common.c b/tests/bfp_tests/src/tst_common.c
index 47429aa7..e99674f9 100644
--- a/tests/bfp_tests/src/tst_common.c
+++ b/tests/bfp_tests/src/tst_common.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <tst_common.h>
diff --git a/tests/bfp_tests/src/tst_common.h b/tests/bfp_tests/src/tst_common.h
index 39e7210b..fdb3df07 100644
--- a/tests/bfp_tests/src/tst_common.h
+++ b/tests/bfp_tests/src/tst_common.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/tests/bfp_tests/src/unity_config.h b/tests/bfp_tests/src/unity_config.h
index 3b0cc6a1..c19827fa 100644
--- a/tests/bfp_tests/src/unity_config.h
+++ b/tests/bfp_tests/src/unity_config.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #pragma once
 
diff --git a/tests/config.xml b/tests/config.xml
new file mode 100644
index 00000000..646ced42
--- /dev/null
+++ b/tests/config.xml
@@ -0,0 +1,555 @@
+<XSystem xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="XSystem.xsd">
+  <System>
+    <Nodes>
+      <Node number="32770" jtagId="0x8633" userId="0x00000000" processorMhz="600" interconnectMhz="600" referenceMhz="100">
+        <Processor number="0" numThreads="8" numTimers="10" numLocks="4" numSyncs="7" numChanEnds="32" numClkBlks="6" codeReference="tile[0]">
+          <MemoryController>
+            <Ram base="0x80000000" size="0x80000"/>
+            <ExtMem base="0x90000000" size="0x10000000"/>
+            <SwMem base="0xC0000000" size="0x20000000"/>
+          </MemoryController>
+        </Processor>
+        <Processor number="1" numThreads="8" numTimers="10" numLocks="4" numSyncs="7" numChanEnds="32" numClkBlks="6" codeReference="tile[1]">
+          <MemoryController>
+            <Ram base="0x80000000" size="0x80000"/>
+            <ExtMem base="0x90000000" size="0x10000000"/>
+            <SwMem base="0xC0000000" size="0x20000000"/>
+          </MemoryController>
+        </Processor>
+      </Node>
+    </Nodes>
+    <JtagChain>
+      <Node id="32770"/>
+    </JtagChain>
+    <Pins>
+      <Pin name="0:X0D00">
+        <Port bitNum="0" core="0" name="XS1_PORT_1A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D01">
+        <Port bitNum="0" core="0" name="XS1_PORT_1B" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D02">
+        <Port bitNum="0" core="0" name="XS1_PORT_4A" node="32770" priority="0"/>
+        <Port bitNum="0" core="0" name="XS1_PORT_8A" node="32770" priority="1"/>
+        <Port bitNum="0" core="0" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="20" core="0" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X0D03">
+        <Port bitNum="1" core="0" name="XS1_PORT_4A" node="32770" priority="0"/>
+        <Port bitNum="1" core="0" name="XS1_PORT_8A" node="32770" priority="1"/>
+        <Port bitNum="1" core="0" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="21" core="0" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X0D04">
+        <Port bitNum="0" core="0" name="XS1_PORT_4B" node="32770" priority="0"/>
+        <Port bitNum="2" core="0" name="XS1_PORT_8A" node="32770" priority="1"/>
+        <Port bitNum="2" core="0" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="22" core="0" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X0D05">
+        <Port bitNum="1" core="0" name="XS1_PORT_4B" node="32770" priority="0"/>
+        <Port bitNum="3" core="0" name="XS1_PORT_8A" node="32770" priority="1"/>
+        <Port bitNum="3" core="0" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="23" core="0" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X0D06">
+        <Port bitNum="2" core="0" name="XS1_PORT_4B" node="32770" priority="0"/>
+        <Port bitNum="4" core="0" name="XS1_PORT_8A" node="32770" priority="1"/>
+        <Port bitNum="4" core="0" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="24" core="0" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X0D07">
+        <Port bitNum="3" core="0" name="XS1_PORT_4B" node="32770" priority="0"/>
+        <Port bitNum="5" core="0" name="XS1_PORT_8A" node="32770" priority="1"/>
+        <Port bitNum="5" core="0" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="25" core="0" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X0D08">
+        <Port bitNum="2" core="0" name="XS1_PORT_4A" node="32770" priority="0"/>
+        <Port bitNum="6" core="0" name="XS1_PORT_8A" node="32770" priority="1"/>
+        <Port bitNum="6" core="0" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="26" core="0" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X0D09">
+        <Port bitNum="3" core="0" name="XS1_PORT_4A" node="32770" priority="0"/>
+        <Port bitNum="7" core="0" name="XS1_PORT_8A" node="32770" priority="1"/>
+        <Port bitNum="7" core="0" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="27" core="0" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X0D10">
+        <Port bitNum="0" core="0" name="XS1_PORT_1C" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D11">
+        <Port bitNum="0" core="0" name="XS1_PORT_1D" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D12">
+        <Port bitNum="0" core="0" name="XS1_PORT_1E" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D13">
+        <Port bitNum="0" core="0" name="XS1_PORT_1F" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D14">
+        <Port bitNum="0" core="0" name="XS1_PORT_4C" node="32770" priority="0"/>
+        <Port bitNum="0" core="0" name="XS1_PORT_8B" node="32770" priority="1"/>
+        <Port bitNum="8" core="0" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="28" core="0" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X0D15">
+        <Port bitNum="1" core="0" name="XS1_PORT_4C" node="32770" priority="0"/>
+        <Port bitNum="1" core="0" name="XS1_PORT_8B" node="32770" priority="1"/>
+        <Port bitNum="9" core="0" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="29" core="0" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X0D16">
+        <Port bitNum="0" core="0" name="XS1_PORT_4D" node="32770" priority="0"/>
+        <Port bitNum="2" core="0" name="XS1_PORT_8B" node="32770" priority="1"/>
+        <Port bitNum="10" core="0" name="XS1_PORT_16A" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X0D17">
+        <Port bitNum="1" core="0" name="XS1_PORT_4D" node="32770" priority="0"/>
+        <Port bitNum="3" core="0" name="XS1_PORT_8B" node="32770" priority="1"/>
+        <Port bitNum="11" core="0" name="XS1_PORT_16A" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X0D18">
+        <Port bitNum="2" core="0" name="XS1_PORT_4D" node="32770" priority="0"/>
+        <Port bitNum="4" core="0" name="XS1_PORT_8B" node="32770" priority="1"/>
+        <Port bitNum="12" core="0" name="XS1_PORT_16A" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X0D19">
+        <Port bitNum="3" core="0" name="XS1_PORT_4D" node="32770" priority="0"/>
+        <Port bitNum="5" core="0" name="XS1_PORT_8B" node="32770" priority="1"/>
+        <Port bitNum="13" core="0" name="XS1_PORT_16A" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X0D20">
+        <Port bitNum="2" core="0" name="XS1_PORT_4C" node="32770" priority="0"/>
+        <Port bitNum="6" core="0" name="XS1_PORT_8B" node="32770" priority="1"/>
+        <Port bitNum="14" core="0" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="30" core="0" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X0D21">
+        <Port bitNum="3" core="0" name="XS1_PORT_4C" node="32770" priority="0"/>
+        <Port bitNum="7" core="0" name="XS1_PORT_8B" node="32770" priority="1"/>
+        <Port bitNum="15" core="0" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="31" core="0" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X0D22">
+        <Port bitNum="0" core="0" name="XS1_PORT_1G" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D23">
+        <Port bitNum="0" core="0" name="XS1_PORT_1H" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D24">
+        <Port bitNum="0" core="0" name="XS1_PORT_1I" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D25">
+        <Port bitNum="0" core="0" name="XS1_PORT_1J" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D26">
+        <Port bitNum="0" core="0" name="XS1_PORT_4E" node="32770" priority="0"/>
+        <Port bitNum="0" core="0" name="XS1_PORT_8C" node="32770" priority="1"/>
+        <Port bitNum="0" core="0" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X0D27">
+        <Port bitNum="1" core="0" name="XS1_PORT_4E" node="32770" priority="0"/>
+        <Port bitNum="1" core="0" name="XS1_PORT_8C" node="32770" priority="1"/>
+        <Port bitNum="1" core="0" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X0D28">
+        <Port bitNum="0" core="0" name="XS1_PORT_4F" node="32770" priority="0"/>
+        <Port bitNum="2" core="0" name="XS1_PORT_8C" node="32770" priority="1"/>
+        <Port bitNum="2" core="0" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X0D29">
+        <Port bitNum="1" core="0" name="XS1_PORT_4F" node="32770" priority="0"/>
+        <Port bitNum="3" core="0" name="XS1_PORT_8C" node="32770" priority="1"/>
+        <Port bitNum="3" core="0" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X0D30">
+        <Port bitNum="2" core="0" name="XS1_PORT_4F" node="32770" priority="0"/>
+        <Port bitNum="4" core="0" name="XS1_PORT_8C" node="32770" priority="1"/>
+        <Port bitNum="4" core="0" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X0D31">
+        <Port bitNum="3" core="0" name="XS1_PORT_4F" node="32770" priority="0"/>
+        <Port bitNum="5" core="0" name="XS1_PORT_8C" node="32770" priority="1"/>
+        <Port bitNum="5" core="0" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X0D32">
+        <Port bitNum="2" core="0" name="XS1_PORT_4E" node="32770" priority="0"/>
+        <Port bitNum="6" core="0" name="XS1_PORT_8C" node="32770" priority="1"/>
+        <Port bitNum="6" core="0" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X0D33">
+        <Port bitNum="3" core="0" name="XS1_PORT_4E" node="32770" priority="0"/>
+        <Port bitNum="7" core="0" name="XS1_PORT_8C" node="32770" priority="1"/>
+        <Port bitNum="7" core="0" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X0D34">
+        <Port bitNum="0" core="0" name="XS1_PORT_1K" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D35">
+        <Port bitNum="0" core="0" name="XS1_PORT_1L" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D36">
+        <Port bitNum="0" core="0" name="XS1_PORT_1M" node="32770" priority="0"/>
+        <Port bitNum="0" core="0" name="XS1_PORT_8D" node="32770" priority="1"/>
+        <Port bitNum="8" core="0" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X0D37">
+        <Port bitNum="0" core="0" name="XS1_PORT_1N" node="32770" priority="0"/>
+        <Port bitNum="1" core="0" name="XS1_PORT_8D" node="32770" priority="1"/>
+        <Port bitNum="9" core="0" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X0D38">
+        <Port bitNum="0" core="0" name="XS1_PORT_1O" node="32770" priority="0"/>
+        <Port bitNum="2" core="0" name="XS1_PORT_8D" node="32770" priority="1"/>
+        <Port bitNum="10" core="0" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X0D39">
+        <Port bitNum="0" core="0" name="XS1_PORT_1P" node="32770" priority="0"/>
+        <Port bitNum="3" core="0" name="XS1_PORT_8D" node="32770" priority="1"/>
+        <Port bitNum="11" core="0" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X0D40">
+        <Port bitNum="4" core="0" name="XS1_PORT_8D" node="32770" priority="0"/>
+        <Port bitNum="12" core="0" name="XS1_PORT_16B" node="32770" priority="1"/>
+      </Pin>
+      <Pin name="0:X0D41">
+        <Port bitNum="5" core="0" name="XS1_PORT_8D" node="32770" priority="0"/>
+        <Port bitNum="13" core="0" name="XS1_PORT_16B" node="32770" priority="1"/>
+      </Pin>
+      <Pin name="0:X0D42">
+        <Port bitNum="6" core="0" name="XS1_PORT_8D" node="32770" priority="0"/>
+        <Port bitNum="14" core="0" name="XS1_PORT_16B" node="32770" priority="1"/>
+      </Pin>
+      <Pin name="0:X0D43">
+        <Port bitNum="7" core="0" name="XS1_PORT_8D" node="32770" priority="0"/>
+        <Port bitNum="15" core="0" name="XS1_PORT_16B" node="32770" priority="1"/>
+      </Pin>
+      <Pin name="0:X0D49">
+        <Port bitNum="0" core="0" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D50">
+        <Port bitNum="1" core="0" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D51">
+        <Port bitNum="2" core="0" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D52">
+        <Port bitNum="3" core="0" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D53">
+        <Port bitNum="4" core="0" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D54">
+        <Port bitNum="5" core="0" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D55">
+        <Port bitNum="6" core="0" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D56">
+        <Port bitNum="7" core="0" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D57">
+        <Port bitNum="8" core="0" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D58">
+        <Port bitNum="9" core="0" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D61">
+        <Port bitNum="10" core="0" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D62">
+        <Port bitNum="11" core="0" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D63">
+        <Port bitNum="12" core="0" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D64">
+        <Port bitNum="13" core="0" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D65">
+        <Port bitNum="14" core="0" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D66">
+        <Port bitNum="15" core="0" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D67">
+        <Port bitNum="16" core="0" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D68">
+        <Port bitNum="17" core="0" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D69">
+        <Port bitNum="18" core="0" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X0D70">
+        <Port bitNum="19" core="0" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D00">
+        <Port bitNum="0" core="1" name="XS1_PORT_1A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D01">
+        <Port bitNum="0" core="1" name="XS1_PORT_1B" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D02">
+        <Port bitNum="0" core="1" name="XS1_PORT_4A" node="32770" priority="0"/>
+        <Port bitNum="0" core="1" name="XS1_PORT_8A" node="32770" priority="1"/>
+        <Port bitNum="0" core="1" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="20" core="1" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X1D03">
+        <Port bitNum="1" core="1" name="XS1_PORT_4A" node="32770" priority="0"/>
+        <Port bitNum="1" core="1" name="XS1_PORT_8A" node="32770" priority="1"/>
+        <Port bitNum="1" core="1" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="21" core="1" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X1D04">
+        <Port bitNum="0" core="1" name="XS1_PORT_4B" node="32770" priority="0"/>
+        <Port bitNum="2" core="1" name="XS1_PORT_8A" node="32770" priority="1"/>
+        <Port bitNum="2" core="1" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="22" core="1" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X1D05">
+        <Port bitNum="1" core="1" name="XS1_PORT_4B" node="32770" priority="0"/>
+        <Port bitNum="3" core="1" name="XS1_PORT_8A" node="32770" priority="1"/>
+        <Port bitNum="3" core="1" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="23" core="1" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X1D06">
+        <Port bitNum="2" core="1" name="XS1_PORT_4B" node="32770" priority="0"/>
+        <Port bitNum="4" core="1" name="XS1_PORT_8A" node="32770" priority="1"/>
+        <Port bitNum="4" core="1" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="24" core="1" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X1D07">
+        <Port bitNum="3" core="1" name="XS1_PORT_4B" node="32770" priority="0"/>
+        <Port bitNum="5" core="1" name="XS1_PORT_8A" node="32770" priority="1"/>
+        <Port bitNum="5" core="1" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="25" core="1" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X1D08">
+        <Port bitNum="2" core="1" name="XS1_PORT_4A" node="32770" priority="0"/>
+        <Port bitNum="6" core="1" name="XS1_PORT_8A" node="32770" priority="1"/>
+        <Port bitNum="6" core="1" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="26" core="1" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X1D09">
+        <Port bitNum="3" core="1" name="XS1_PORT_4A" node="32770" priority="0"/>
+        <Port bitNum="7" core="1" name="XS1_PORT_8A" node="32770" priority="1"/>
+        <Port bitNum="7" core="1" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="27" core="1" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X1D10">
+        <Port bitNum="0" core="1" name="XS1_PORT_1C" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D11">
+        <Port bitNum="0" core="1" name="XS1_PORT_1D" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D12">
+        <Port bitNum="0" core="1" name="XS1_PORT_1E" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D13">
+        <Port bitNum="0" core="1" name="XS1_PORT_1F" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D14">
+        <Port bitNum="0" core="1" name="XS1_PORT_4C" node="32770" priority="0"/>
+        <Port bitNum="0" core="1" name="XS1_PORT_8B" node="32770" priority="1"/>
+        <Port bitNum="8" core="1" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="28" core="1" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X1D15">
+        <Port bitNum="1" core="1" name="XS1_PORT_4C" node="32770" priority="0"/>
+        <Port bitNum="1" core="1" name="XS1_PORT_8B" node="32770" priority="1"/>
+        <Port bitNum="9" core="1" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="29" core="1" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X1D16">
+        <Port bitNum="0" core="1" name="XS1_PORT_4D" node="32770" priority="0"/>
+        <Port bitNum="2" core="1" name="XS1_PORT_8B" node="32770" priority="1"/>
+        <Port bitNum="10" core="1" name="XS1_PORT_16A" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X1D17">
+        <Port bitNum="1" core="1" name="XS1_PORT_4D" node="32770" priority="0"/>
+        <Port bitNum="3" core="1" name="XS1_PORT_8B" node="32770" priority="1"/>
+        <Port bitNum="11" core="1" name="XS1_PORT_16A" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X1D18">
+        <Port bitNum="2" core="1" name="XS1_PORT_4D" node="32770" priority="0"/>
+        <Port bitNum="4" core="1" name="XS1_PORT_8B" node="32770" priority="1"/>
+        <Port bitNum="12" core="1" name="XS1_PORT_16A" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X1D19">
+        <Port bitNum="3" core="1" name="XS1_PORT_4D" node="32770" priority="0"/>
+        <Port bitNum="5" core="1" name="XS1_PORT_8B" node="32770" priority="1"/>
+        <Port bitNum="13" core="1" name="XS1_PORT_16A" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X1D20">
+        <Port bitNum="2" core="1" name="XS1_PORT_4C" node="32770" priority="0"/>
+        <Port bitNum="6" core="1" name="XS1_PORT_8B" node="32770" priority="1"/>
+        <Port bitNum="14" core="1" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="30" core="1" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X1D21">
+        <Port bitNum="3" core="1" name="XS1_PORT_4C" node="32770" priority="0"/>
+        <Port bitNum="7" core="1" name="XS1_PORT_8B" node="32770" priority="1"/>
+        <Port bitNum="15" core="1" name="XS1_PORT_16A" node="32770" priority="2"/>
+        <Port bitNum="31" core="1" name="XS1_PORT_32A" node="32770" priority="3"/>
+      </Pin>
+      <Pin name="0:X1D22">
+        <Port bitNum="0" core="1" name="XS1_PORT_1G" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D23">
+        <Port bitNum="0" core="1" name="XS1_PORT_1H" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D24">
+        <Port bitNum="0" core="1" name="XS1_PORT_1I" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D25">
+        <Port bitNum="0" core="1" name="XS1_PORT_1J" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D26">
+        <Port bitNum="0" core="1" name="XS1_PORT_4E" node="32770" priority="0"/>
+        <Port bitNum="0" core="1" name="XS1_PORT_8C" node="32770" priority="1"/>
+        <Port bitNum="0" core="1" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X1D27">
+        <Port bitNum="1" core="1" name="XS1_PORT_4E" node="32770" priority="0"/>
+        <Port bitNum="1" core="1" name="XS1_PORT_8C" node="32770" priority="1"/>
+        <Port bitNum="1" core="1" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X1D28">
+        <Port bitNum="0" core="1" name="XS1_PORT_4F" node="32770" priority="0"/>
+        <Port bitNum="2" core="1" name="XS1_PORT_8C" node="32770" priority="1"/>
+        <Port bitNum="2" core="1" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X1D29">
+        <Port bitNum="1" core="1" name="XS1_PORT_4F" node="32770" priority="0"/>
+        <Port bitNum="3" core="1" name="XS1_PORT_8C" node="32770" priority="1"/>
+        <Port bitNum="3" core="1" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X1D30">
+        <Port bitNum="2" core="1" name="XS1_PORT_4F" node="32770" priority="0"/>
+        <Port bitNum="4" core="1" name="XS1_PORT_8C" node="32770" priority="1"/>
+        <Port bitNum="4" core="1" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X1D31">
+        <Port bitNum="3" core="1" name="XS1_PORT_4F" node="32770" priority="0"/>
+        <Port bitNum="5" core="1" name="XS1_PORT_8C" node="32770" priority="1"/>
+        <Port bitNum="5" core="1" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X1D32">
+        <Port bitNum="2" core="1" name="XS1_PORT_4E" node="32770" priority="0"/>
+        <Port bitNum="6" core="1" name="XS1_PORT_8C" node="32770" priority="1"/>
+        <Port bitNum="6" core="1" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X1D33">
+        <Port bitNum="3" core="1" name="XS1_PORT_4E" node="32770" priority="0"/>
+        <Port bitNum="7" core="1" name="XS1_PORT_8C" node="32770" priority="1"/>
+        <Port bitNum="7" core="1" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X1D34">
+        <Port bitNum="0" core="1" name="XS1_PORT_1K" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D35">
+        <Port bitNum="0" core="1" name="XS1_PORT_1L" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D36">
+        <Port bitNum="0" core="1" name="XS1_PORT_1M" node="32770" priority="0"/>
+        <Port bitNum="0" core="1" name="XS1_PORT_8D" node="32770" priority="1"/>
+        <Port bitNum="8" core="1" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X1D37">
+        <Port bitNum="0" core="1" name="XS1_PORT_1N" node="32770" priority="0"/>
+        <Port bitNum="1" core="1" name="XS1_PORT_8D" node="32770" priority="1"/>
+        <Port bitNum="9" core="1" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X1D38">
+        <Port bitNum="0" core="1" name="XS1_PORT_1O" node="32770" priority="0"/>
+        <Port bitNum="2" core="1" name="XS1_PORT_8D" node="32770" priority="1"/>
+        <Port bitNum="10" core="1" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X1D39">
+        <Port bitNum="0" core="1" name="XS1_PORT_1P" node="32770" priority="0"/>
+        <Port bitNum="3" core="1" name="XS1_PORT_8D" node="32770" priority="1"/>
+        <Port bitNum="11" core="1" name="XS1_PORT_16B" node="32770" priority="2"/>
+      </Pin>
+      <Pin name="0:X1D40">
+        <Port bitNum="4" core="1" name="XS1_PORT_8D" node="32770" priority="0"/>
+        <Port bitNum="12" core="1" name="XS1_PORT_16B" node="32770" priority="1"/>
+      </Pin>
+      <Pin name="0:X1D41">
+        <Port bitNum="5" core="1" name="XS1_PORT_8D" node="32770" priority="0"/>
+        <Port bitNum="13" core="1" name="XS1_PORT_16B" node="32770" priority="1"/>
+      </Pin>
+      <Pin name="0:X1D42">
+        <Port bitNum="6" core="1" name="XS1_PORT_8D" node="32770" priority="0"/>
+        <Port bitNum="14" core="1" name="XS1_PORT_16B" node="32770" priority="1"/>
+      </Pin>
+      <Pin name="0:X1D43">
+        <Port bitNum="7" core="1" name="XS1_PORT_8D" node="32770" priority="0"/>
+        <Port bitNum="15" core="1" name="XS1_PORT_16B" node="32770" priority="1"/>
+      </Pin>
+      <Pin name="0:X1D49">
+        <Port bitNum="0" core="1" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D50">
+        <Port bitNum="1" core="1" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D51">
+        <Port bitNum="2" core="1" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D52">
+        <Port bitNum="3" core="1" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D53">
+        <Port bitNum="4" core="1" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D54">
+        <Port bitNum="5" core="1" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D55">
+        <Port bitNum="6" core="1" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D56">
+        <Port bitNum="7" core="1" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D57">
+        <Port bitNum="8" core="1" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D58">
+        <Port bitNum="9" core="1" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D61">
+        <Port bitNum="10" core="1" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D62">
+        <Port bitNum="11" core="1" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D63">
+        <Port bitNum="12" core="1" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D64">
+        <Port bitNum="13" core="1" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D65">
+        <Port bitNum="14" core="1" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D66">
+        <Port bitNum="15" core="1" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D67">
+        <Port bitNum="16" core="1" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D68">
+        <Port bitNum="17" core="1" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D69">
+        <Port bitNum="18" core="1" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+      <Pin name="0:X1D70">
+        <Port bitNum="19" core="1" name="XS1_PORT_32A" node="32770" priority="0"/>
+      </Pin>
+    </Pins>
+  </System>
+</XSystem>
diff --git a/tests/dct_tests/src/lib_dsp/dsp_dct.c b/tests/dct_tests/src/lib_dsp/dsp_dct.c
index 066bd9e3..8286aa5d 100644
--- a/tests/dct_tests/src/lib_dsp/dsp_dct.c
+++ b/tests/dct_tests/src/lib_dsp/dsp_dct.c
@@ -1,4 +1,4 @@
-// Copyright 2015-2024 XMOS LIMITED.
+// Copyright 2015-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include "dsp_dct.h"
diff --git a/tests/dct_tests/src/lib_dsp/dsp_dct.h b/tests/dct_tests/src/lib_dsp/dsp_dct.h
index 87f46361..e3c30197 100644
--- a/tests/dct_tests/src/lib_dsp/dsp_dct.h
+++ b/tests/dct_tests/src/lib_dsp/dsp_dct.h
@@ -1,4 +1,4 @@
-// Copyright 2015-2024 XMOS LIMITED.
+// Copyright 2015-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #ifndef DSP_DCT_H_
diff --git a/tests/dct_tests/src/main.c b/tests/dct_tests/src/main.c
index 0983d3ba..c67caa7a 100644
--- a/tests/dct_tests/src/main.c
+++ b/tests/dct_tests/src/main.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/tests/dct_tests/src/test_dct8x8.c b/tests/dct_tests/src/test_dct8x8.c
index bba5b73d..4330b64d 100644
--- a/tests/dct_tests/src/test_dct8x8.c
+++ b/tests/dct_tests/src/test_dct8x8.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/tests/dct_tests/src/test_dctXX_forward.c b/tests/dct_tests/src/test_dctXX_forward.c
index aebf2b71..ccc98f18 100644
--- a/tests/dct_tests/src/test_dctXX_forward.c
+++ b/tests/dct_tests/src/test_dctXX_forward.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/tests/dct_tests/src/test_dctXX_inverse.c b/tests/dct_tests/src/test_dctXX_inverse.c
index 13f7372f..47a63973 100644
--- a/tests/dct_tests/src/test_dctXX_inverse.c
+++ b/tests/dct_tests/src/test_dctXX_inverse.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
@@ -81,7 +81,7 @@ TEST(dctXX_inverse, dct6_inverse)
     int32_t max_allowed_diff = DCT_N;
     for(unsigned int n = 0; n < DCT_N; n++){
       int32_t act_val = y[n];
-      int32_t ref_val = lround(ref_out[n]);
+      int32_t ref_val = llround(ref_out[n]);
       TEST_ASSERT_INT32_WITHIN(max_allowed_diff, ref_val, act_val);
     }
 
@@ -151,7 +151,7 @@ TEST(dctXX_inverse, dct8_inverse)
     int32_t max_allowed_diff = DCT_N;
     for(unsigned int n = 0; n < DCT_N; n++){
       int32_t act_val = y[n];
-      int32_t ref_val = lround(ref_out[n]);
+      int32_t ref_val = llround(ref_out[n]);
       TEST_ASSERT_INT32_WITHIN(max_allowed_diff, ref_val, act_val);
     }
 
@@ -220,7 +220,7 @@ TEST(dctXX_inverse, dct12_inverse)
     int32_t max_allowed_diff = 8;
     for(unsigned int n = 0; n < DCT_N; n++){
       int32_t act_val = y[n];
-      int32_t ref_val = lround(ref_out[n]);
+      int32_t ref_val = llround(ref_out[n]);
       TEST_ASSERT_INT32_WITHIN(max_allowed_diff, ref_val, act_val);
     }
 
@@ -290,7 +290,7 @@ TEST(dctXX_inverse, dct16_inverse)
     int32_t max_allowed_diff = 2*DCT_N;
     for(unsigned int n = 0; n < DCT_N; n++){
       int32_t act_val = y[n];
-      int32_t ref_val = lround(ref_out[n]);
+      int32_t ref_val = llround(ref_out[n]);
       TEST_ASSERT_INT32_WITHIN(max_allowed_diff, ref_val, act_val);
     }
 
@@ -360,7 +360,7 @@ TEST(dctXX_inverse, dct24_inverse)
     int32_t max_allowed_diff = 2*DCT_N;
     for(unsigned int n = 0; n < DCT_N; n++){
       int32_t act_val = y[n];
-      int32_t ref_val = lround(ref_out[n]);
+      int32_t ref_val = llround(ref_out[n]);
       TEST_ASSERT_INT32_WITHIN(max_allowed_diff, ref_val, act_val);
     }
 
@@ -430,7 +430,7 @@ TEST(dctXX_inverse, dct32_inverse)
     int32_t max_allowed_diff = 2*DCT_N;
     for(unsigned int n = 0; n < DCT_N; n++){
       int32_t act_val = y[n];
-      int32_t ref_val = lround(ref_out[n]);
+      int32_t ref_val = llround(ref_out[n]);
       TEST_ASSERT_INT32_WITHIN(max_allowed_diff, ref_val, act_val);
     }
 
@@ -500,7 +500,7 @@ TEST(dctXX_inverse, dct48_inverse)
     int32_t max_allowed_diff = 5*DCT_N;
     for(unsigned int n = 0; n < DCT_N; n++){
       int32_t act_val = y[n];
-      int32_t ref_val = lround(ref_out[n]);
+      int32_t ref_val = llround(ref_out[n]);
       TEST_ASSERT_INT32_WITHIN(max_allowed_diff, ref_val, act_val);
     }
 
@@ -570,7 +570,7 @@ TEST(dctXX_inverse, dct64_inverse)
     int32_t max_allowed_diff = 6*DCT_N;
     for(unsigned int n = 0; n < DCT_N; n++){
       int32_t act_val = y[n];
-      int32_t ref_val = lround(ref_out[n]);
+      int32_t ref_val = llround(ref_out[n]);
       TEST_ASSERT_INT32_WITHIN(max_allowed_diff, ref_val, act_val);
     }
 
diff --git a/tests/dct_tests/src/test_random.h b/tests/dct_tests/src/test_random.h
index 0778d791..ebb02463 100644
--- a/tests/dct_tests/src/test_random.h
+++ b/tests/dct_tests/src/test_random.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/tests/dct_tests/src/tst_common.c b/tests/dct_tests/src/tst_common.c
index 4428b77b..dc996c07 100644
--- a/tests/dct_tests/src/tst_common.c
+++ b/tests/dct_tests/src/tst_common.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include "tst_common.h"
diff --git a/tests/dct_tests/src/tst_common.h b/tests/dct_tests/src/tst_common.h
index e8040c81..c0d034aa 100644
--- a/tests/dct_tests/src/tst_common.h
+++ b/tests/dct_tests/src/tst_common.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/tests/dct_tests/src/unity_config.h b/tests/dct_tests/src/unity_config.h
index 42edb77d..e1eca455 100644
--- a/tests/dct_tests/src/unity_config.h
+++ b/tests/dct_tests/src/unity_config.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #pragma once
 
diff --git a/tests/fft_tests/src/main.c b/tests/fft_tests/src/main.c
index 3c8bda22..554e412c 100644
--- a/tests/fft_tests/src/main.c
+++ b/tests/fft_tests/src/main.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/tests/fft_tests/src/test_bfp_fft.c b/tests/fft_tests/src/test_bfp_fft.c
index 8ebd9f14..d2bd9a30 100644
--- a/tests/fft_tests/src/test_bfp_fft.c
+++ b/tests/fft_tests/src/test_bfp_fft.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/tests/fft_tests/src/test_bfp_pack_unpack.c b/tests/fft_tests/src/test_bfp_pack_unpack.c
index d31e8b48..4ece5260 100644
--- a/tests/fft_tests/src/test_bfp_pack_unpack.c
+++ b/tests/fft_tests/src/test_bfp_pack_unpack.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/tests/fft_tests/src/test_fft_dif.c b/tests/fft_tests/src/test_fft_dif.c
index 12396a6c..3ac83d81 100644
--- a/tests/fft_tests/src/test_fft_dif.c
+++ b/tests/fft_tests/src/test_fft_dif.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include "xmath/xmath.h"
diff --git a/tests/fft_tests/src/test_fft_dit.c b/tests/fft_tests/src/test_fft_dit.c
index 2d9ce593..710fc1e4 100644
--- a/tests/fft_tests/src/test_fft_dit.c
+++ b/tests/fft_tests/src/test_fft_dit.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/tests/fft_tests/src/test_fft_helpers.c b/tests/fft_tests/src/test_fft_helpers.c
index c9fa33bf..95a9fd08 100644
--- a/tests/fft_tests/src/test_fft_helpers.c
+++ b/tests/fft_tests/src/test_fft_helpers.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/tests/fft_tests/src/test_fft_mono_adjust.c b/tests/fft_tests/src/test_fft_mono_adjust.c
index 41a51110..eedea943 100644
--- a/tests/fft_tests/src/test_fft_mono_adjust.c
+++ b/tests/fft_tests/src/test_fft_mono_adjust.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/tests/fft_tests/src/test_issue96.c b/tests/fft_tests/src/test_issue96.c
index 51e5e9da..e4537431 100644
--- a/tests/fft_tests/src/test_issue96.c
+++ b/tests/fft_tests/src/test_issue96.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/tests/fft_tests/src/test_random.h b/tests/fft_tests/src/test_random.h
index 0778d791..ebb02463 100644
--- a/tests/fft_tests/src/test_random.h
+++ b/tests/fft_tests/src/test_random.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/tests/fft_tests/src/test_vect_f32_fft.c b/tests/fft_tests/src/test_vect_f32_fft.c
index f3b9db77..dad8b4d8 100644
--- a/tests/fft_tests/src/test_vect_f32_fft.c
+++ b/tests/fft_tests/src/test_vect_f32_fft.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/tests/fft_tests/src/tst_common.c b/tests/fft_tests/src/tst_common.c
index 4428b77b..dc996c07 100644
--- a/tests/fft_tests/src/tst_common.c
+++ b/tests/fft_tests/src/tst_common.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include "tst_common.h"
diff --git a/tests/fft_tests/src/tst_common.h b/tests/fft_tests/src/tst_common.h
index 69757502..61308439 100644
--- a/tests/fft_tests/src/tst_common.h
+++ b/tests/fft_tests/src/tst_common.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/tests/fft_tests/src/unity_config.h b/tests/fft_tests/src/unity_config.h
index 42edb77d..e1eca455 100644
--- a/tests/fft_tests/src/unity_config.h
+++ b/tests/fft_tests/src/unity_config.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #pragma once
 
diff --git a/tests/filter_tests/script/test_filter_biquad_s32_case3.py b/tests/filter_tests/script/test_filter_biquad_s32_case3.py
index 65d72afe..786c25ea 100644
--- a/tests/filter_tests/script/test_filter_biquad_s32_case3.py
+++ b/tests/filter_tests/script/test_filter_biquad_s32_case3.py
@@ -1,4 +1,4 @@
-# Copyright 2020-2024 XMOS LIMITED.
+# Copyright 2020-2026 XMOS LIMITED.
 # This Software is subject to the terms of the XMOS Public Licence: Version 1.
 import numpy as np
 
diff --git a/tests/filter_tests/script/test_filter_biquad_sat_s32_case3.py b/tests/filter_tests/script/test_filter_biquad_sat_s32_case3.py
index a58a3b55..7fa1f74a 100644
--- a/tests/filter_tests/script/test_filter_biquad_sat_s32_case3.py
+++ b/tests/filter_tests/script/test_filter_biquad_sat_s32_case3.py
@@ -1,4 +1,4 @@
-# Copyright 2024 XMOS LIMITED.
+# Copyright 2024-2026 XMOS LIMITED.
 # This Software is subject to the terms of the XMOS Public Licence: Version 1.
 import numpy as np
 import test_filter_biquad_s32_case3 as ts
diff --git a/tests/filter_tests/src/filter/test_filter_biquad_s32.c b/tests/filter_tests/src/filter/test_filter_biquad_s32.c
index 0b8de2ae..a98beee5 100644
--- a/tests/filter_tests/src/filter/test_filter_biquad_s32.c
+++ b/tests/filter_tests/src/filter/test_filter_biquad_s32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/filter_tests/src/filter/test_filter_biquad_sat_s32.c b/tests/filter_tests/src/filter/test_filter_biquad_sat_s32.c
index ea19b4bc..8bfde22d 100644
--- a/tests/filter_tests/src/filter/test_filter_biquad_sat_s32.c
+++ b/tests/filter_tests/src/filter/test_filter_biquad_sat_s32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -179,10 +179,10 @@ TEST(filter_biquad_sat_s32, case4)
 
     // this should saturate as it's already 2**31-1
     res = filter_biquad_sat_s32(&filter, INT32_MAX);
-    TEST_ASSERT_EQUAL(INT32_MAX, res);
+    TEST_ASSERT_EQUAL(VPU_INT32_MAX, res);
 
     res = filter_biquad_sat_s32(&filter, INT32_MIN);
-    TEST_ASSERT_EQUAL(INT32_MIN + 1, res);
+    TEST_ASSERT_EQUAL(VPU_INT32_MIN, res);
 }
 
 // Test a biquad that overflows halfway through the accumulator, 
diff --git a/tests/filter_tests/src/filter/test_filter_fir_s16.c b/tests/filter_tests/src/filter/test_filter_fir_s16.c
index 7abd884d..26f7ce36 100644
--- a/tests/filter_tests/src/filter/test_filter_fir_s16.c
+++ b/tests/filter_tests/src/filter/test_filter_fir_s16.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/filter_tests/src/filter/test_filter_fir_s16_push_sample.c b/tests/filter_tests/src/filter/test_filter_fir_s16_push_sample.c
index b476b328..de4baea6 100644
--- a/tests/filter_tests/src/filter/test_filter_fir_s16_push_sample.c
+++ b/tests/filter_tests/src/filter/test_filter_fir_s16_push_sample.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/filter_tests/src/filter/test_filter_fir_s32.c b/tests/filter_tests/src/filter/test_filter_fir_s32.c
index 9a43d91f..2b961aac 100644
--- a/tests/filter_tests/src/filter/test_filter_fir_s32.c
+++ b/tests/filter_tests/src/filter/test_filter_fir_s32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/filter_tests/src/main.c b/tests/filter_tests/src/main.c
index 832b40e2..93ad91b2 100644
--- a/tests/filter_tests/src/main.c
+++ b/tests/filter_tests/src/main.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdio.h>
diff --git a/tests/filter_tests/src/tst_common.h b/tests/filter_tests/src/tst_common.h
index 44c74d09..f2bf976d 100644
--- a/tests/filter_tests/src/tst_common.h
+++ b/tests/filter_tests/src/tst_common.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/tests/filter_tests/src/unity_config.h b/tests/filter_tests/src/unity_config.h
index 42edb77d..e1eca455 100644
--- a/tests/filter_tests/src/unity_config.h
+++ b/tests/filter_tests/src/unity_config.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #pragma once
 
diff --git a/tests/legacy_build/src/main.c b/tests/legacy_build/src/main.c
index 6a969ff4..4bde6f95 100644
--- a/tests/legacy_build/src/main.c
+++ b/tests/legacy_build/src/main.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdio.h>
diff --git a/tests/scalar_tests/src/basic/test_cls.c b/tests/scalar_tests/src/basic/test_cls.c
index b4468170..f2778a8e 100644
--- a/tests/scalar_tests/src/basic/test_cls.c
+++ b/tests/scalar_tests/src/basic/test_cls.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/scalar_tests/src/basic/test_hr.c b/tests/scalar_tests/src/basic/test_hr.c
index fcdef154..5988a28e 100644
--- a/tests/scalar_tests/src/basic/test_hr.c
+++ b/tests/scalar_tests/src/basic/test_hr.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/scalar_tests/src/float/test_fixed_trig.c b/tests/scalar_tests/src/float/test_fixed_trig.c
index eade7d5c..0dd45d9a 100644
--- a/tests/scalar_tests/src/float/test_fixed_trig.c
+++ b/tests/scalar_tests/src/float/test_fixed_trig.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -121,7 +121,7 @@ TEST(fixed_trig, sbrad_sin)
     volatile int32_t result_q30 = sbrad_sin(alpha);
     // volatile uint32_t t3 = get_reference_time();
 
-    int32_t exp_q30 = lround(ldexp(exp,30));
+    int32_t exp_q30 = llround(ldexp(exp,30));
 
     int32_t er = exp_q30 - result_q30;
     er = (er < 0)? -er : er;
@@ -184,7 +184,7 @@ TEST(fixed_trig, sbrad_tan)
     volatile q2_30 result_q30 = sbrad_tan(alpha_q31);
     // volatile uint32_t t3 = get_reference_time();
 
-    q2_30 expected_q30 = lround(ldexp(expected,30));
+    q2_30 expected_q30 = llround(ldexp(expected,30));
 
     // q2_30 just_for_timing_q30 = round(ldexp(just_for_timing, 30));
 
@@ -235,7 +235,7 @@ TEST(fixed_trig, q24_sin)
     volatile q2_30 result_q30 = q24_sin(theta_q24);
     // volatile uint32_t t3 = get_reference_time();
 
-    q2_30 exp_q30 = lround(ldexp(exp,30));
+    q2_30 exp_q30 = llround(ldexp(exp,30));
 
     q2_30 er = exp_q30 - result_q30;
     er = (er < 0)? -er : er;
@@ -284,7 +284,7 @@ TEST(fixed_trig, q24_cos)
     volatile q2_30 result_q30 = q24_cos(theta_q24);
     // volatile uint32_t t3 = get_reference_time();
 
-    q2_30 exp_q30 = lround(ldexp(exp,30));
+    q2_30 exp_q30 = llround(ldexp(exp,30));
 
     q2_30 er = exp_q30 - result_q30;
     er = (er < 0)? -er : er;
@@ -331,7 +331,7 @@ TEST(fixed_trig, q24_tan)
     volatile float_s32_t result = q24_tan(theta_q24);
     // volatile uint32_t t3 = get_reference_time();
 
-    int32_t exp_fixed = lround(ldexp(exp,-result.exp));
+    int32_t exp_fixed = llround(ldexp(exp,-result.exp));
 
     if(result.exp != -30)
       TEST_ASSERT_LESS_THAN_INT32(2, HR_S32(result.mant));
@@ -350,8 +350,8 @@ TEST(fixed_trig, q24_tan)
       // If we're really that close to the singular point, let's invert both the
       // expected result and actual result to see whether the output of sbrad_tan()
       // was very close to what it should have been.
-      int32_t exp_inv_q30 = lround(ldexp(1/exp, 30));
-      int32_t act_inv_q30 = lround(ldexp(1/ldexp(result.mant, result.exp), 30));
+      int32_t exp_inv_q30 = llround(ldexp(1/exp, 30));
+      int32_t act_inv_q30 = llround(ldexp(1/ldexp(result.mant, result.exp), 30));
 
       TEST_ASSERT_INT32_WITHIN(100, exp_inv_q30, act_inv_q30);
       
diff --git a/tests/scalar_tests/src/float/test_float_convert.c b/tests/scalar_tests/src/float/test_float_convert.c
index b8f6e6bf..f850e6cd 100644
--- a/tests/scalar_tests/src/float/test_float_convert.c
+++ b/tests/scalar_tests/src/float/test_float_convert.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/scalar_tests/src/float/test_float_exp.c b/tests/scalar_tests/src/float/test_float_exp.c
index 3b777b4f..a1d5697c 100644
--- a/tests/scalar_tests/src/float/test_float_exp.c
+++ b/tests/scalar_tests/src/float/test_float_exp.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -66,7 +66,7 @@ TEST(float_exp, float_s32_exp_SPECIFIC_CASES)
 
   // exp(0.0) = 1.0
   x.mant = Q24(0.0);
-  x.exp = -24;
+  x.exp = -23;
   res = float_s32_exp(x);
   TEST_ASSERT( diff_ratio(res, 1.0) < ldexp(1,-24)  );
 
@@ -123,7 +123,7 @@ TEST(float_exp, float_s32_exp_RANDOM)
 
     float_s32_t actual = float_s32_exp(x);
     double expected_f = exp(ldexp(x.mant, x.exp));
-    int32_t expected_mant = lround(ldexp(expected_f,-actual.exp));
+    int32_t expected_mant = llround(ldexp(expected_f,-actual.exp));
 
     if(expected_mant == 0){
       // Just make sure our answer is real close to zero.
diff --git a/tests/scalar_tests/src/float/test_float_log.c b/tests/scalar_tests/src/float/test_float_log.c
index 5589ca0d..0536ab43 100644
--- a/tests/scalar_tests/src/float/test_float_log.c
+++ b/tests/scalar_tests/src/float/test_float_log.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/scalar_tests/src/float/test_float_logistic.c b/tests/scalar_tests/src/float/test_float_logistic.c
index 1cf3b66e..32b2dcc3 100644
--- a/tests/scalar_tests/src/float/test_float_logistic.c
+++ b/tests/scalar_tests/src/float/test_float_logistic.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/scalar_tests/src/float/test_float_s32_sqrt.c b/tests/scalar_tests/src/float/test_float_s32_sqrt.c
index 2e5b604b..3a8d34d8 100644
--- a/tests/scalar_tests/src/float/test_float_s32_sqrt.c
+++ b/tests/scalar_tests/src/float/test_float_s32_sqrt.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/scalar_tests/src/float/test_float_sXX_abs.c b/tests/scalar_tests/src/float/test_float_sXX_abs.c
index c2e1da17..fa9e8339 100644
--- a/tests/scalar_tests/src/float/test_float_sXX_abs.c
+++ b/tests/scalar_tests/src/float/test_float_sXX_abs.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/scalar_tests/src/float/test_float_sXX_add.c b/tests/scalar_tests/src/float/test_float_sXX_add.c
index 01db3a02..12dd27a9 100644
--- a/tests/scalar_tests/src/float/test_float_sXX_add.c
+++ b/tests/scalar_tests/src/float/test_float_sXX_add.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/scalar_tests/src/float/test_float_sXX_div.c b/tests/scalar_tests/src/float/test_float_sXX_div.c
index 4fb1b693..f9703454 100644
--- a/tests/scalar_tests/src/float/test_float_sXX_div.c
+++ b/tests/scalar_tests/src/float/test_float_sXX_div.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/scalar_tests/src/float/test_float_sXX_ema.c b/tests/scalar_tests/src/float/test_float_sXX_ema.c
index 482f3f31..c1643cd2 100644
--- a/tests/scalar_tests/src/float/test_float_sXX_ema.c
+++ b/tests/scalar_tests/src/float/test_float_sXX_ema.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/scalar_tests/src/float/test_float_sXX_gt.c b/tests/scalar_tests/src/float/test_float_sXX_gt.c
index f476d398..f8bf09d6 100644
--- a/tests/scalar_tests/src/float/test_float_sXX_gt.c
+++ b/tests/scalar_tests/src/float/test_float_sXX_gt.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/scalar_tests/src/float/test_float_sXX_mul.c b/tests/scalar_tests/src/float/test_float_sXX_mul.c
index ee9c54d0..d74aa527 100644
--- a/tests/scalar_tests/src/float/test_float_sXX_mul.c
+++ b/tests/scalar_tests/src/float/test_float_sXX_mul.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/scalar_tests/src/float/test_float_sXX_sub.c b/tests/scalar_tests/src/float/test_float_sXX_sub.c
index 6de05e47..b4d4f568 100644
--- a/tests/scalar_tests/src/float/test_float_sXX_sub.c
+++ b/tests/scalar_tests/src/float/test_float_sXX_sub.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/scalar_tests/src/float/test_float_trig.c b/tests/scalar_tests/src/float/test_float_trig.c
index a9aa494d..4bbf3bd2 100644
--- a/tests/scalar_tests/src/float/test_float_trig.c
+++ b/tests/scalar_tests/src/float/test_float_trig.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/scalar_tests/src/float/test_q30_powers.c b/tests/scalar_tests/src/float/test_q30_powers.c
index e2564eb4..60dd20f7 100644
--- a/tests/scalar_tests/src/float/test_q30_powers.c
+++ b/tests/scalar_tests/src/float/test_q30_powers.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -46,7 +46,7 @@ TEST(q30_powers, q30_powers)
     double pow = 1.0;
     for(unsigned int i = 1; i < length; i++){
       pow *= bf;
-      expected[i] = lround(ldexp(pow, 30));
+      expected[i] = llround(ldexp(pow, 30));
     }
 
     // volatile uint32_t t0 = get_reference_time();
diff --git a/tests/scalar_tests/src/main.c b/tests/scalar_tests/src/main.c
index 7423816b..e847afed 100644
--- a/tests/scalar_tests/src/main.c
+++ b/tests/scalar_tests/src/main.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/tests/scalar_tests/src/tst_asserts.h b/tests/scalar_tests/src/tst_asserts.h
index 07a01c5e..676c84b1 100644
--- a/tests/scalar_tests/src/tst_asserts.h
+++ b/tests/scalar_tests/src/tst_asserts.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/tests/scalar_tests/src/tst_common.c b/tests/scalar_tests/src/tst_common.c
index f9a6fae6..048d5058 100644
--- a/tests/scalar_tests/src/tst_common.c
+++ b/tests/scalar_tests/src/tst_common.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #include "tst_common.h"
 
diff --git a/tests/scalar_tests/src/tst_common.h b/tests/scalar_tests/src/tst_common.h
index c0b6016f..c57c15fd 100644
--- a/tests/scalar_tests/src/tst_common.h
+++ b/tests/scalar_tests/src/tst_common.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/tests/scalar_tests/src/unity_config.h b/tests/scalar_tests/src/unity_config.h
index 2998718d..c0699a63 100644
--- a/tests/scalar_tests/src/unity_config.h
+++ b/tests/scalar_tests/src/unity_config.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #pragma once
 
diff --git a/tests/scalar_tests/src/util/test_s32_sqrt.c b/tests/scalar_tests/src/util/test_s32_sqrt.c
index d7a8d3c9..efd094c9 100644
--- a/tests/scalar_tests/src/util/test_s32_sqrt.c
+++ b/tests/scalar_tests/src/util/test_s32_sqrt.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/scalar_tests/src/util/test_sXX_inverse.c b/tests/scalar_tests/src/util/test_sXX_inverse.c
index 1a058055..9386494c 100644
--- a/tests/scalar_tests/src/util/test_sXX_inverse.c
+++ b/tests/scalar_tests/src/util/test_sXX_inverse.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/scalar_tests/src/util/test_sXX_mul.c b/tests/scalar_tests/src/util/test_sXX_mul.c
index 587e5a8d..79635c5e 100644
--- a/tests/scalar_tests/src/util/test_sXX_mul.c
+++ b/tests/scalar_tests/src/util/test_sXX_mul.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -60,7 +60,7 @@ TEST(sXX_mul, s16_mul)
 
         double Ef = Bf * Cf;
 
-        int16_t expected = (int16_t) lround( ldexp(Ef, -a_exp) );
+        int16_t expected = (int16_t) llround( ldexp(Ef, -a_exp) );
 
 
         TEST_ASSERT_INT16_WITHIN_MESSAGE(2, expected, A, "");
@@ -98,7 +98,7 @@ TEST(sXX_mul, s32_mul)
 
         double Ef = Bf * Cf;
 
-        int32_t expected = lround( ldexp(Ef, -a_exp) );
+        int32_t expected = llround( ldexp(Ef, -a_exp) );
 
 
         TEST_ASSERT_INT32_WITHIN_MESSAGE(2, expected, A, "");
diff --git a/tests/shared/floating_fft/floating_dct.c b/tests/shared/floating_fft/floating_dct.c
index 8081c8ee..2d693dae 100644
--- a/tests/shared/floating_fft/floating_dct.c
+++ b/tests/shared/floating_fft/floating_dct.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/tests/shared/floating_fft/floating_dct.h b/tests/shared/floating_fft/floating_dct.h
index 3915e85d..5a306f5a 100644
--- a/tests/shared/floating_fft/floating_dct.h
+++ b/tests/shared/floating_fft/floating_dct.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/tests/shared/floating_fft/floating_fft.h b/tests/shared/floating_fft/floating_fft.h
index e0894fad..2911a808 100644
--- a/tests/shared/floating_fft/floating_fft.h
+++ b/tests/shared/floating_fft/floating_fft.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/tests/shared/floating_fft/floating_fft_double.c b/tests/shared/floating_fft/floating_fft_double.c
index 50d32870..62d41508 100644
--- a/tests/shared/floating_fft/floating_fft_double.c
+++ b/tests/shared/floating_fft/floating_fft_double.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/tests/shared/floating_fft/floating_fft_float.c b/tests/shared/floating_fft/floating_fft_float.c
index a44f2b65..ca7a558b 100644
--- a/tests/shared/floating_fft/floating_fft_float.c
+++ b/tests/shared/floating_fft/floating_fft_float.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/tests/shared/floating_fft/floating_fft_util.c b/tests/shared/floating_fft/floating_fft_util.c
index e2dc4d4a..dd9a711e 100644
--- a/tests/shared/floating_fft/floating_fft_util.c
+++ b/tests/shared/floating_fft/floating_fft_util.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/tests/shared/pseudo_rand/pseudo_rand.c b/tests/shared/pseudo_rand/pseudo_rand.c
index f3230bd0..18d9b4df 100644
--- a/tests/shared/pseudo_rand/pseudo_rand.c
+++ b/tests/shared/pseudo_rand/pseudo_rand.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/tests/shared/pseudo_rand/pseudo_rand.h b/tests/shared/pseudo_rand/pseudo_rand.h
index 143b9832..47df5cb6 100644
--- a/tests/shared/pseudo_rand/pseudo_rand.h
+++ b/tests/shared/pseudo_rand/pseudo_rand.h
@@ -1,4 +1,4 @@
-// Copyright 2021-2024 XMOS LIMITED.
+// Copyright 2021-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #pragma once
 
diff --git a/tests/shared/pseudo_rand/rand_frame.c b/tests/shared/pseudo_rand/rand_frame.c
index 3367c052..912dc79e 100644
--- a/tests/shared/pseudo_rand/rand_frame.c
+++ b/tests/shared/pseudo_rand/rand_frame.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include "rand_frame.h"
diff --git a/tests/shared/pseudo_rand/rand_frame.h b/tests/shared/pseudo_rand/rand_frame.h
index f191fe50..2fa3e88d 100644
--- a/tests/shared/pseudo_rand/rand_frame.h
+++ b/tests/shared/pseudo_rand/rand_frame.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/tests/shared/testing/testing.h b/tests/shared/testing/testing.h
index b104af95..2003c0ea 100644
--- a/tests/shared/testing/testing.h
+++ b/tests/shared/testing/testing.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #pragma once
 
diff --git a/tests/shared/testing/testing_conv.c b/tests/shared/testing/testing_conv.c
index 4e67b917..d4753e98 100644
--- a/tests/shared/testing/testing_conv.c
+++ b/tests/shared/testing/testing_conv.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include "testing.h"
diff --git a/tests/shared/testing/testing_diff.c b/tests/shared/testing/testing_diff.c
index 4b696b10..d437027e 100644
--- a/tests/shared/testing/testing_diff.c
+++ b/tests/shared/testing/testing_diff.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include "testing.h"
diff --git a/tests/shared/testing/testing_misc.c b/tests/shared/testing/testing_misc.c
index 65fb0012..a5cb09ad 100644
--- a/tests/shared/testing/testing_misc.c
+++ b/tests/shared/testing/testing_misc.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/tests/shared/testing/testing_print.c b/tests/shared/testing/testing_print.c
index 29e0e2ad..55573c20 100644
--- a/tests/shared/testing/testing_print.c
+++ b/tests/shared/testing/testing_print.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include "testing.h"
diff --git a/tests/vect_tests/src/main.c b/tests/vect_tests/src/main.c
index 2f6562c7..37b53347 100644
--- a/tests/vect_tests/src/main.c
+++ b/tests/vect_tests/src/main.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdio.h>
diff --git a/tests/vect_tests/src/matrix/test_mat_mul_s8_x_s16_yield_s32.c b/tests/vect_tests/src/matrix/test_mat_mul_s8_x_s16_yield_s32.c
index dc774d7f..639ae813 100644
--- a/tests/vect_tests/src/matrix/test_mat_mul_s8_x_s16_yield_s32.c
+++ b/tests/vect_tests/src/matrix/test_mat_mul_s8_x_s16_yield_s32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/matrix/test_mat_mul_s8_x_s8.c b/tests/vect_tests/src/matrix/test_mat_mul_s8_x_s8.c
index eb6d9025..7b78e879 100644
--- a/tests/vect_tests/src/matrix/test_mat_mul_s8_x_s8.c
+++ b/tests/vect_tests/src/matrix/test_mat_mul_s8_x_s8.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/tst_asserts.h b/tests/vect_tests/src/tst_asserts.h
index 1ad12abf..d7a8eedd 100644
--- a/tests/vect_tests/src/tst_asserts.h
+++ b/tests/vect_tests/src/tst_asserts.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/tests/vect_tests/src/tst_common.c b/tests/vect_tests/src/tst_common.c
index f9a6fae6..048d5058 100644
--- a/tests/vect_tests/src/tst_common.c
+++ b/tests/vect_tests/src/tst_common.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #include "tst_common.h"
 
diff --git a/tests/vect_tests/src/tst_common.h b/tests/vect_tests/src/tst_common.h
index fc7ba5bd..ec0249d5 100644
--- a/tests/vect_tests/src/tst_common.h
+++ b/tests/vect_tests/src/tst_common.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/tests/vect_tests/src/unity_config.h b/tests/vect_tests/src/unity_config.h
index 3b0cc6a1..c19827fa 100644
--- a/tests/vect_tests/src/unity_config.h
+++ b/tests/vect_tests/src/unity_config.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #pragma once
 
diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_add.c b/tests/vect_tests/src/vect/complex/test_vect_complex_add.c
index 6dfda7f9..531b1fa8 100644
--- a/tests/vect_tests/src/vect/complex/test_vect_complex_add.c
+++ b/tests/vect_tests/src/vect/complex/test_vect_complex_add.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_add_scalar.c b/tests/vect_tests/src/vect/complex/test_vect_complex_add_scalar.c
index 489f3fff..f6ec85bd 100644
--- a/tests/vect_tests/src/vect/complex/test_vect_complex_add_scalar.c
+++ b/tests/vect_tests/src/vect/complex/test_vect_complex_add_scalar.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_complex_scale.c b/tests/vect_tests/src/vect/complex/test_vect_complex_complex_scale.c
index 0d7d5a07..10368937 100644
--- a/tests/vect_tests/src/vect/complex/test_vect_complex_complex_scale.c
+++ b/tests/vect_tests/src/vect/complex/test_vect_complex_complex_scale.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -15,6 +15,8 @@
 
 #include "unity_fixture.h"
 
+#define INT16_WIGGLE 4
+
 TEST_GROUP_RUNNER(vect_complex_scale) {
   RUN_TEST_CASE(vect_complex_scale, vect_complex_s32_scale_random);
   RUN_TEST_CASE(vect_complex_scale, vect_complex_s32_scale_basic);
@@ -168,8 +170,13 @@ TEST(vect_complex_scale, vect_complex_s16_scale_basic)
             headroom_t hrre, hrim;
 
             for(unsigned int i = 0; i < len; i++){
-                TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[0], casse->line);
-                TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[0], casse->line);
+                #if defined(__VX4B__)
+                    TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.re, A.real[i]);
+                    TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.im, A.imag[i]);
+                #else
+                    TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[i], casse->line);
+                    TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[i], casse->line);
+                #endif
             }
             hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len);
             TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? hrre : hrim, hr, casse->line);
@@ -181,8 +188,13 @@ TEST(vect_complex_scale, vect_complex_s16_scale_basic)
                                                        len, casse->sat);
 
             for(unsigned int i = 0; i < len; i++){
-                TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[0], casse->line);
-                TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[0], casse->line);
+                #if defined(__VX4B__)
+                    TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.re, A.real[i]);
+                    TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.im, A.imag[i]);
+                #else
+                    TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[i], casse->line);
+                    TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[i], casse->line);
+                #endif
             }
             hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len);
             TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? hrre : hrim, hr, casse->line);
@@ -243,8 +255,13 @@ TEST(vect_complex_scale, vect_complex_s16_scale_random)
         for(unsigned int i = 0; i < len; i++){
             complex_s16_t expected = mul_complex_s16(B.real[i], B.imag[i], C.re, C.im, sat);
             
-            TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff);
-            TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff);
+            #if defined(__VX4B__)
+                TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.re, A.real[i]);
+                TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.im, A.imag[i]);
+            #else
+                TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff);
+                TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff);
+            #endif
         }
         hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len);
         TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? hrre : hrim, hr, v);
@@ -257,8 +274,13 @@ TEST(vect_complex_scale, vect_complex_s16_scale_random)
 
         for(unsigned int i = 0; i < len; i++){
             complex_s16_t expected = mul_complex_s16(B.real[i], B.imag[i], C.re, C.im, sat);
-            TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff);
-            TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff);
+            #if defined(__VX4B__)
+                TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.re, A.real[i]);
+                TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.im, A.imag[i]);
+            #else
+                TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff);
+                TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff);
+            #endif
         }
         hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len);
         TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? hrre : hrim, hr, v);
diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_conj_macc.c b/tests/vect_tests/src/vect/complex/test_vect_complex_conj_macc.c
index 35fc6e41..f68c6270 100644
--- a/tests/vect_tests/src/vect/complex/test_vect_complex_conj_macc.c
+++ b/tests/vect_tests/src/vect/complex/test_vect_complex_conj_macc.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_conjugate.c b/tests/vect_tests/src/vect/complex/test_vect_complex_conjugate.c
index de99a738..a75ec0a4 100644
--- a/tests/vect_tests/src/vect/complex/test_vect_complex_conjugate.c
+++ b/tests/vect_tests/src/vect/complex/test_vect_complex_conjugate.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_conjugate_mul.c b/tests/vect_tests/src/vect/complex/test_vect_complex_conjugate_mul.c
index 7d427a4c..74ce805a 100644
--- a/tests/vect_tests/src/vect/complex/test_vect_complex_conjugate_mul.c
+++ b/tests/vect_tests/src/vect/complex/test_vect_complex_conjugate_mul.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -15,6 +15,7 @@
 
 #include "unity_fixture.h"
 
+#define INT16_WIGGLE 4
 
 TEST_GROUP_RUNNER(vect_complex_conj_mul) {
   RUN_TEST_CASE(vect_complex_conj_mul, vect_complex_s16_conj_mul_basic);
@@ -170,9 +171,14 @@ TEST(vect_complex_conj_mul, vect_complex_s16_conj_mul_basic)
                                                   len, casse->sat);
             headroom_t hrre, hrim;
 
-            for(unsigned int i = 0; i < len; i++){
-                TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[0], casse->line);
-                TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[0], casse->line);
+            for(unsigned int i = 0; i < len; i++){                
+                #if defined(__VX4B__)
+                    TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.re, A.real[i]);
+                    TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.im, A.imag[i]);
+                #else
+                    TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[i], casse->line);
+                    TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[i], casse->line);
+                #endif
             }
             hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len);
             TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? hrre : hrim, hr, casse->line);
@@ -184,8 +190,13 @@ TEST(vect_complex_conj_mul, vect_complex_s16_conj_mul_basic)
                                                   len, casse->sat);
 
             for(unsigned int i = 0; i < len; i++){
-                TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[0], casse->line);
-                TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[0], casse->line);
+                #if defined(__VX4B__)
+                    TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.re, A.real[i]);
+                    TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.im, A.imag[i]);
+                #else
+                    TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[i], casse->line);
+                    TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[i], casse->line);
+                #endif
             }
             hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len);
             TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? hrre : hrim, hr, casse->line);
@@ -245,8 +256,13 @@ TEST(vect_complex_conj_mul, vect_complex_s16_conj_mul_random)
         for(unsigned int i = 0; i < len; i++){
             complex_s16_t expected = mul_complex_conj_s16(B.real[i], B.imag[i], C.real[i], C.imag[i], sat);
             
-            TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff);
-            TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff);
+            #if defined(__VX4B__)
+                TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.re, A.real[i]);
+                TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.im, A.imag[i]);
+            #else
+                TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff);
+                TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff);
+            #endif
         }
         hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len);
         TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? hrre : hrim, hr, v);
@@ -259,8 +275,13 @@ TEST(vect_complex_conj_mul, vect_complex_s16_conj_mul_random)
 
         for(unsigned int i = 0; i < len; i++){
             complex_s16_t expected = mul_complex_conj_s16(B.real[i], B.imag[i], C.real[i], C.imag[i], sat);
-            TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff);
-            TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff);
+            #if defined(__VX4B__)
+                TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.re, A.real[i]);
+                TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.im, A.imag[i]);
+            #else
+                TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff);
+                TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff);
+            #endif
         }
         hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len);
         TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? hrre : hrim, hr, v);
diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_macc.c b/tests/vect_tests/src/vect/complex/test_vect_complex_macc.c
index 5e432a39..e243cfd2 100644
--- a/tests/vect_tests/src/vect/complex/test_vect_complex_macc.c
+++ b/tests/vect_tests/src/vect/complex/test_vect_complex_macc.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_mag.c b/tests/vect_tests/src/vect/complex/test_vect_complex_mag.c
index da198324..87e11f97 100644
--- a/tests/vect_tests/src/vect/complex/test_vect_complex_mag.c
+++ b/tests/vect_tests/src/vect/complex/test_vect_complex_mag.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -67,7 +67,7 @@ static int16_t mag_complex_s16(complex_s16_t b, right_shift_t b_shr)
 
     double mag = sqrt(sqr_mag);
 
-    int16_t a = (int16_t) lround(mag);
+    int16_t a = (int16_t) llround(mag);
     
     return SAT(16)(a);
 }
@@ -88,7 +88,7 @@ static int32_t mag_complex_s32(complex_s32_t b, right_shift_t b_shr)
 
     double mag = sqrt((double) sqr_mag);
 
-    int32_t a = lround(mag);
+    int32_t a = llround(mag);
     
     return SAT(32)(a);
 }
diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_mul.c b/tests/vect_tests/src/vect/complex/test_vect_complex_mul.c
index 824b43dd..39c39743 100644
--- a/tests/vect_tests/src/vect/complex/test_vect_complex_mul.c
+++ b/tests/vect_tests/src/vect/complex/test_vect_complex_mul.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -15,6 +15,7 @@
 
 #include "unity_fixture.h"
 
+#define INT16_WIGGLE 4
 
 TEST_GROUP_RUNNER(vect_complex_mul) {
   RUN_TEST_CASE(vect_complex_mul, vect_complex_s16_mul_prepare)
@@ -248,8 +249,13 @@ TEST(vect_complex_mul, vect_complex_s16_mul_basic)
             headroom_t hrre, hrim;
 
             for(unsigned int i = 0; i < len; i++){
-                TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[0], casse->line);
-                TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[0], casse->line);
+                #if defined(__VX4B__)
+                    TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.re, A.real[i]);
+                    TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.im, A.imag[i]);
+                #else
+                    TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[i], casse->line);
+                    TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[i], casse->line);
+                #endif
             }
             hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len);
             TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? hrre : hrim, hr, casse->line);
@@ -261,8 +267,13 @@ TEST(vect_complex_mul, vect_complex_s16_mul_basic)
                                                   len, casse->sat);
 
             for(unsigned int i = 0; i < len; i++){
-                TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[0], casse->line);
-                TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[0], casse->line);
+                #if defined(__VX4B__)
+                    TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.re, A.real[i]);
+                    TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.im, A.imag[i]);
+                #else
+                    TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[i], casse->line);
+                    TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[i], casse->line);
+                #endif
             }
             hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len);
             TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? hrre : hrim, hr, casse->line);
@@ -319,8 +330,13 @@ TEST(vect_complex_mul, vect_complex_s16_mul_random)
         for(unsigned int i = 0; i < len; i++){
             complex_s16_t expected = mul_complex_s16(B.real[i], B.imag[i], C.real[i], C.imag[i], sat);
             
-            TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff);
-            TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff);
+            #if defined(__VX4B__)
+                TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.re, A.real[i]);
+                TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.im, A.imag[i]);
+            #else
+                TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff);
+                TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff);
+            #endif
         }
         hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len);
         TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? hrre : hrim, hr, v);
@@ -333,8 +349,13 @@ TEST(vect_complex_mul, vect_complex_s16_mul_random)
 
         for(unsigned int i = 0; i < len; i++){
             complex_s16_t expected = mul_complex_s16(B.real[i], B.imag[i], C.real[i], C.imag[i], sat);
-            TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff);
-            TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff);
+            #if defined(__VX4B__)
+                TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.re, A.real[i]);
+                TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.im, A.imag[i]);
+            #else
+                TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff);
+                TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff);
+            #endif
         }
         hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len);
         TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? hrre : hrim, hr, v);
diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_real_mul.c b/tests/vect_tests/src/vect/complex/test_vect_complex_real_mul.c
index 99f1532d..577bc05f 100644
--- a/tests/vect_tests/src/vect/complex/test_vect_complex_real_mul.c
+++ b/tests/vect_tests/src/vect/complex/test_vect_complex_real_mul.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -15,6 +15,8 @@
 
 #include "unity_fixture.h"
 
+#define INT16_WIGGLE 4
+
 TEST_GROUP_RUNNER(vect_complex_real_mul) {
   RUN_TEST_CASE(vect_complex_real_mul, vect_complex_s16_real_mul_prepare);
   RUN_TEST_CASE(vect_complex_real_mul, vect_complex_s16_real_mul_basic);
@@ -164,9 +166,13 @@ TEST(vect_complex_real_mul, vect_complex_s16_real_mul_basic)
         //Verify mul_complex_s16() is correct. It's used in other test cases.
         complex_s16_t tmp = mul_complex_s16(casse->value.b.re, casse->value.b.im,
                                             casse->value.c, casse->sat);
-                                            
-        TEST_ASSERT_EQUAL_MSG(casse->expected.re, tmp.re, casse->line);
-        TEST_ASSERT_EQUAL_MSG(casse->expected.im, tmp.im, casse->line);
+        #if defined(__VX4B__)
+            TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.re, tmp.re);
+            TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.im, tmp.im);
+        #else
+            TEST_ASSERT_EQUAL_MSG(casse->expected.re, tmp.re, casse->line);
+            TEST_ASSERT_EQUAL_MSG(casse->expected.im, tmp.im, casse->line);
+        #endif
 
         unsigned lengths[] = {1, 4, 16, 32, 40 };
         
@@ -192,8 +198,13 @@ TEST(vect_complex_real_mul, vect_complex_s16_real_mul_basic)
                                           C, len, casse->sat);
 
             for(unsigned int i = 0; i < len; i++){
-                TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[i], casse->line);
-                TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[i], casse->line);
+                #if defined(__VX4B__)
+                    TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.re, A.real[i]);
+                    TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.im, A.imag[i]);
+                #else
+                    TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[i], casse->line);
+                    TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[i], casse->line);
+                #endif
             }
 
             headroom_t exp_hr = vect_complex_s16_headroom(A.real, A.imag, len);
@@ -205,8 +216,13 @@ TEST(vect_complex_real_mul, vect_complex_s16_real_mul_basic)
                                           C, len, casse->sat);
 
             for(unsigned int i = 0; i < len; i++){
-                TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[i], casse->line);
-                TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[i], casse->line);
+                #if defined(__VX4B__)
+                    TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.re, A.real[i]);
+                    TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.im, A.imag[i]);
+                #else
+                    TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[i], casse->line);
+                    TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[i], casse->line);
+                #endif
             }
 
             exp_hr = vect_complex_s16_headroom(A.real, A.imag, len);
@@ -263,8 +279,13 @@ TEST(vect_complex_real_mul, vect_complex_s16_real_mul_random)
         for(unsigned int i = 0; i < len; i++){
             complex_s16_t expected = mul_complex_s16(B.real[i], B.imag[i], C[i], sat);
             
-            TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff);
-            TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff);
+            #if defined(__VX4B__)
+                TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.re, A.real[i]);
+                TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.im, A.imag[i]);
+            #else
+                TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff);
+                TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff);
+            #endif
         }
 
         TEST_ASSERT_EQUAL_MSG( vect_complex_s16_headroom(A.real,A.imag,len),  hr, v);
@@ -276,8 +297,13 @@ TEST(vect_complex_real_mul, vect_complex_s16_real_mul_random)
 
         for(unsigned int i = 0; i < len; i++){
             complex_s16_t expected = mul_complex_s16(B.real[i], B.imag[i], C[i], sat);
-            TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff);
-            TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff);
+            #if defined(__VX4B__)
+                TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.re, A.real[i]);
+                TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.im, A.imag[i]);
+            #else
+                TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff);
+                TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff);
+            #endif
         }
 
         TEST_ASSERT_EQUAL_MSG( vect_complex_s16_headroom(A.real,A.imag,len),  hr, v);
diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_real_scale.c b/tests/vect_tests/src/vect/complex/test_vect_complex_real_scale.c
index 7e5e6b01..7d41e0d9 100644
--- a/tests/vect_tests/src/vect/complex/test_vect_complex_real_scale.c
+++ b/tests/vect_tests/src/vect/complex/test_vect_complex_real_scale.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -136,7 +136,7 @@ TEST(vect_complex_real_scale, vect_complex_s16_real_scale)
 
                 TEST_ASSERT_INT16_WITHIN(1, exp, A.real[i]);
 
-                exp = lround(ldexp( ((int32_t)B.imag[i]) * C, -sat) + ldexp(1, -40));
+                exp = llround(ldexp( ((int32_t)B.imag[i]) * C, -sat) + ldexp(1, -40));
                 exp = MIN(exp, VPU_INT16_MAX);
                 exp = MAX(exp, VPU_INT16_MIN);
                 TEST_ASSERT_INT16_WITHIN(1, exp, A.imag[i]);
@@ -162,7 +162,7 @@ TEST(vect_complex_real_scale, vect_complex_s16_real_scale)
                 exp = MAX(exp, VPU_INT16_MIN);
                 TEST_ASSERT_INT16_WITHIN(1, exp, A.real[i]);
 
-                exp = lround(ldexp( ((int32_t)B.imag[i]) * C, -sat) + ldexp(1, -40));
+                exp = llround(ldexp( ((int32_t)B.imag[i]) * C, -sat) + ldexp(1, -40));
                 exp = MIN(exp, VPU_INT16_MAX);
                 exp = MAX(exp, VPU_INT16_MIN);
                 TEST_ASSERT_INT16_WITHIN(1, exp, A.imag[i]);
diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_s16_to_complex_s32.c b/tests/vect_tests/src/vect/complex/test_vect_complex_s16_to_complex_s32.c
index 03a56290..615b2eb6 100644
--- a/tests/vect_tests/src/vect/complex/test_vect_complex_s16_to_complex_s32.c
+++ b/tests/vect_tests/src/vect/complex/test_vect_complex_s16_to_complex_s32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_s32_to_complex_s16.c b/tests/vect_tests/src/vect/complex/test_vect_complex_s32_to_complex_s16.c
index 2f513796..090883b2 100644
--- a/tests/vect_tests/src/vect/complex/test_vect_complex_s32_to_complex_s16.c
+++ b/tests/vect_tests/src/vect/complex/test_vect_complex_s32_to_complex_s16.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_squared_mag.c b/tests/vect_tests/src/vect/complex/test_vect_complex_squared_mag.c
index 713009cf..26ca2613 100644
--- a/tests/vect_tests/src/vect/complex/test_vect_complex_squared_mag.c
+++ b/tests/vect_tests/src/vect/complex/test_vect_complex_squared_mag.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -15,6 +15,8 @@
 
 #include "unity_fixture.h"
 
+#define INT16_WIGGLE 4
+
 TEST_GROUP_RUNNER(vect_complex_squared_mag) {
   RUN_TEST_CASE(vect_complex_squared_mag, vect_complex_s16_squared_mag_prepare);
   RUN_TEST_CASE(vect_complex_squared_mag, vect_complex_s32_squared_mag_prepare);
@@ -245,7 +247,11 @@ TEST(vect_complex_squared_mag, vect_complex_s16_squared_mag_basic)
             hr = vect_complex_s16_squared_mag(A, B.real, B.imag, len, casse->sat);
 
             for(unsigned int i = 0; i < len; i++){
-                TEST_ASSERT_EQUAL_MSG(casse->expected, A[i], casse->line);
+                #if defined(__VX4B__)
+                    TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected, A[i]);
+                #else
+                    TEST_ASSERT_EQUAL_MSG(casse->expected, A[i], casse->line);
+                #endif
             }
 
             TEST_ASSERT_EQUAL_MSG(vect_s16_headroom(A, len), hr, casse->line);
@@ -308,6 +314,13 @@ TEST(vect_complex_squared_mag, vect_complex_s16_squared_mag_random)
             TEST_ASSERT_EQUAL_MSG_FMT(expected, A[i],
                   "(test vect %d) (len: %u) (index %d): (mag(%d + i*%d))**2 >> %d",
                    v, len, i, B.real[i], B.imag[i], sat);
+            #if defined(__VX4B__)
+                TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected, A[i]);
+            #else
+                TEST_ASSERT_EQUAL_MSG_FMT(expected, A[i],
+                    "(test vect %d) (len: %u) (index %d): (mag(%d + i*%d))**2 >> %d",
+                    v, len, i, B.real[i], B.imag[i], sat);
+            #endif
         }
         TEST_ASSERT_EQUAL_MSG(vect_s16_headroom(A, len), hr, v);
     }
diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_sub.c b/tests/vect_tests/src/vect/complex/test_vect_complex_sub.c
index 93dabd29..1f25b126 100644
--- a/tests/vect_tests/src/vect/complex/test_vect_complex_sub.c
+++ b/tests/vect_tests/src/vect/complex/test_vect_complex_sub.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/complex/test_vect_sum_complex.c b/tests/vect_tests/src/vect/complex/test_vect_sum_complex.c
index a6aaeb91..b11ef2e8 100644
--- a/tests/vect_tests/src/vect/complex/test_vect_sum_complex.c
+++ b/tests/vect_tests/src/vect/complex/test_vect_sum_complex.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -157,8 +157,14 @@ TEST(vect_complex_sum, vect_complex_s16_sum_basic)
                 ((int32_t) casse->b.re) * len,
                 ((int32_t) casse->b.im) * len};
 
-            TEST_ASSERT_EQUAL_MSG(exp.re, result.re, casse->line);
-            TEST_ASSERT_EQUAL_MSG(exp.im, result.im, casse->line);
+            #if defined(__VX4B__)
+                //this casts to 32 bit because it night not fit in 16 bits (due to rounding)
+                TEST_ASSERT_INT32_WITHIN(4, exp.re, result.re);
+                TEST_ASSERT_INT32_WITHIN(4, exp.im, result.im);
+            #else
+                TEST_ASSERT_EQUAL_MSG(exp.re, result.re, casse->line);
+                TEST_ASSERT_EQUAL_MSG(exp.im, result.im, casse->line);
+            #endif  
         }
     }
 }
@@ -201,8 +207,14 @@ TEST(vect_complex_sum, vect_complex_s16_sum_random)
 
         result = vect_complex_s16_sum(B.real, B.imag, len);
 
-        TEST_ASSERT_EQUAL(expected.re, result.re);
-        TEST_ASSERT_EQUAL(expected.im, result.im);
+        #if defined(__VX4B__)
+            //this casts to 32 bit because it night not fit in 16 bits (due to rounding)
+            TEST_ASSERT_INT32_WITHIN(4, expected.re, result.re);
+            TEST_ASSERT_INT32_WITHIN(4, expected.im, result.im);
+        #else        
+            TEST_ASSERT_EQUAL(expected.re, result.re);
+            TEST_ASSERT_EQUAL(expected.im, result.im);
+        #endif 
         
     }
 }
diff --git a/tests/vect_tests/src/vect/float/test_vect_complex_f32_macc.c b/tests/vect_tests/src/vect/float/test_vect_complex_f32_macc.c
index 47fbd1dc..a5e5bdd8 100644
--- a/tests/vect_tests/src/vect/float/test_vect_complex_f32_macc.c
+++ b/tests/vect_tests/src/vect/float/test_vect_complex_f32_macc.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/float/test_vect_complex_f32_mul.c b/tests/vect_tests/src/vect/float/test_vect_complex_f32_mul.c
index 97521280..2135f8b1 100644
--- a/tests/vect_tests/src/vect/float/test_vect_complex_f32_mul.c
+++ b/tests/vect_tests/src/vect/float/test_vect_complex_f32_mul.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/float/test_vect_f32_add.c b/tests/vect_tests/src/vect/float/test_vect_f32_add.c
index 82e00b3b..a6c07be3 100644
--- a/tests/vect_tests/src/vect/float/test_vect_f32_add.c
+++ b/tests/vect_tests/src/vect/float/test_vect_f32_add.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/float/test_vect_f32_dot.c b/tests/vect_tests/src/vect/float/test_vect_f32_dot.c
index fa6a1c45..1b716cee 100644
--- a/tests/vect_tests/src/vect/float/test_vect_f32_dot.c
+++ b/tests/vect_tests/src/vect/float/test_vect_f32_dot.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/float/test_vect_f32_max_exponent.c b/tests/vect_tests/src/vect/float/test_vect_f32_max_exponent.c
index dbf643e3..61fe0cd8 100644
--- a/tests/vect_tests/src/vect/float/test_vect_f32_max_exponent.c
+++ b/tests/vect_tests/src/vect/float/test_vect_f32_max_exponent.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/float/test_vect_f32_to_s32.c b/tests/vect_tests/src/vect/float/test_vect_f32_to_s32.c
index 83bc81f0..d9823844 100644
--- a/tests/vect_tests/src/vect/float/test_vect_f32_to_s32.c
+++ b/tests/vect_tests/src/vect/float/test_vect_f32_to_s32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/float/test_vect_s32_to_f32.c b/tests/vect_tests/src/vect/float/test_vect_s32_to_f32.c
index 5344066f..d9d8a4b8 100644
--- a/tests/vect_tests/src/vect/float/test_vect_s32_to_f32.c
+++ b/tests/vect_tests/src/vect/float/test_vect_s32_to_f32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/stat/test_vect_abs_sum.c b/tests/vect_tests/src/vect/stat/test_vect_abs_sum.c
index 77e5a021..6ec955eb 100644
--- a/tests/vect_tests/src/vect/stat/test_vect_abs_sum.c
+++ b/tests/vect_tests/src/vect/stat/test_vect_abs_sum.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -83,11 +83,15 @@ TEST(vect_abs_sum, vect_s16_abs_sum_basic)
             t = (t>=0)? t : -t;
             // t = (t>=0)? t : 0x7FFF; // because -1*(-0x8000) = -0x8000
             int32_t exp = t * len;
-
-            XTEST_ASSERT_S32_EQUAL(exp, result,
-                "Case @ line %u\n"
-                "length: %u\n",
-                casse->line, len);
+            #if defined(__VX4B__)
+                //this casts to 32 bit because it night not fit in 16 bits (due to rounding)
+                TEST_ASSERT_INT32_WITHIN(4, exp, result);
+            #else
+                XTEST_ASSERT_S32_EQUAL(exp, result,
+                    "Case @ line %u\n"
+                    "length: %u\n",
+                    casse->line, len);
+            #endif  
         }
     }
 }
@@ -129,8 +133,12 @@ TEST(vect_abs_sum, vect_s16_abs_sum_random)
             b = (b>=0)? b : -b;
             exp += b;
         }
-
-        TEST_ASSERT_EQUAL_MESSAGE(exp, result, "");
+        #if defined(__VX4B__)
+            //this casts to 32 bit because it night not fit in 16 bits (due to rounding)
+            TEST_ASSERT_INT32_WITHIN(4, exp, result);
+        #else
+            TEST_ASSERT_EQUAL_MESSAGE(exp, result, "");
+        #endif 
     }
 }
 #undef MAX_LEN
diff --git a/tests/vect_tests/src/vect/stat/test_vect_argmax.c b/tests/vect_tests/src/vect/stat/test_vect_argmax.c
index 90fa6195..ccba4652 100644
--- a/tests/vect_tests/src/vect/stat/test_vect_argmax.c
+++ b/tests/vect_tests/src/vect/stat/test_vect_argmax.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/stat/test_vect_argmin.c b/tests/vect_tests/src/vect/stat/test_vect_argmin.c
index a1082537..fa7ec0eb 100644
--- a/tests/vect_tests/src/vect/stat/test_vect_argmin.c
+++ b/tests/vect_tests/src/vect/stat/test_vect_argmin.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/stat/test_vect_energy.c b/tests/vect_tests/src/vect/stat/test_vect_energy.c
index 154b8dd9..be5e35d3 100644
--- a/tests/vect_tests/src/vect/stat/test_vect_energy.c
+++ b/tests/vect_tests/src/vect/stat/test_vect_energy.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -84,7 +84,12 @@ TEST(vect_energy, vect_s16_energy)
 
         result = vect_s16_energy(B, len, b_shr);
 
-        TEST_ASSERT_EQUAL(exp, result);
+        #if defined(__VX4B__)
+            //this casts to 32 bit because it night not fit in 16 bits (due to rounding)
+            TEST_ASSERT_INT32_WITHIN(4, exp, result);
+        #else
+            TEST_ASSERT_EQUAL(exp, result);
+        #endif  
         
     }
 }
diff --git a/tests/vect_tests/src/vect/stat/test_vect_max.c b/tests/vect_tests/src/vect/stat/test_vect_max.c
index 4130060e..8301885e 100644
--- a/tests/vect_tests/src/vect/stat/test_vect_max.c
+++ b/tests/vect_tests/src/vect/stat/test_vect_max.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/stat/test_vect_min.c b/tests/vect_tests/src/vect/stat/test_vect_min.c
index 96f99cc1..3b39165b 100644
--- a/tests/vect_tests/src/vect/stat/test_vect_min.c
+++ b/tests/vect_tests/src/vect/stat/test_vect_min.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/test_chunk_s16_accumulate.c b/tests/vect_tests/src/vect/test_chunk_s16_accumulate.c
index 150a93dd..ff49e7d7 100644
--- a/tests/vect_tests/src/vect/test_chunk_s16_accumulate.c
+++ b/tests/vect_tests/src/vect/test_chunk_s16_accumulate.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/test_vect_abs.c b/tests/vect_tests/src/vect/test_vect_abs.c
index 7f4a04d1..39014d58 100644
--- a/tests/vect_tests/src/vect/test_vect_abs.c
+++ b/tests/vect_tests/src/vect/test_vect_abs.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -77,16 +77,28 @@ TEST(vect_abs, vect_s16_abs)
         memset(A, 0xCC, sizeof(A));
         hr = vect_s16_abs(A, B, len);
 
-        for(unsigned int i = 0; i < len; i++)
-            TEST_ASSERT_EQUAL(B[i] >= 0? B[i] : -B[i], A[i]);
+        for(unsigned int i = 0; i < len; i++){
+            #if defined(__VX4B__)
+                //this casts to 32 bit because it night not fit in 16 bits (due to rounding)
+                TEST_ASSERT_INT32_WITHIN(4, B[i] >= 0? B[i] : -B[i], A[i]);
+            #else
+                TEST_ASSERT_EQUAL(B[i] >= 0? B[i] : -B[i], A[i]);
+            #endif 
+        }
         for(int i = len; i < MAX_LEN; i++)
             TEST_ASSERT_EQUAL((int16_t)0xCCCC, A[i]);
 
         memcpy(A, B, sizeof(A));
         hr = vect_s16_abs(A, A, len);
 
-        for(unsigned int i = 0; i < len; i++)
-            TEST_ASSERT_EQUAL(B[i] >= 0? B[i] : -B[i], A[i]);
+        for(unsigned int i = 0; i < len; i++){
+            #if defined(__VX4B__)
+                //this casts to 32 bit because it night not fit in 16 bits (due to rounding)
+                TEST_ASSERT_INT32_WITHIN(4, B[i] >= 0? B[i] : -B[i], A[i]);
+            #else
+                TEST_ASSERT_EQUAL(B[i] >= 0? B[i] : -B[i], A[i]);
+            #endif 
+        }
     }
 }
 
diff --git a/tests/vect_tests/src/vect/test_vect_add.c b/tests/vect_tests/src/vect/test_vect_add.c
index 2464324c..83f1b303 100644
--- a/tests/vect_tests/src/vect/test_vect_add.c
+++ b/tests/vect_tests/src/vect/test_vect_add.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -185,7 +185,7 @@ TEST(vect_add, vect_s16_add_basic)
         {       { -0x0001,   0x0001 },     {  0,  0 },    0x0000,       __LINE__},
         {       {  0x1010,   0x0101 },     {  0,  0 },    0x1111,       __LINE__},
         {       { -0x1010,  -0x0101 },     {  0,  0 },   -0x1111,       __LINE__},
-        {       { -0x8000,   0x0000 },     {  0,  0 },   -0x7FFF,       __LINE__},
+        {       { -0x8000,   0x0000 },     {  0,  0 },   VPU_INT16_MIN, __LINE__},
         {       {  0x4000,   0x4000 },     {  0,  0 },    0x7FFF,       __LINE__},
         {       {  0x7FFF,   0x7FFF },     {  0,  0 },    0x7FFF,       __LINE__},
 
@@ -345,7 +345,7 @@ TEST(vect_add, vect_s32_add_basic)
         {       {          -0x00000001,      0x00000001 },     {   0,   0 },    0x00000000,       __LINE__},
         {       {           0x00001010,      0x00000101 },     {   0,   0 },    0x00001111,       __LINE__},
         {       {          -0x00001010,     -0x00000101 },     {   0,   0 },   -0x00001111,       __LINE__},
-        {       { (int) (0-0x80000000),      0x00000000 },     {   0,   0 },   -0x7FFFFFFF,       __LINE__},
+        {       { (int) (0-0x80000000),      0x00000000 },     {   0,   0 }, VPU_INT32_MIN,       __LINE__},
         {       {           0x40000000,      0x40000000 },     {   0,   0 },    0x7FFFFFFF,       __LINE__},
         {       {           0x7FFFFFFF,      0x7FFFFFFF },     {   0,   0 },    0x7FFFFFFF,       __LINE__},
 
diff --git a/tests/vect_tests/src/vect/test_vect_add_scalar.c b/tests/vect_tests/src/vect/test_vect_add_scalar.c
index 3f9e4105..4c70dc0f 100644
--- a/tests/vect_tests/src/vect/test_vect_add_scalar.c
+++ b/tests/vect_tests/src/vect/test_vect_add_scalar.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/test_vect_bitdepth_convert.c b/tests/vect_tests/src/vect/test_vect_bitdepth_convert.c
index d98043dc..8682a8d9 100644
--- a/tests/vect_tests/src/vect/test_vect_bitdepth_convert.c
+++ b/tests/vect_tests/src/vect/test_vect_bitdepth_convert.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -151,7 +151,7 @@ TEST(vect_bitdepth_convert, vect_s32_to_vect_s16_basic)
         {   0x00000100,      0,      0x0100,    __LINE__},
         {  -0x00000100,      0,     -0x0100,    __LINE__},
         {   0x00008000,      0,      0x7FFF,    __LINE__},
-        {  -0x00008000,      0,     -0x7FFF,    __LINE__},
+        {  -0x00008000,      0,     VPU_INT16_MIN,    __LINE__},
         {   0x00000001,      1,      0x0001,    __LINE__},
         {  -0x00000001,      1,      0x0000,    __LINE__}, //ties round towards positive infty
         {   0x00018000,     16,      0x0002,    __LINE__},
diff --git a/tests/vect_tests/src/vect/test_vect_clip.c b/tests/vect_tests/src/vect/test_vect_clip.c
index 9bb3675a..7f691167 100644
--- a/tests/vect_tests/src/vect/test_vect_clip.c
+++ b/tests/vect_tests/src/vect/test_vect_clip.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -58,18 +58,18 @@ TEST(vect_clip, vect_s16_clip_basic) //
 
     test_case_t casses[] = {
         //       b   b_shr      lower      upper      exp        line num
-        {   0x0000,     0,     -0x7FFF,    0x7FFF,    0x0000,       __LINE__},
-        {   0x0001,     0,     -0x7FFF,    0x7FFF,    0x0001,       __LINE__},
-        {   0x000A,     0,     -0x7FFF,    0x7FFF,    0x000A,       __LINE__},
-        {   0x0F00,     0,     -0x7FFF,    0x7FFF,    0x0F00,       __LINE__},
-        {  -0x0FFF,     0,     -0x7FFF,    0x7FFF,   -0x0FFF,       __LINE__},
-        {  -0x8000,     0,     -0x7FFF,    0x7FFF,   -0x7FFF,       __LINE__},
-
-        {   0x0000,     2,     -0x7FFF,    0x7FFF,    0x0000,       __LINE__},
-        {   0x0002,     1,     -0x7FFF,    0x7FFF,    0x0001,       __LINE__},
-        {   0x0005,    -1,     -0x7FFF,    0x7FFF,    0x000A,       __LINE__},
-        {   0x0F00,     4,     -0x7FFF,    0x7FFF,    0x00F0,       __LINE__},
-        {  -0x0FFF,    -1,     -0x7FFF,    0x7FFF,   -0x1FFE,       __LINE__},
+        {   0x0000,     0,     INT16_MIN,    0x7FFF,    0x0000,       __LINE__},
+        {   0x0001,     0,     INT16_MIN,    0x7FFF,    0x0001,       __LINE__},
+        {   0x000A,     0,     INT16_MIN,    0x7FFF,    0x000A,       __LINE__},
+        {   0x0F00,     0,     INT16_MIN,    0x7FFF,    0x0F00,       __LINE__},
+        {  -0x0FFF,     0,     INT16_MIN,    0x7FFF,   -0x0FFF,       __LINE__},
+        {  INT16_MIN,   0,     INT16_MIN,    0x7FFF,   VPU_INT16_MIN,       __LINE__},
+
+        {   0x0000,     2,     INT16_MIN,    0x7FFF,    0x0000,       __LINE__},
+        {   0x0002,     1,     INT16_MIN,    0x7FFF,    0x0001,       __LINE__},
+        {   0x0005,    -1,     INT16_MIN,    0x7FFF,    0x000A,       __LINE__},
+        {   0x0F00,     4,     INT16_MIN,    0x7FFF,    0x00F0,       __LINE__},
+        {  -0x0FFF,    -1,     INT16_MIN,    0x7FFF,   -0x1FFE,       __LINE__},
         
         {   0x0000,     0,     -0x1000,    0x1000,    0x0000,       __LINE__},
         {   0x0100,     0,     -0x1000,    0x1000,    0x0100,       __LINE__},
@@ -84,7 +84,7 @@ TEST(vect_clip, vect_s16_clip_basic) //
         {   0x2100,     0,      0x1000,    0x2000,    0x2000,       __LINE__},
         {  -0x2100,     0,      0x1000,    0x2000,    0x1000,       __LINE__},
         {   0x7FFF,     0,      0x1000,    0x2000,    0x2000,       __LINE__},
-        {  -0x7FFF,     0,      0x1000,    0x2000,    0x1000,       __LINE__},
+        {  INT16_MIN,   0,      0x1000,    0x2000,    0x1000,       __LINE__},
         {   0x7000,     0,      0x1000,    0x2000,    0x2000,       __LINE__},
         {  -0x7000,     0,      0x1000,    0x2000,    0x1000,       __LINE__},
         {   0x7000,     1,      0x1000,    0x2000,    0x2000,       __LINE__},
@@ -95,7 +95,7 @@ TEST(vect_clip, vect_s16_clip_basic) //
         {   0x0100,     0,     -0x2000,   -0x1000,   -0x1000,       __LINE__},
         {  -0x2100,     0,     -0x2000,   -0x1000,   -0x2000,       __LINE__},
         {   0x2100,     0,     -0x2000,   -0x1000,   -0x1000,       __LINE__},
-        {  -0x7FFF,     0,     -0x2000,   -0x1000,   -0x2000,       __LINE__},
+        {  INT16_MIN,   0,     -0x2000,   -0x1000,   -0x2000,       __LINE__},
         {   0x7FFF,     0,     -0x2000,   -0x1000,   -0x1000,       __LINE__},
         {  -0x7000,     0,     -0x2000,   -0x1000,   -0x2000,       __LINE__},
         {   0x7000,     0,     -0x2000,   -0x1000,   -0x1000,       __LINE__},
diff --git a/tests/vect_tests/src/vect/test_vect_copy.c b/tests/vect_tests/src/vect/test_vect_copy.c
index 4ed2b28c..a1a32e7c 100644
--- a/tests/vect_tests/src/vect/test_vect_copy.c
+++ b/tests/vect_tests/src/vect/test_vect_copy.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/test_vect_dot.c b/tests/vect_tests/src/vect/test_vect_dot.c
index d64e2c20..4f363df6 100644
--- a/tests/vect_tests/src/vect/test_vect_dot.c
+++ b/tests/vect_tests/src/vect/test_vect_dot.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/test_vect_exp.c b/tests/vect_tests/src/vect/test_vect_exp.c
index 70787ea6..3c2f6d89 100644
--- a/tests/vect_tests/src/vect/test_vect_exp.c
+++ b/tests/vect_tests/src/vect/test_vect_exp.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -58,7 +58,7 @@ TEST(vect_exp, chunk_q30_exp_small_RANDOM)
     for(unsigned int i = 0; i < length; i++){
       double bi = ldexp(B[i], b_exp);
       double exp_dbl = exp(bi);
-      expected[i] = lround(ldexp(exp_dbl, 30));
+      expected[i] = llround(ldexp(exp_dbl, 30));
     }
 
     // volatile uint32_t t0 = get_reference_time();
@@ -122,7 +122,7 @@ TEST(vect_exp, vect_q30_exp_small_RANDOM)
     for(unsigned int i = 0; i < length; i++){
       double bi = ldexp(B[i], b_exp);
       double exp_dbl = exp(bi);
-      expected[i] = lround(ldexp(exp_dbl, 30));
+      expected[i] = llround(ldexp(exp_dbl, 30));
     }
 
     // volatile uint32_t t0 = get_reference_time();
diff --git a/tests/vect_tests/src/vect/test_vect_headroom.c b/tests/vect_tests/src/vect/test_vect_headroom.c
index 92bdf0cf..5deee4e0 100644
--- a/tests/vect_tests/src/vect/test_vect_headroom.c
+++ b/tests/vect_tests/src/vect/test_vect_headroom.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/test_vect_inverse.c b/tests/vect_tests/src/vect/test_vect_inverse.c
index 05d8e9f8..f5606b5a 100644
--- a/tests/vect_tests/src/vect/test_vect_inverse.c
+++ b/tests/vect_tests/src/vect/test_vect_inverse.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/test_vect_log.c b/tests/vect_tests/src/vect/test_vect_log.c
index 6a7ee8d8..8d691772 100644
--- a/tests/vect_tests/src/vect/test_vect_log.c
+++ b/tests/vect_tests/src/vect/test_vect_log.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -113,7 +113,7 @@ TEST(vect_log, chunk_float_s32_log_RANDOM)
     for(unsigned int i = 0; i < length; i++){
       double bi = ldexp(B[i].mant, B[i].exp);
       double exp_dbl = log(bi);
-      expected[i] = lround(ldexp(exp_dbl, 24));
+      expected[i] = llround(ldexp(exp_dbl, 24));
     }
 
     // volatile uint32_t t0 = get_reference_time();
@@ -217,7 +217,7 @@ TEST(vect_log, chunk_s32_log)
       double exp_dbl = log(bi);
 
       if( B[i] == 0 ) expected[i] = -INT32_MAX;
-      else            expected[i] = lround(ldexp(exp_dbl, 24));
+      else            expected[i] = llround(ldexp(exp_dbl, 24));
     }
 
     // volatile uint32_t t0 = get_reference_time();
@@ -293,7 +293,7 @@ TEST(vect_log, vect_s32_log)
         default:  exp_dbl = log(bi) * inv_ln_output_base;
       }
 
-      expected[i] = lround(ldexp(exp_dbl, 24));
+      expected[i] = llround(ldexp(exp_dbl, 24));
     }
 
     // volatile uint32_t t0;
@@ -385,7 +385,7 @@ TEST(vect_log, vect_float_s32_log)
         default:  exp_dbl = log(bi) * inv_ln_output_base;
       }
 
-      expected[i] = lround(ldexp(exp_dbl, 24));
+      expected[i] = llround(ldexp(exp_dbl, 24));
     }
 
     // volatile uint32_t t0;
diff --git a/tests/vect_tests/src/vect/test_vect_macc.c b/tests/vect_tests/src/vect/test_vect_macc.c
index 05e922d7..d0a19528 100644
--- a/tests/vect_tests/src/vect/test_vect_macc.c
+++ b/tests/vect_tests/src/vect/test_vect_macc.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 // XMOS Public License: Version 1
 
diff --git a/tests/vect_tests/src/vect/test_vect_max_elementwise.c b/tests/vect_tests/src/vect/test_vect_max_elementwise.c
index 3535bf03..685f8086 100644
--- a/tests/vect_tests/src/vect/test_vect_max_elementwise.c
+++ b/tests/vect_tests/src/vect/test_vect_max_elementwise.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -11,6 +11,7 @@
 #include "xmath/xs3/vpu_scalar_ops.h"
 
 #include "../tst_common.h"
+//#include "../tst_asserts.h"
 #include "unity_fixture.h"
 
 TEST_GROUP(vect_max_elementwise);
diff --git a/tests/vect_tests/src/vect/test_vect_min_elementwise.c b/tests/vect_tests/src/vect/test_vect_min_elementwise.c
index e130b76d..833e1ba9 100644
--- a/tests/vect_tests/src/vect/test_vect_min_elementwise.c
+++ b/tests/vect_tests/src/vect/test_vect_min_elementwise.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/test_vect_mul.c b/tests/vect_tests/src/vect/test_vect_mul.c
index a5456f60..9b85c81f 100644
--- a/tests/vect_tests/src/vect/test_vect_mul.c
+++ b/tests/vect_tests/src/vect/test_vect_mul.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -36,6 +36,11 @@ static char msg_buff[200];
       TEST_ASSERT_EQUAL_MESSAGE((EXPECTED), (ACTUAL), msg_buff);      \
     }} while(0)
 
+#if defined(__XS3A__)
+    #define XTEST_ASSERT_EQUAL_MSG(EXPECTED, ACTUAL, LINE_NUM) TEST_ASSERT_EQUAL_MSG(EXPECTED, ACTUAL, LINE_NUM) 
+#elif defined(__VX4B__)
+    #define XTEST_ASSERT_EQUAL_MSG(EXPECTED, ACTUAL, LINE_NUM) TEST_ASSERT_INT16_WITHIN(1, EXPECTED, ACTUAL)
+#endif
 
 /**
  * This is a VLMACC-based multiply, which means the right-shift
@@ -47,10 +52,15 @@ static int16_t mul_s16(int16_t b, int16_t c, int a_shr)
     int32_t A = ((int32_t)b)*c;
     int32_t a = A;
 
-    if(a_shr != 0)
-      a = a + (1 << (a_shr-1));
+    if(a_shr != 0){
+        if (a_shr > 0){
+            a = a + (1 << (a_shr-1));
+             a = a >> a_shr;
+        } else {
+            a = a << (unsigned)(-a_shr);
+        }
+    }
       
-    a = a >> a_shr;
     a = (a >= VPU_INT16_MAX)? VPU_INT16_MAX : (a <= VPU_INT16_MIN)? VPU_INT16_MIN : a;
 
     return (int16_t) a;
@@ -224,7 +234,11 @@ TEST(vect_mul, vect_s16_mul_basic)
             hr = vect_s16_mul(A, A, C, len, casse->a_shr);
 
             for(unsigned int i = 0; i < len; i++){
-                TEST_ASSERT_EQUAL_MSG(casse->expected, A[0], casse->line);
+                #if defined(__VX4B__)
+                    TEST_ASSERT_INT16_WITHIN(4, casse->expected, A[i]);
+                #else
+                    TEST_ASSERT_EQUAL_MSG(casse->expected, A[i], casse->line);
+                #endif
                 TEST_ASSERT_EQUAL_MSG(vect_s16_headroom(A, len), hr, casse->line);
             }
 
@@ -232,7 +246,11 @@ TEST(vect_mul, vect_s16_mul_basic)
             hr = vect_s16_mul(A, B, A, len, casse->a_shr);
 
             for(unsigned int i = 0; i < len; i++){
-                TEST_ASSERT_EQUAL_MSG(casse->expected, A[0], casse->line);
+                #if defined(__VX4B__)
+                    TEST_ASSERT_INT16_WITHIN(4, casse->expected, A[i]);
+                #else
+                    TEST_ASSERT_EQUAL_MSG(casse->expected, A[i], casse->line);
+                #endif
                 TEST_ASSERT_EQUAL_MSG(vect_s16_headroom(A, len), hr, casse->line);
             }
 
@@ -275,27 +293,36 @@ TEST(vect_mul, vect_s16_mul_random)
         
         // A <-- B * C
         hr = vect_s16_mul(A, B, C, len, a_shr);
-
-        XTEST_ASSERT_VECT_S16_EQUAL(expected, A, len,
-            debug_fmt, expected[i], B[i], C[i], a_shr, A[i] );
-        TEST_ASSERT_EQUAL(vect_s16_headroom(A, len), hr);
         
+        #if defined(__VX4B__)
+            TEST_ASSERT_INT16_ARRAY_WITHIN(1, expected, A, len);
+        #else
+            XTEST_ASSERT_VECT_S16_EQUAL(expected, A, len,
+                debug_fmt, expected[i], B[i], C[i], a_shr, A[i] );
+        #endif
+        TEST_ASSERT_EQUAL(vect_s16_headroom(A, len), hr);
         // A <-- B
         // A <-- A * C
         memcpy(A, B, sizeof(A[0])*len);
         hr = vect_s16_mul(A, A, C, len, a_shr);
-
-        XTEST_ASSERT_VECT_S16_EQUAL(expected, A, len,
-            debug_fmt, expected[i], B[i], C[i], a_shr, A[i] );
+        #if defined(__VX4B__)
+            TEST_ASSERT_INT16_ARRAY_WITHIN(1, expected, A, len);
+        #else
+            XTEST_ASSERT_VECT_S16_EQUAL(expected, A, len,
+                debug_fmt, expected[i], B[i], C[i], a_shr, A[i] );
+        #endif
         TEST_ASSERT_EQUAL(vect_s16_headroom(A, len), hr);
         
         // A <-- C
         // A <-- B * A
         memcpy(A, C, sizeof(A[0])*len);
         hr = vect_s16_mul(A, B, A, len, a_shr);
-
-        XTEST_ASSERT_VECT_S16_EQUAL(expected, A, len,
-            debug_fmt, expected[i], B[i], C[i], a_shr, A[i] );
+        #if defined(__VX4B__)
+            TEST_ASSERT_INT16_ARRAY_WITHIN(1, expected, A, len);
+        #else
+            XTEST_ASSERT_VECT_S16_EQUAL(expected, A, len,
+                debug_fmt, expected[i], B[i], C[i], a_shr, A[i] );
+        #endif
         TEST_ASSERT_EQUAL(vect_s16_headroom(A, len), hr);
         
     }
@@ -329,8 +356,8 @@ TEST(vect_mul, vect_s32_mul_basic)
         {       {       0x00004000,     0x00008000 },     {  0,  0 },      0x00000001,       __LINE__},
         {       {       0x00000400,     0x00000400 },     {  0,  0 },      0x00000000,       __LINE__},
         {       {       0x7f000000,     0x7f000000 },     {  0,  0 },      0x7fffffff,       __LINE__},
-        {       {       0x7f000000,    -0x7f000000 },     {  0,  0 },     -0x7fffffff,       __LINE__},
-        {       { (int) (0-0x80000000),     0x40000000 },     {  0,  0 },     -0x7fffffff,       __LINE__},
+        {       {       0x7f000000,    -0x7f000000 },     {  0,  0 },     VPU_INT32_MIN,       __LINE__},
+        {       { (int) (0-0x80000000),     0x40000000 },     {  0,  0 }, VPU_INT32_MIN,       __LINE__},
         {       {       0x40000000,     0x40000000 },     {  1,  0 },      0x20000000,       __LINE__},
         {       {       0x40000000,     0x40000000 },     {  0,  1 },      0x20000000,       __LINE__},
         {       {       0x40000000,     0x40000000 },     {  1,  1 },      0x10000000,       __LINE__},
@@ -339,7 +366,7 @@ TEST(vect_mul, vect_s32_mul_basic)
         {       {       0x40000000,     0x08000000 },     {  0, -2 },      0x20000000,       __LINE__},
         {       {       0x40000000,     0x08000000 },     {  0, -3 },      0x40000000,       __LINE__},
         {       {       0x40000000,     0x08000000 },     {  0, -4 },      0x7fffffff,       __LINE__},
-        {       {      -0x40000000,     0x08000000 },     {  0, -4 },     -0x7fffffff,       __LINE__},
+        {       {      -0x40000000,     0x08000000 },     {  0, -4 },     -0x7fffffff,       __LINE__}, ///????
         {       {       0x40000000,     0x08000000 },     {  1, -4 },      0x40000000,       __LINE__},
     };
 
diff --git a/tests/vect_tests/src/vect/test_vect_rect.c b/tests/vect_tests/src/vect/test_vect_rect.c
index 843ef5c0..c4e6a70e 100644
--- a/tests/vect_tests/src/vect/test_vect_rect.c
+++ b/tests/vect_tests/src/vect/test_vect_rect.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/test_vect_s16_extract.c b/tests/vect_tests/src/vect/test_vect_s16_extract.c
index 1418613e..4682de9e 100644
--- a/tests/vect_tests/src/vect/test_vect_s16_extract.c
+++ b/tests/vect_tests/src/vect/test_vect_s16_extract.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/test_vect_s32_convolve.c b/tests/vect_tests/src/vect/test_vect_s32_convolve.c
index 6454a13b..3621c75d 100644
--- a/tests/vect_tests/src/vect/test_vect_s32_convolve.c
+++ b/tests/vect_tests/src/vect/test_vect_s32_convolve.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/test_vect_s8_boolean.c b/tests/vect_tests/src/vect/test_vect_s8_boolean.c
index d6af0c2c..1a11261a 100644
--- a/tests/vect_tests/src/vect/test_vect_s8_boolean.c
+++ b/tests/vect_tests/src/vect/test_vect_s8_boolean.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/test_vect_scale.c b/tests/vect_tests/src/vect/test_vect_scale.c
index 45b7da26..e366d57f 100644
--- a/tests/vect_tests/src/vect/test_vect_scale.c
+++ b/tests/vect_tests/src/vect/test_vect_scale.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -35,7 +35,6 @@ static char msg_buff[200];
       TEST_ASSERT_EQUAL_MESSAGE((EXPECTED), (ACTUAL), msg_buff);      \
     }} while(0)
 
-
 static int16_t scalar_mul_s16(int16_t b, int16_t c, right_shift_t sat)
 {
     return vlsat16( ((int32_t)b) * c  , sat);
@@ -138,8 +137,8 @@ TEST(vect_scale, vect_s16_scale_basic)
         {       {  0x0040,   0x0080 },   14,    0x0001,       __LINE__},
         {       {  0x0040,   0x0040 },   14,    0x0000,       __LINE__},
         {       {  0x7f00,   0x7f00 },   14,    0x7fff,       __LINE__},
-        {       {  0x7f00,  -0x7f00 },   14,   -0x7fff,       __LINE__},
-        {       { -0x8000,   0x4000 },   14,   -0x7fff,       __LINE__},
+        {       {  0x7f00,  -0x7f00 },   14,   VPU_INT16_MIN,       __LINE__},
+        {       { -0x8000,   0x4000 },   14,   VPU_INT16_MIN,       __LINE__},
         {       {  0x4000,   0x4000 },   15,    0x2000,       __LINE__},
         {       {  0x4000,   0x2000 },   14,    0x2000,       __LINE__},
         {       {  0x4000,   0x2000 },   15,    0x1000,       __LINE__},
@@ -148,7 +147,7 @@ TEST(vect_scale, vect_s16_scale_basic)
         {       {  0x0800,   0x4000 },   12,    0x2000,       __LINE__},
         {       {  0x0800,   0x4000 },   11,    0x4000,       __LINE__},
         {       {  0x0800,   0x4000 },   10,    0x7fff,       __LINE__},
-        {       {  0x0800,  -0x4000 },   10,   -0x7fff,       __LINE__},
+        {       {  0x0800,  -0x4000 },   10,   VPU_INT16_MIN,       __LINE__},
         {       {  0x0800,   0x2000 },   10,    0x4000,       __LINE__},
 
         
@@ -230,7 +229,11 @@ TEST(vect_scale, vect_s16_scale_random)
         for(unsigned int i = 0; i < len; i++){
             int16_t expected = scalar_mul_s16(B[i], alpha, sat);
             if(expected != A[i]) sprintf(msg_buff, sprintpat,v, i, len, A[i], B[i], sat, alpha, (uint16_t)A[i], (uint16_t)B[i],  (uint16_t)alpha);
-            TEST_ASSERT_EQUAL_MESSAGE(expected, A[i], msg_buff);
+            #if defined(__VX4B__)
+                TEST_ASSERT_INT16_WITHIN(4, expected, A[i]);
+            #else 
+                TEST_ASSERT_EQUAL_MESSAGE(expected, A[i], msg_buff);
+            #endif
         }
         TEST_ASSERT_EQUAL(vect_s16_headroom(A, len), hr);
         
@@ -240,7 +243,11 @@ TEST(vect_scale, vect_s16_scale_random)
         for(unsigned int i = 0; i < len; i++){
             int16_t expected = scalar_mul_s16(B[i], alpha, sat);
             if(expected != A[i]) sprintf(msg_buff, sprintpat,v, i, len, A[i], B[i], sat, alpha, (uint16_t)A[i], (uint16_t)B[i],  (uint16_t)alpha);
-            TEST_ASSERT_EQUAL_MESSAGE(expected, A[i], msg_buff);
+            #if defined(__VX4B__)
+                TEST_ASSERT_INT16_WITHIN(4, expected, A[i]);
+            #else 
+                TEST_ASSERT_EQUAL_MESSAGE(expected, A[i], msg_buff);
+            #endif
         }
         TEST_ASSERT_EQUAL(vect_s16_headroom(A, len), hr);
     }
@@ -276,8 +283,8 @@ TEST(vect_scale, vect_s32_scale_basic)
         {       {           0x00004000,     0x00008000 },     {   0,   0 },      0x00000001,       __LINE__},
         {       {           0x00000400,     0x00000400 },     {   0,   0 },      0x00000000,       __LINE__},
         {       {           0x7f000000,     0x7f000000 },     {   0,   0 },      0x7fffffff,       __LINE__},
-        {       {           0x7f000000,    -0x7f000000 },     {   0,   0 },     -0x7fffffff,       __LINE__},
-        {       { (int) (0-0x80000000),     0x40000000 },     {   0,   0 },     -0x7fffffff,       __LINE__},
+        {       {           0x7f000000,    -0x7f000000 },     {   0,   0 },   VPU_INT32_MIN,       __LINE__},
+        {       { (int) (0-0x80000000),     0x40000000 },     {   0,   0 },   VPU_INT32_MIN,       __LINE__},
         {       {           0x40000000,     0x40000000 },     {   1,   0 },      0x20000000,       __LINE__},
         {       {           0x40000000,     0x20000000 },     {   0,   0 },      0x20000000,       __LINE__},
         {       {           0x40000000,     0x20000000 },     {   1,   0 },      0x10000000,       __LINE__},
diff --git a/tests/vect_tests/src/vect/test_vect_set.c b/tests/vect_tests/src/vect/test_vect_set.c
index f5eaa9f4..a1e3d216 100644
--- a/tests/vect_tests/src/vect/test_vect_set.c
+++ b/tests/vect_tests/src/vect/test_vect_set.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/vect_tests/src/vect/test_vect_shl.c b/tests/vect_tests/src/vect/test_vect_shl.c
index b9539632..ac5f00c3 100644
--- a/tests/vect_tests/src/vect/test_vect_shl.c
+++ b/tests/vect_tests/src/vect/test_vect_shl.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -82,8 +82,8 @@ TEST(vect_shl, vect_s16_shl_basic)
         {  -0x0008,     -3,     -0x0001,    __LINE__},
         {  -0x0008,     -4,     -0x0001,    __LINE__},
         {   0x1000,      3,      0x7FFF,    __LINE__},
-        {  -0x1000,      3,     -0x7FFF,    __LINE__},
-        {  -0x8000,      0,     -0x7FFF,    __LINE__},
+        {  -0x1000,      3,     VPU_INT16_MIN,    __LINE__},
+        {  -0x8000,      0,     VPU_INT16_MIN,    __LINE__},
     };
 
     const unsigned N_cases = sizeof(casses)/sizeof(test_case_t);
@@ -206,8 +206,8 @@ TEST(vect_shl, vect_s32_shl_basic)
         {          -0x00080000,   -3,     -0x00010000,    __LINE__},
         {          -0x00080000,   -4,     -0x00008000,    __LINE__},
         {           0x10000000,    3,      0x7FFFFFFF,    __LINE__},
-        {          -0x10000000,    3,     -0x7FFFFFFF,    __LINE__},
-        { (int) (0-0x80000000),    0,     -0x7FFFFFFF,    __LINE__},
+        {          -0x10000000,    3,     VPU_INT32_MIN,    __LINE__},
+        { (int) (0-0x80000000),    0,     VPU_INT32_MIN,    __LINE__},
     };
 
     const unsigned N_cases = sizeof(casses)/sizeof(test_case_t);
diff --git a/tests/vect_tests/src/vect/test_vect_shr.c b/tests/vect_tests/src/vect/test_vect_shr.c
index ee0a77b9..44cf3552 100644
--- a/tests/vect_tests/src/vect/test_vect_shr.c
+++ b/tests/vect_tests/src/vect/test_vect_shr.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -83,8 +83,8 @@ TEST(vect_shr, vect_s16_shr_basic)
         {  -0x0008,      3,     -0x0001,    __LINE__},
         {  -0x0008,      4,     -0x0001,    __LINE__},
         {   0x1000,     -3,      0x7FFF,    __LINE__},
-        {  -0x1000,     -3,     -0x7FFF,    __LINE__},
-        {  -0x8000,      0,     -0x7FFF,    __LINE__},
+        {  -0x1000,     -3,     VPU_INT16_MIN,    __LINE__},
+        {  -0x8000,      0,     VPU_INT16_MIN,    __LINE__},
     };
 
     const unsigned N_cases = sizeof(cases)/sizeof(test_case_t);
@@ -207,8 +207,8 @@ TEST(vect_shr, vect_s32_shr_basic)
         {           -0x00080000,        3,     -0x00010000,    __LINE__},
         {           -0x00080000,        4,     -0x00008000,    __LINE__},
         {            0x10000000,       -3,      0x7FFFFFFF,    __LINE__},
-        {           -0x10000000,       -3,     -0x7FFFFFFF,    __LINE__},
-        {  (int) (0-0x80000000),        0,     -0x7FFFFFFF,    __LINE__},
+        {           -0x10000000,       -3,      VPU_INT32_MIN,    __LINE__},
+        {  (int) (0-0x80000000),        0,      VPU_INT32_MIN,    __LINE__},
     };
 
     const unsigned N_cases = sizeof(cases)/sizeof(test_case_t);
diff --git a/tests/vect_tests/src/vect/test_vect_sqrt.c b/tests/vect_tests/src/vect/test_vect_sqrt.c
index 34ed703d..c7db2a4e 100644
--- a/tests/vect_tests/src/vect/test_vect_sqrt.c
+++ b/tests/vect_tests/src/vect/test_vect_sqrt.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -206,7 +206,11 @@ TEST(vect_sqrt, vect_s16_sqrt_A)
             int16_t p = vlmul16(A[i], A[i]);
             int16_t p2 = vlmul16(A[i]+1, A[i]+1);
 
-            TEST_ASSERT_LESS_OR_EQUAL_INT16(target, p);
+            #if defined(__VX4B__)
+                TEST_ASSERT_LESS_OR_EQUAL_INT16(target+1, p);
+            #else
+                TEST_ASSERT_LESS_OR_EQUAL_INT16(target, p);
+            #endif
             TEST_ASSERT_GREATER_OR_EQUAL_INT16(target, p2);
         }
     }
diff --git a/tests/vect_tests/src/vect/test_vect_sub.c b/tests/vect_tests/src/vect/test_vect_sub.c
index c79de1b0..594af85c 100644
--- a/tests/vect_tests/src/vect/test_vect_sub.c
+++ b/tests/vect_tests/src/vect/test_vect_sub.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -112,7 +112,7 @@ TEST(vect_sub, vect_s16_sub_basic)
         {       { -0x0001,  -0x0001 },     {  0,  0 },    0x0000,       __LINE__},
         {       {  0x1010,  -0x0101 },     {  0,  0 },    0x1111,       __LINE__},
         {       { -0x1010,   0x0101 },     {  0,  0 },   -0x1111,       __LINE__},
-        {       { -0x8000,  -0x0000 },     {  0,  0 },   -0x7FFF,       __LINE__},
+        {       { -0x8000,  -0x0000 },     {  0,  0 },   VPU_INT16_MIN,       __LINE__},
         {       {  0x4000,  -0x4000 },     {  0,  0 },    0x7FFF,       __LINE__},
         {       {  0x7FFF,  -0x7FFF },     {  0,  0 },    0x7FFF,       __LINE__},
 
@@ -272,7 +272,7 @@ TEST(vect_sub, vect_s32_sub_basic)
         {       {          -0x00000001,    -0x00000001 },     {   0,   0 },    0x00000000,       __LINE__},
         {       {           0x00001010,    -0x00000101 },     {   0,   0 },    0x00001111,       __LINE__},
         {       {          -0x00001010,     0x00000101 },     {   0,   0 },   -0x00001111,       __LINE__},
-        {       { (int) (0-0x80000000),    -0x00000000 },     {   0,   0 },   -0x7FFFFFFF,       __LINE__},
+        {       { (int) (0-0x80000000),    -0x00000000 },     {   0,   0 },   VPU_INT32_MIN,       __LINE__},
         {       {           0x40000000,    -0x40000000 },     {   0,   0 },    0x7FFFFFFF,       __LINE__},
         {       {           0x7FFFFFFF,    -0x7FFFFFFF },     {   0,   0 },    0x7FFFFFFF,       __LINE__},
 
@@ -375,7 +375,7 @@ TEST(vect_sub, vect_s32_sub_random)
         int b_shr = (pseudo_rand_uint32(&seed) % 5) - 2;
         int c_shr = (pseudo_rand_uint32(&seed) % 5) - 2;
         
-        const char sprintpat[] = "rep(%d)[%d of %u]: %ld <-- ((%ld >> %d) + (%ld >> %d))     (A[i]=0x%08X; B[i]=0x%08X; C[i]=0x%08X)";
+        const char sprintpat[] = "rep(%d)[%d of %u]: %ld <-- ((%ld >> %d) - (%ld >> %d))     (A[i]=0x%08X; B[i]=0x%08X; C[i]=0x%08X)";
 
         hr = vect_s32_sub(A, B, C, len, b_shr, c_shr);
 
diff --git a/tests/vect_tests/src/vect/test_vect_sum.c b/tests/vect_tests/src/vect/test_vect_sum.c
index 13ccc129..811abc78 100644
--- a/tests/vect_tests/src/vect/test_vect_sum.c
+++ b/tests/vect_tests/src/vect/test_vect_sum.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -80,7 +80,12 @@ TEST(vect_sum, vect_s16_sum_basic)
 
             int32_t exp = ((int32_t) casse->b) * len;
 
-            TEST_ASSERT_EQUAL_MSG(exp, result, casse->line);
+            #if defined(__VX4B__)
+                //this casts to 32 bit because it night not fit in 16 bits (due to rounding)
+                TEST_ASSERT_INT32_WITHIN(4, exp, result);
+            #else
+                TEST_ASSERT_EQUAL_MSG(exp, result, casse->line);
+            #endif  
         }
     }
 }
@@ -120,9 +125,11 @@ TEST(vect_sum, vect_s16_sum_random)
         for(unsigned int i = 0; i < len; i++){
             exp += B[i];
         }
-
-        TEST_ASSERT_EQUAL(exp, result);
-        
+        #if defined(__VX4B__)
+            TEST_ASSERT_INT16_WITHIN(4, exp, result);
+        #else
+            TEST_ASSERT_EQUAL(exp, result);
+        #endif
     }
 }
 #undef MAX_LEN
diff --git a/tests/vect_tests/src/vect/test_vect_zip.c b/tests/vect_tests/src/vect/test_vect_zip.c
index 91a97ad3..fedffbfd 100644
--- a/tests/vect_tests/src/vect/test_vect_zip.c
+++ b/tests/vect_tests/src/vect/test_vect_zip.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/xs3_tests/src/dummy.xc b/tests/xs3_tests/src/dummy.xc
index 0cc0a8f4..ad633f70 100644
--- a/tests/xs3_tests/src/dummy.xc
+++ b/tests/xs3_tests/src/dummy.xc
@@ -1,4 +1,4 @@
-// Copyright 2022-2024 XMOS LIMITED.
+// Copyright 2022-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/tests/xs3_tests/src/main.c b/tests/xs3_tests/src/main.c
index 5e022d27..bb6bf94b 100644
--- a/tests/xs3_tests/src/main.c
+++ b/tests/xs3_tests/src/main.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 
diff --git a/tests/xs3_tests/src/test_vpu_scalar_ops_s16.c b/tests/xs3_tests/src/test_vpu_scalar_ops_s16.c
index 74c7b70a..39e45b3c 100644
--- a/tests/xs3_tests/src/test_vpu_scalar_ops_s16.c
+++ b/tests/xs3_tests/src/test_vpu_scalar_ops_s16.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -220,7 +220,7 @@ TEST(vpu_scalar_ops_s16, vdepth8_16)
     {
         int8_t res = vdepth8_16( (int16_t) k );
 
-        int32_t exp = lround(ldexp( k, -8) + ldexp(1, -30));
+        int32_t exp = llround(ldexp( k, -8) + ldexp(1, -30));
         exp = MIN(exp, VPU_INT8_MAX);
         exp = MAX(exp, VPU_INT8_MIN);
 
diff --git a/tests/xs3_tests/src/test_vpu_scalar_ops_s32.c b/tests/xs3_tests/src/test_vpu_scalar_ops_s32.c
index 40a1bf01..183a3044 100644
--- a/tests/xs3_tests/src/test_vpu_scalar_ops_s32.c
+++ b/tests/xs3_tests/src/test_vpu_scalar_ops_s32.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
@@ -247,7 +247,7 @@ TEST(vpu_scalar_ops_s32, vdepth8_32)
 
         int8_t res = vdepth8_32( (int32_t) k );
 
-        int32_t exp = lround(ldexp( (double) k, -24) + ldexp(1, -40));
+        int32_t exp = llround(ldexp( (double) k, -24) + ldexp(1, -40));
         exp = MIN(exp, VPU_INT8_MAX);
         exp = MAX(exp, VPU_INT8_MIN);
 
@@ -279,7 +279,7 @@ TEST(vpu_scalar_ops_s32, vdepth16_32)
 
         int8_t res = (int8_t) vdepth16_32( (int32_t) k );
 
-        int32_t exp = lround(ldexp( (double) k, -16) + ldexp(1, -40));
+        int32_t exp = llround(ldexp( (double) k, -16) + ldexp(1, -40));
         exp = MIN(exp, VPU_INT16_MAX);
         exp = MAX(exp, VPU_INT16_MIN);
 
diff --git a/tests/xs3_tests/src/test_vpu_scalar_ops_s8.c b/tests/xs3_tests/src/test_vpu_scalar_ops_s8.c
index 45cc1e32..5575e76e 100644
--- a/tests/xs3_tests/src/test_vpu_scalar_ops_s8.c
+++ b/tests/xs3_tests/src/test_vpu_scalar_ops_s8.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #include <stdint.h>
diff --git a/tests/xs3_tests/src/tst_asserts.h b/tests/xs3_tests/src/tst_asserts.h
index 07a01c5e..676c84b1 100644
--- a/tests/xs3_tests/src/tst_asserts.h
+++ b/tests/xs3_tests/src/tst_asserts.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/tests/xs3_tests/src/tst_common.c b/tests/xs3_tests/src/tst_common.c
index f9a6fae6..048d5058 100644
--- a/tests/xs3_tests/src/tst_common.c
+++ b/tests/xs3_tests/src/tst_common.c
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #include "tst_common.h"
 
diff --git a/tests/xs3_tests/src/tst_common.h b/tests/xs3_tests/src/tst_common.h
index 3275c311..dd1eb0cf 100644
--- a/tests/xs3_tests/src/tst_common.h
+++ b/tests/xs3_tests/src/tst_common.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 
 #pragma once
diff --git a/tests/xs3_tests/src/unity_config.h b/tests/xs3_tests/src/unity_config.h
index 2998718d..c0699a63 100644
--- a/tests/xs3_tests/src/unity_config.h
+++ b/tests/xs3_tests/src/unity_config.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED.
 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
 #pragma once