From 34251a01481f96102ec5f2beea9c9965f409feff Mon Sep 17 00:00:00 2001 From: andrewstanfordjason Date: Wed, 28 Jan 2026 15:37:55 +0000 Subject: [PATCH] vx4b initial --- doc/rst/src/reference/notes.h | 2 +- examples/app_bfp_demo/src/main.c | 2 +- examples/app_fft_demo/src/main.c | 2 +- .../app_filter_demo/src/filter_16bit_fir.c | 2 +- .../app_filter_demo/src/filter_32bit_biquad.c | 2 +- .../app_filter_demo/src/filter_32bit_fir.c | 2 +- examples/app_filter_demo/src/main.c | 2 +- examples/app_vect_demo/src/main.c | 2 +- .../src/vect_complex_s16_example.c | 2 +- examples/app_vect_demo/src/vect_s32_example.c | 2 +- lib_xcore_math/CMakeLists.txt | 6 +- lib_xcore_math/api/xcore_math.h | 2 +- lib_xcore_math/api/xmath/_support/dct_impl.h | 2 +- lib_xcore_math/api/xmath/_support/fft_impl.h | 2 +- lib_xcore_math/api/xmath/api.h | 2 +- lib_xcore_math/api/xmath/bfp/bfp.h | 2 +- .../api/xmath/bfp/bfp_complex_s16.h | 2 +- .../api/xmath/bfp/bfp_complex_s32.h | 2 +- lib_xcore_math/api/xmath/bfp/bfp_misc.h | 2 +- lib_xcore_math/api/xmath/bfp/bfp_s16.h | 2 +- lib_xcore_math/api/xmath/bfp/bfp_s32.h | 2 +- lib_xcore_math/api/xmath/dct.h | 2 +- lib_xcore_math/api/xmath/fft.h | 2 +- lib_xcore_math/api/xmath/filter.h | 2 +- lib_xcore_math/api/xmath/q_format.h | 2 +- lib_xcore_math/api/xmath/scalar/f32.h | 2 +- .../api/xmath/scalar/float_complex_s16.h | 2 +- .../api/xmath/scalar/float_complex_s32.h | 2 +- lib_xcore_math/api/xmath/scalar/float_s32.h | 2 +- lib_xcore_math/api/xmath/scalar/s16.h | 2 +- lib_xcore_math/api/xmath/scalar/s32.h | 2 +- lib_xcore_math/api/xmath/scalar/scalar.h | 2 +- lib_xcore_math/api/xmath/scalar/scalar_misc.h | 2 +- lib_xcore_math/api/xmath/types.h | 2 +- lib_xcore_math/api/xmath/util.h | 8 +- lib_xcore_math/api/xmath/vect/chunk_s32.h | 2 +- lib_xcore_math/api/xmath/vect/vect.h | 2 +- .../api/xmath/vect/vect_complex_s16.h | 2 +- .../api/xmath/vect/vect_complex_s16_prepare.h | 2 +- .../api/xmath/vect/vect_complex_s32.h | 2 +- 
.../api/xmath/vect/vect_complex_s32_prepare.h | 2 +- lib_xcore_math/api/xmath/vect/vect_f32.h | 2 +- lib_xcore_math/api/xmath/vect/vect_mixed.h | 2 +- lib_xcore_math/api/xmath/vect/vect_s16.h | 2 +- .../api/xmath/vect/vect_s16_prepare.h | 2 +- lib_xcore_math/api/xmath/vect/vect_s32.h | 2 +- .../api/xmath/vect/vect_s32_prepare.h | 2 +- lib_xcore_math/api/xmath/vect/vect_s8.h | 2 +- lib_xcore_math/api/xmath/xmath.h | 2 +- lib_xcore_math/api/xmath/xmath_conf.h | 2 +- lib_xcore_math/api/xmath/xs3/vpu_info.h | 20 +- lib_xcore_math/api/xmath/xs3/vpu_scalar_ops.h | 13 +- .../python/gen_biquad_filter_s32.py | 2 +- lib_xcore_math/python/gen_fft_table.py | 2 +- lib_xcore_math/python/gen_fir_filter_s16.py | 2 +- lib_xcore_math/python/gen_fir_filter_s32.py | 2 +- lib_xcore_math/python/gen_rot_table.py | 2 +- lib_xcore_math/python/xmath_script.py | 2 +- .../src/arch/ref/bool/vect_s8_is_negative.c | 2 +- lib_xcore_math/src/arch/ref/chunk.c | 2 +- .../src/arch/ref/chunk_s16_accumulate.c | 2 +- .../arch/ref/complex/vect_complex_conj_macc.c | 2 +- .../arch/ref/complex/vect_complex_conjugate.c | 2 +- .../ref/complex/vect_complex_depth_convert.c | 2 +- .../src/arch/ref/complex/vect_complex_macc.c | 2 +- .../src/arch/ref/complex/vect_complex_mag.c | 2 +- .../src/arch/ref/complex/vect_complex_mul.c | 2 +- .../src/arch/ref/complex/vect_complex_sum.c | 2 +- lib_xcore_math/src/arch/ref/dct/dct.c | 2 +- lib_xcore_math/src/arch/ref/dct/dct8x8.c | 2 +- lib_xcore_math/src/arch/ref/dct/idct.c | 2 +- lib_xcore_math/src/arch/ref/f32.c | 2 +- lib_xcore_math/src/arch/ref/fft/fft_dif.c | 2 +- lib_xcore_math/src/arch/ref/fft/fft_dit.c | 2 +- lib_xcore_math/src/arch/ref/fft/fft_util.c | 2 +- .../src/arch/ref/filter/filter_biquad_s32.c | 2 +- .../arch/ref/filter/filter_biquad_sat_s32.c | 2 +- .../src/arch/ref/filter/filter_fir_s16.c | 2 +- .../src/arch/ref/filter/filter_fir_s32.c | 2 +- lib_xcore_math/src/arch/ref/float_s32.c | 2 +- .../ref/matrix/mat_mul_s8_x_s8_yield_s32.c | 2 +- 
lib_xcore_math/src/arch/ref/misc.c | 2 +- lib_xcore_math/src/arch/ref/qXX.c | 2 +- lib_xcore_math/src/arch/ref/s32_sqrt.c | 2 +- .../src/arch/ref/vect_abs_clip_rect.c | 2 +- lib_xcore_math/src/arch/ref/vect_add_sub.c | 2 +- lib_xcore_math/src/arch/ref/vect_convolve.c | 2 +- lib_xcore_math/src/arch/ref/vect_copy.c | 2 +- .../src/arch/ref/vect_depth_convert.c | 2 +- lib_xcore_math/src/arch/ref/vect_dot.c | 2 +- lib_xcore_math/src/arch/ref/vect_f32.c | 2 +- lib_xcore_math/src/arch/ref/vect_headroom.c | 2 +- lib_xcore_math/src/arch/ref/vect_inverse.c | 2 +- lib_xcore_math/src/arch/ref/vect_macc.c | 2 +- lib_xcore_math/src/arch/ref/vect_mul.c | 2 +- .../src/arch/ref/vect_s16_extract.c | 2 +- lib_xcore_math/src/arch/ref/vect_sXX.c | 2 +- lib_xcore_math/src/arch/ref/vect_set.c | 2 +- lib_xcore_math/src/arch/ref/vect_shl.c | 2 +- lib_xcore_math/src/arch/ref/vect_sqrt.c | 2 +- lib_xcore_math/src/arch/ref/vect_stats.c | 2 +- lib_xcore_math/src/arch/ref/vect_sum.c | 2 +- lib_xcore_math/src/arch/ref/vect_zip.c | 2 +- lib_xcore_math/src/arch/ref/vpu_scalar_ops.c | 29 +- lib_xcore_math/src/arch/vx4b/NOTES.rst | 5 + lib_xcore_math/src/arch/vx4b/asm_helper.h | 25 + .../chunk_s16/chunk_s16_accumulate.almost | 97 +++ .../vx4b/chunk_s16/chunk_s16_accumulate.c | 47 ++ .../src/arch/vx4b/chunk_s32/chunk_s32_dot.S | 50 ++ .../src/arch/vx4b/chunk_s32/chunk_s32_log.S | 176 ++++++ .../vx4b/chunk_s32/chunk_s32_power_series.S | 87 +++ .../chunk_s32/chunk_s32_power_series_v2.S | 108 ++++ .../src/arch/vx4b/dct/s32/dct12_s32.S | 140 +++++ .../src/arch/vx4b/dct/s32/dct16_s32.S | 157 +++++ .../src/arch/vx4b/dct/s32/dct24_s32.S | 178 ++++++ .../src/arch/vx4b/dct/s32/dct6_s32.S | 65 ++ .../src/arch/vx4b/dct/s32/dct8_s32.S | 67 +++ .../src/arch/vx4b/dct/s32/dct_adsb_s32.S | 78 +++ .../arch/vx4b/dct/s32/dct_deconvolve_s32.S | 81 +++ .../src/arch/vx4b/dct/s32/idct6_s32.S | 64 ++ .../src/arch/vx4b/dct/s32/idct8_s32.S | 65 ++ .../src/arch/vx4b/dct/s32/idct_adsb.S | 73 +++ 
.../src/arch/vx4b/dct/s32/idct_convolve.S | 80 +++ .../src/arch/vx4b/dct/s32/idct_scale.S | 66 ++ .../src/arch/vx4b/dct/s8/dct8x8_stageA.S | 187 ++++++ .../src/arch/vx4b/dct/s8/dct8x8_stageB.S | 191 ++++++ .../src/arch/vx4b/dct/vect_s32_flip.S | 54 ++ lib_xcore_math/src/arch/vx4b/fft/dif_fft.S | 285 +++++++++ lib_xcore_math/src/arch/vx4b/fft/dit_fft.S | 316 ++++++++++ lib_xcore_math/src/arch/vx4b/fft/fft_hr_lut.S | 24 + .../arch/vx4b/fft/fft_index_bit_reversal.S | 65 ++ .../src/arch/vx4b/fft/fft_mono_adjust.S | 218 +++++++ .../src/arch/vx4b/fft/fft_spectra_merge.S | 158 +++++ .../src/arch/vx4b/fft/fft_spectra_split.S | 150 +++++ .../arch/vx4b/fft/tail_reverse_complex_s32.S | 106 ++++ .../src/arch/vx4b/filter/filter_biquad_s32.S | 155 +++++ .../arch/vx4b/filter/filter_biquad_sat_s32.S | 220 +++++++ .../src/arch/vx4b/filter/filter_fir_s16.S | 118 ++++ .../src/arch/vx4b/filter/filter_fir_s32.S | 206 +++++++ .../arch/vx4b/filter/push_sample_down_s16.S | 127 ++++ .../src/arch/vx4b/filter/push_sample_up_s16.S | 152 +++++ .../vx4b/filter/vect_s32_convolve_valid.S | 130 ++++ .../vx4b/matrix/mat_mul_s8_x_s8_yield_s32.S | 128 ++++ .../src/arch/vx4b/misc/chunk_float_s32_log.S | 184 ++++++ lib_xcore_math/src/arch/vx4b/misc/util.S | 103 ++++ lib_xcore_math/src/arch/vx4b/misc/vect_copy.S | 60 ++ .../vx4b/misc/vect_float_s32_ln_prepare.S | 122 ++++ .../src/arch/vx4b/misc/xs3_memcpy.S | 53 ++ .../src/arch/vx4b/scalar/f32_log2.S | 69 +++ .../src/arch/vx4b/scalar/f32_norm.S | 48 ++ .../src/arch/vx4b/scalar/f32_power_series.S | 133 +++++ lib_xcore_math/src/arch/vx4b/scalar/f32_sin.S | 147 +++++ .../src/arch/vx4b/scalar/float_s32.c | 87 +++ .../src/arch/vx4b/scalar/float_s32_exp.almost | 153 +++++ .../src/arch/vx4b/scalar/q24_logistic_fast.S | 80 +++ .../src/arch/vx4b/scalar/q30_exp_small.S | 115 ++++ .../src/arch/vx4b/scalar/q30_odd_powers.S | 74 +++ .../src/arch/vx4b/scalar/q30_powers.S | 110 ++++ .../src/arch/vx4b/scalar/radians_to_sbrads.S | 95 +++ 
.../src/arch/vx4b/scalar/sbrad_sin.S | 118 ++++ .../src/arch/vx4b/scalar/sbrad_tan.S | 150 +++++ .../src/arch/vx4b/scalar/scalar_op_s16.S | 511 ++++++++++++++++ .../src/arch/vx4b/scalar/scalar_op_s32.S | 563 ++++++++++++++++++ .../src/arch/vx4b/scalar/scalar_op_s8.S | 423 +++++++++++++ .../src/arch/vx4b/scalar/sqrt_s32.S | 129 ++++ .../vect_complex_s16_complex_scale.S | 205 +++++++ .../vect_complex_s16_conj_macc.S | 219 +++++++ .../vect_complex_s16_conj_nmacc.S | 211 +++++++ .../vect_complex_s16_conjugate_mul.S | 160 +++++ .../vect_complex_s16/vect_complex_s16_macc.S | 212 +++++++ .../vect_complex_s16/vect_complex_s16_mag.S | 269 +++++++++ .../vect_complex_s16/vect_complex_s16_mul.S | 186 ++++++ .../vect_complex_s16/vect_complex_s16_nmacc.S | 211 +++++++ .../vect_complex_s16_real_mul.S | 142 +++++ .../vect_complex_s16_squared_mag.S | 120 ++++ .../vect_complex_s16/vect_complex_s16_sum.S | 156 +++++ .../vect_complex_s16_to_complex_s32.S | 60 ++ .../vect_complex_s32_complex_scale.S | 151 +++++ .../vect_complex_s32_conj_macc.S | 129 ++++ .../vect_complex_s32_conj_nmacc.S | 129 ++++ .../vect_complex_s32_conjugate.S | 83 +++ .../vect_complex_s32_conjugate_mul.S | 116 ++++ .../vect_complex_s32/vect_complex_s32_macc.S | 132 ++++ .../vect_complex_s32/vect_complex_s32_mag.S | 166 ++++++ .../vect_complex_s32/vect_complex_s32_mul.S | 116 ++++ .../vect_complex_s32/vect_complex_s32_nmacc.S | 128 ++++ .../vect_complex_s32_real_mul.S | 134 +++++ .../vect_complex_s32_squared_mag.S | 120 ++++ .../vect_complex_s32/vect_complex_s32_sum.S | 150 +++++ .../vect_complex_s32_to_complex_s16.S | 107 ++++ .../vect_f32/vect_complex_f32_conj_macc.S | 77 +++ .../vx4b/vect_f32/vect_complex_f32_conj_mul.S | 81 +++ .../vx4b/vect_f32/vect_complex_f32_macc.S | 77 +++ .../arch/vx4b/vect_f32/vect_complex_f32_mul.S | 82 +++ .../src/arch/vx4b/vect_f32/vect_f32_add.S | 88 +++ .../src/arch/vx4b/vect_f32/vect_f32_dot.S | 118 ++++ .../vx4b/vect_f32/vect_f32_max_exponent.S | 72 +++ 
.../src/arch/vx4b/vect_f32/vect_f32_to_s32.S | 90 +++ .../src/arch/vx4b/vect_s16/vect_s16_abs.S | 124 ++++ .../src/arch/vx4b/vect_s16/vect_s16_abs_sum.S | 150 +++++ .../src/arch/vx4b/vect_s16/vect_s16_argmax.S | 165 +++++ .../src/arch/vx4b/vect_s16/vect_s16_argmin.S | 184 ++++++ .../src/arch/vx4b/vect_s16/vect_s16_clip.S | 331 ++++++++++ .../src/arch/vx4b/vect_s16/vect_s16_dot.c | 36 ++ .../src/arch/vx4b/vect_s16/vect_s16_energy.S | 116 ++++ .../vect_s16/vect_s16_extract_high_byte.S | 132 ++++ .../vx4b/vect_s16/vect_s16_extract_low_byte.S | 129 ++++ .../src/arch/vx4b/vect_s16/vect_s16_inverse.S | 66 ++ .../src/arch/vx4b/vect_s16/vect_s16_macc.S | 122 ++++ .../src/arch/vx4b/vect_s16/vect_s16_max.S | 114 ++++ .../src/arch/vx4b/vect_s16/vect_s16_min.S | 115 ++++ .../src/arch/vx4b/vect_s16/vect_s16_mul.S | 106 ++++ .../src/arch/vx4b/vect_s16/vect_s16_nmacc.S | 122 ++++ .../src/arch/vx4b/vect_s16/vect_s16_scale.S | 120 ++++ .../src/arch/vx4b/vect_s16/vect_s16_sqrt.S | 203 +++++++ .../src/arch/vx4b/vect_s16/vect_s16_sum.S | 113 ++++ .../src/arch/vx4b/vect_s16/vect_s16_to_s32.S | 111 ++++ .../src/arch/vx4b/vect_s32/s32_to_chunk_s32.S | 49 ++ .../src/arch/vx4b/vect_s32/vect_s32_abs.S | 106 ++++ .../src/arch/vx4b/vect_s32/vect_s32_abs_sum.S | 109 ++++ .../src/arch/vx4b/vect_s32/vect_s32_argmax.S | 154 +++++ .../src/arch/vx4b/vect_s32/vect_s32_argmin.S | 161 +++++ .../src/arch/vx4b/vect_s32/vect_s32_clip.S | 330 ++++++++++ .../src/arch/vx4b/vect_s32/vect_s32_dot.S | 118 ++++ .../src/arch/vx4b/vect_s32/vect_s32_energy.S | 111 ++++ .../src/arch/vx4b/vect_s32/vect_s32_inverse.S | 113 ++++ .../src/arch/vx4b/vect_s32/vect_s32_macc.S | 119 ++++ .../src/arch/vx4b/vect_s32/vect_s32_max.S | 114 ++++ .../arch/vx4b/vect_s32/vect_s32_merge_accs.S | 104 ++++ .../src/arch/vx4b/vect_s32/vect_s32_min.S | 114 ++++ .../src/arch/vx4b/vect_s32/vect_s32_mul.S | 109 ++++ .../src/arch/vx4b/vect_s32/vect_s32_nmacc.S | 119 ++++ .../src/arch/vx4b/vect_s32/vect_s32_scale.S | 109 ++++ 
.../arch/vx4b/vect_s32/vect_s32_split_accs.S | 114 ++++ .../src/arch/vx4b/vect_s32/vect_s32_sqrt.S | 185 ++++++ .../src/arch/vx4b/vect_s32/vect_s32_sum.S | 98 +++ .../src/arch/vx4b/vect_s32/vect_s32_to_f32.S | 91 +++ .../src/arch/vx4b/vect_s32/vect_s32_to_s16.S | 77 +++ .../src/arch/vx4b/vect_s32/vect_s32_unzip.S | 69 +++ .../src/arch/vx4b/vect_s32/vect_s32_zip.S | 143 +++++ .../arch/vx4b/vect_s8/vect_s8_is_negative.S | 81 +++ .../src/arch/vx4b/vect_sXX/vect_add.S | 156 +++++ .../src/arch/vx4b/vect_sXX/vect_headroom.S | 134 +++++ .../src/arch/vx4b/vect_sXX/vect_rect.S | 138 +++++ .../arch/vx4b/vect_sXX/vect_sXX_add_scalar.S | 112 ++++ .../vx4b/vect_sXX/vect_sXX_max_elementwise.S | 192 ++++++ .../vx4b/vect_sXX/vect_sXX_min_elementwise.S | 193 ++++++ .../src/arch/vx4b/vect_sXX/vect_set.S | 133 +++++ .../src/arch/vx4b/vect_sXX/vect_shl.S | 169 ++++++ .../src/arch/vx4b/vect_sXX/vect_sub.S | 151 +++++ lib_xcore_math/src/arch/xs3/asm_helper.h | 2 +- .../arch/xs3/chunk_s16/chunk_s16_accumulate.S | 2 +- .../src/arch/xs3/chunk_s32/chunk_s32_dot.S | 2 +- .../src/arch/xs3/chunk_s32/chunk_s32_log.S | 2 +- .../xs3/chunk_s32/chunk_s32_power_series.S | 2 +- .../xs3/chunk_s32/chunk_s32_power_series_v2.S | 2 +- .../src/arch/xs3/dct/s32/dct12_s32.S | 2 +- .../src/arch/xs3/dct/s32/dct16_s32.S | 2 +- .../src/arch/xs3/dct/s32/dct24_s32.S | 2 +- .../src/arch/xs3/dct/s32/dct6_s32.S | 2 +- .../src/arch/xs3/dct/s32/dct8_s32.S | 2 +- .../src/arch/xs3/dct/s32/dct_adsb_s32.S | 2 +- .../src/arch/xs3/dct/s32/dct_deconvolve_s32.S | 2 +- .../src/arch/xs3/dct/s32/idct6_s32.S | 2 +- .../src/arch/xs3/dct/s32/idct8_s32.S | 2 +- .../src/arch/xs3/dct/s32/idct_adsb.S | 2 +- .../src/arch/xs3/dct/s32/idct_convolve.S | 2 +- .../src/arch/xs3/dct/s32/idct_scale.S | 2 +- .../src/arch/xs3/dct/s8/dct8x8_stageA.S | 2 +- .../src/arch/xs3/dct/s8/dct8x8_stageB.S | 2 +- .../src/arch/xs3/dct/vect_s32_flip.S | 2 +- lib_xcore_math/src/arch/xs3/fft/dif_fft.S | 2 +- lib_xcore_math/src/arch/xs3/fft/dit_fft.S | 2 +- 
lib_xcore_math/src/arch/xs3/fft/fft_hr_lut.S | 2 +- .../src/arch/xs3/fft/fft_index_bit_reversal.S | 2 +- .../src/arch/xs3/fft/fft_mono_adjust.S | 2 +- .../src/arch/xs3/fft/fft_spectra_merge.S | 2 +- .../src/arch/xs3/fft/fft_spectra_split.S | 2 +- .../arch/xs3/fft/tail_reverse_complex_s32.S | 2 +- .../src/arch/xs3/filter/filter_biquad_s32.S | 2 +- .../arch/xs3/filter/filter_biquad_sat_s32.S | 2 +- .../src/arch/xs3/filter/filter_fir_s16.S | 2 +- .../src/arch/xs3/filter/filter_fir_s32.S | 2 +- .../arch/xs3/filter/push_sample_down_s16.S | 2 +- .../src/arch/xs3/filter/push_sample_up_s16.S | 2 +- .../arch/xs3/filter/vect_s32_convolve_valid.S | 2 +- .../xs3/matrix/mat_mul_s8_x_s8_yield_s32.S | 2 +- .../src/arch/xs3/misc/chunk_float_s32_log.S | 2 +- lib_xcore_math/src/arch/xs3/misc/util.S | 2 +- lib_xcore_math/src/arch/xs3/misc/vect_copy.S | 2 +- .../arch/xs3/misc/vect_float_s32_ln_prepare.S | 2 +- lib_xcore_math/src/arch/xs3/misc/xs3_memcpy.S | 2 +- lib_xcore_math/src/arch/xs3/scalar/f32_log2.S | 2 +- lib_xcore_math/src/arch/xs3/scalar/f32_norm.S | 2 +- .../src/arch/xs3/scalar/f32_power_series.S | 2 +- lib_xcore_math/src/arch/xs3/scalar/f32_sin.S | 2 +- .../src/arch/xs3/scalar/float_s32_exp.S | 2 +- .../src/arch/xs3/scalar/q24_logistic_fast.S | 2 +- .../src/arch/xs3/scalar/q30_exp_small.S | 2 +- .../src/arch/xs3/scalar/q30_odd_powers.S | 2 +- .../src/arch/xs3/scalar/q30_powers.S | 2 +- .../src/arch/xs3/scalar/radians_to_sbrads.S | 2 +- .../src/arch/xs3/scalar/sbrad_sin.S | 2 +- .../src/arch/xs3/scalar/sbrad_tan.S | 2 +- .../src/arch/xs3/scalar/scalar_op_s16.S | 2 +- .../src/arch/xs3/scalar/scalar_op_s32.S | 2 +- .../src/arch/xs3/scalar/scalar_op_s8.S | 2 +- lib_xcore_math/src/arch/xs3/scalar/sqrt_s32.S | 2 +- .../vect_complex_s16_complex_scale.S | 2 +- .../vect_complex_s16_conj_macc.S | 2 +- .../vect_complex_s16_conj_nmacc.S | 2 +- .../vect_complex_s16_conjugate_mul.S | 2 +- .../vect_complex_s16/vect_complex_s16_macc.S | 2 +- .../vect_complex_s16/vect_complex_s16_mag.S | 
2 +- .../vect_complex_s16/vect_complex_s16_mul.S | 2 +- .../vect_complex_s16/vect_complex_s16_nmacc.S | 2 +- .../vect_complex_s16_real_mul.S | 2 +- .../vect_complex_s16_squared_mag.S | 2 +- .../vect_complex_s16/vect_complex_s16_sum.S | 2 +- .../vect_complex_s16_to_complex_s32.S | 2 +- .../vect_complex_s32_complex_scale.S | 2 +- .../vect_complex_s32_conj_macc.S | 2 +- .../vect_complex_s32_conj_nmacc.S | 2 +- .../vect_complex_s32_conjugate.S | 2 +- .../vect_complex_s32_conjugate_mul.S | 2 +- .../vect_complex_s32/vect_complex_s32_macc.S | 2 +- .../vect_complex_s32/vect_complex_s32_mag.S | 2 +- .../vect_complex_s32/vect_complex_s32_mul.S | 2 +- .../vect_complex_s32/vect_complex_s32_nmacc.S | 2 +- .../vect_complex_s32_real_mul.S | 2 +- .../vect_complex_s32_squared_mag.S | 2 +- .../vect_complex_s32/vect_complex_s32_sum.S | 2 +- .../vect_complex_s32_to_complex_s16.S | 2 +- .../xs3/vect_f32/vect_complex_f32_conj_macc.S | 2 +- .../xs3/vect_f32/vect_complex_f32_conj_mul.S | 2 +- .../arch/xs3/vect_f32/vect_complex_f32_macc.S | 2 +- .../arch/xs3/vect_f32/vect_complex_f32_mul.S | 2 +- .../src/arch/xs3/vect_f32/vect_f32_add.S | 2 +- .../src/arch/xs3/vect_f32/vect_f32_dot.S | 2 +- .../arch/xs3/vect_f32/vect_f32_max_exponent.S | 2 +- .../src/arch/xs3/vect_f32/vect_f32_to_s32.S | 2 +- .../src/arch/xs3/vect_s16/vect_s16_abs_sum.S | 2 +- .../src/arch/xs3/vect_s16/vect_s16_argmax.S | 2 +- .../src/arch/xs3/vect_s16/vect_s16_argmin.S | 2 +- .../src/arch/xs3/vect_s16/vect_s16_clip.S | 2 +- .../src/arch/xs3/vect_s16/vect_s16_dot.S | 2 +- .../src/arch/xs3/vect_s16/vect_s16_energy.S | 2 +- .../xs3/vect_s16/vect_s16_extract_high_byte.S | 2 +- .../xs3/vect_s16/vect_s16_extract_low_byte.S | 2 +- .../src/arch/xs3/vect_s16/vect_s16_inverse.S | 2 +- .../src/arch/xs3/vect_s16/vect_s16_macc.S | 2 +- .../src/arch/xs3/vect_s16/vect_s16_max.S | 2 +- .../src/arch/xs3/vect_s16/vect_s16_min.S | 2 +- .../src/arch/xs3/vect_s16/vect_s16_mul.S | 2 +- .../src/arch/xs3/vect_s16/vect_s16_nmacc.S | 2 +- 
.../src/arch/xs3/vect_s16/vect_s16_scale.S | 2 +- .../src/arch/xs3/vect_s16/vect_s16_sqrt.S | 2 +- .../src/arch/xs3/vect_s16/vect_s16_sum.S | 2 +- .../src/arch/xs3/vect_s16/vect_s16_to_s32.S | 2 +- .../src/arch/xs3/vect_s32/s32_to_chunk_s32.S | 2 +- .../src/arch/xs3/vect_s32/vect_s32_abs_sum.S | 2 +- .../src/arch/xs3/vect_s32/vect_s32_argmax.S | 2 +- .../src/arch/xs3/vect_s32/vect_s32_argmin.S | 2 +- .../src/arch/xs3/vect_s32/vect_s32_clip.S | 2 +- .../src/arch/xs3/vect_s32/vect_s32_dot.S | 2 +- .../src/arch/xs3/vect_s32/vect_s32_energy.S | 2 +- .../src/arch/xs3/vect_s32/vect_s32_inverse.S | 2 +- .../src/arch/xs3/vect_s32/vect_s32_macc.S | 2 +- .../src/arch/xs3/vect_s32/vect_s32_max.S | 2 +- .../arch/xs3/vect_s32/vect_s32_merge_accs.S | 2 +- .../src/arch/xs3/vect_s32/vect_s32_min.S | 2 +- .../src/arch/xs3/vect_s32/vect_s32_mul.S | 2 +- .../src/arch/xs3/vect_s32/vect_s32_nmacc.S | 2 +- .../src/arch/xs3/vect_s32/vect_s32_scale.S | 2 +- .../arch/xs3/vect_s32/vect_s32_split_accs.S | 2 +- .../src/arch/xs3/vect_s32/vect_s32_sqrt.S | 2 +- .../src/arch/xs3/vect_s32/vect_s32_sum.S | 2 +- .../src/arch/xs3/vect_s32/vect_s32_to_f32.S | 2 +- .../src/arch/xs3/vect_s32/vect_s32_to_s16.S | 2 +- .../src/arch/xs3/vect_s32/vect_s32_unzip.S | 2 +- .../src/arch/xs3/vect_s32/vect_s32_zip.S | 2 +- .../arch/xs3/vect_s8/vect_s8_is_negative.S | 2 +- .../src/arch/xs3/vect_sXX/vect_abs.S | 2 +- .../src/arch/xs3/vect_sXX/vect_add.S | 2 +- .../src/arch/xs3/vect_sXX/vect_headroom.S | 2 +- .../src/arch/xs3/vect_sXX/vect_rect.S | 2 +- .../arch/xs3/vect_sXX/vect_sXX_add_scalar.S | 2 +- .../xs3/vect_sXX/vect_sXX_max_elementwise.S | 2 +- .../xs3/vect_sXX/vect_sXX_min_elementwise.S | 2 +- .../src/arch/xs3/vect_sXX/vect_set.S | 2 +- .../src/arch/xs3/vect_sXX/vect_shl.S | 2 +- .../src/arch/xs3/vect_sXX/vect_sub.S | 2 +- lib_xcore_math/src/bfp/bfp_alloc.c | 2 +- lib_xcore_math/src/bfp/bfp_complex_s16.c | 2 +- lib_xcore_math/src/bfp/bfp_complex_s32.c | 2 +- lib_xcore_math/src/bfp/bfp_init.c | 2 +- 
lib_xcore_math/src/bfp/bfp_s16.c | 2 +- lib_xcore_math/src/bfp/bfp_s32.c | 2 +- .../src/bfp/misc/gradient_constraint.c | 2 +- lib_xcore_math/src/dct/dct8x8.c | 2 +- lib_xcore_math/src/dct/dct_forward.c | 2 +- lib_xcore_math/src/dct/dct_inverse.c | 2 +- .../src/etc/xmath_fft_lut/xmath_fft_lut.c | 2 +- .../src/etc/xmath_fft_lut/xmath_fft_lut.h | 2 +- lib_xcore_math/src/fft/fft_bfp.c | 2 +- lib_xcore_math/src/fft/fft_f32.c | 2 +- lib_xcore_math/src/filter/filters.c | 2 +- lib_xcore_math/src/scalar/scalar_f32.c | 2 +- .../src/scalar/scalar_float_complex_sXX.c | 2 +- lib_xcore_math/src/scalar/scalar_float_s32.c | 2 +- lib_xcore_math/src/scalar/scalar_float_s64.c | 2 +- lib_xcore_math/src/scalar/scalar_ops.c | 2 +- lib_xcore_math/src/scalar/scalar_qXX.c | 2 +- lib_xcore_math/src/vect/chunk_s32.c | 2 +- lib_xcore_math/src/vect/complex_prepare.c | 2 +- lib_xcore_math/src/vect/convolve.c | 2 +- lib_xcore_math/src/vect/mat_mul.c | 2 +- lib_xcore_math/src/vect/prepare.c | 2 +- .../src/vect/vect_complex_mag_rot_tables.c | 2 +- lib_xcore_math/src/vect/vect_complex_s16.c | 2 +- lib_xcore_math/src/vect/vect_complex_s32.c | 2 +- lib_xcore_math/src/vect/vect_f32.c | 2 +- lib_xcore_math/src/vect/vect_float_s32.c | 2 +- lib_xcore_math/src/vect/vect_s16.c | 2 +- lib_xcore_math/src/vect/vect_s32.c | 2 +- lib_xcore_math/src/vect/vpu_const_vects.c | 2 +- lib_xcore_math/src/vect/vpu_const_vects.h | 2 +- lib_xcore_math/src/vect/vpu_helper.h | 2 +- tests/Makefile | 210 +++++++ .../bfp/complex/test_bfp_bitdepth_convert.c | 2 +- .../src/bfp/complex/test_bfp_complex_add.c | 2 +- .../bfp/complex/test_bfp_complex_add_scalar.c | 2 +- .../bfp/complex/test_bfp_complex_conj_macc.c | 2 +- .../bfp/complex/test_bfp_complex_conjugate.c | 2 +- .../complex/test_bfp_complex_conjugate_mul.c | 11 +- .../src/bfp/complex/test_bfp_complex_energy.c | 2 +- .../src/bfp/complex/test_bfp_complex_macc.c | 20 +- .../src/bfp/complex/test_bfp_complex_mag.c | 2 +- .../src/bfp/complex/test_bfp_complex_make.c | 2 +- 
.../src/bfp/complex/test_bfp_complex_mul.c | 11 +- .../bfp/complex/test_bfp_complex_real_mul.c | 2 +- .../bfp/complex/test_bfp_complex_real_scale.c | 2 +- .../src/bfp/complex/test_bfp_complex_scale.c | 13 +- .../complex/test_bfp_complex_squared_mag.c | 10 +- .../src/bfp/complex/test_bfp_complex_sub.c | 2 +- .../complex/test_bfp_complex_use_exponent.c | 2 +- .../src/bfp/complex/test_bfp_sum_complex.c | 11 +- tests/bfp_tests/src/bfp/real/test_bfp_abs.c | 8 +- .../bfp_tests/src/bfp/real/test_bfp_abs_sum.c | 12 +- tests/bfp_tests/src/bfp/real/test_bfp_add.c | 2 +- .../src/bfp/real/test_bfp_add_scalar.c | 2 +- tests/bfp_tests/src/bfp/real/test_bfp_alloc.c | 2 +- .../bfp_tests/src/bfp/real/test_bfp_argmax.c | 2 +- .../bfp_tests/src/bfp/real/test_bfp_argmin.c | 2 +- tests/bfp_tests/src/bfp/real/test_bfp_clip.c | 2 +- .../src/bfp/real/test_bfp_convolve.c | 2 +- .../bfp_tests/src/bfp/real/test_bfp_dealloc.c | 2 +- .../src/bfp/real/test_bfp_depth_convert.c | 2 +- tests/bfp_tests/src/bfp/real/test_bfp_dot.c | 2 +- .../bfp_tests/src/bfp/real/test_bfp_energy.c | 2 +- .../src/bfp/real/test_bfp_headroom.c | 2 +- tests/bfp_tests/src/bfp/real/test_bfp_init.c | 2 +- .../bfp_tests/src/bfp/real/test_bfp_inverse.c | 2 +- tests/bfp_tests/src/bfp/real/test_bfp_macc.c | 2 +- tests/bfp_tests/src/bfp/real/test_bfp_max.c | 16 +- .../src/bfp/real/test_bfp_max_elementwise.c | 2 +- tests/bfp_tests/src/bfp/real/test_bfp_mean.c | 9 +- tests/bfp_tests/src/bfp/real/test_bfp_min.c | 2 +- .../src/bfp/real/test_bfp_min_elementwise.c | 2 +- tests/bfp_tests/src/bfp/real/test_bfp_mul.c | 2 +- tests/bfp_tests/src/bfp/real/test_bfp_rect.c | 2 +- tests/bfp_tests/src/bfp/real/test_bfp_rms.c | 6 +- .../src/bfp/real/test_bfp_s16_accumulate.c | 2 +- tests/bfp_tests/src/bfp/real/test_bfp_scale.c | 2 +- tests/bfp_tests/src/bfp/real/test_bfp_set.c | 2 +- .../src/bfp/real/test_bfp_shl_vect.c | 2 +- .../src/bfp/real/test_bfp_sqrt_vect.c | 2 +- tests/bfp_tests/src/bfp/real/test_bfp_sub.c | 2 +- 
tests/bfp_tests/src/bfp/real/test_bfp_sum.c | 14 +- .../src/bfp/real/test_bfp_use_exponent.c | 2 +- tests/bfp_tests/src/main.c | 2 +- .../src/misc/test_bfp_gradient_constraint.c | 2 +- tests/bfp_tests/src/tst_asserts.h | 2 +- tests/bfp_tests/src/tst_common.c | 2 +- tests/bfp_tests/src/tst_common.h | 2 +- tests/bfp_tests/src/unity_config.h | 2 +- tests/config.xml | 555 +++++++++++++++++ tests/dct_tests/src/lib_dsp/dsp_dct.c | 2 +- tests/dct_tests/src/lib_dsp/dsp_dct.h | 2 +- tests/dct_tests/src/main.c | 2 +- tests/dct_tests/src/test_dct8x8.c | 2 +- tests/dct_tests/src/test_dctXX_forward.c | 2 +- tests/dct_tests/src/test_dctXX_inverse.c | 18 +- tests/dct_tests/src/test_random.h | 2 +- tests/dct_tests/src/tst_common.c | 2 +- tests/dct_tests/src/tst_common.h | 2 +- tests/dct_tests/src/unity_config.h | 2 +- tests/fft_tests/src/main.c | 2 +- tests/fft_tests/src/test_bfp_fft.c | 2 +- tests/fft_tests/src/test_bfp_pack_unpack.c | 2 +- tests/fft_tests/src/test_fft_dif.c | 2 +- tests/fft_tests/src/test_fft_dit.c | 2 +- tests/fft_tests/src/test_fft_helpers.c | 2 +- tests/fft_tests/src/test_fft_mono_adjust.c | 2 +- tests/fft_tests/src/test_issue96.c | 2 +- tests/fft_tests/src/test_random.h | 2 +- tests/fft_tests/src/test_vect_f32_fft.c | 2 +- tests/fft_tests/src/tst_common.c | 2 +- tests/fft_tests/src/tst_common.h | 2 +- tests/fft_tests/src/unity_config.h | 2 +- .../script/test_filter_biquad_s32_case3.py | 2 +- .../test_filter_biquad_sat_s32_case3.py | 2 +- .../src/filter/test_filter_biquad_s32.c | 2 +- .../src/filter/test_filter_biquad_sat_s32.c | 6 +- .../src/filter/test_filter_fir_s16.c | 2 +- .../filter/test_filter_fir_s16_push_sample.c | 2 +- .../src/filter/test_filter_fir_s32.c | 2 +- tests/filter_tests/src/main.c | 2 +- tests/filter_tests/src/tst_common.h | 2 +- tests/filter_tests/src/unity_config.h | 2 +- tests/legacy_build/src/main.c | 2 +- tests/scalar_tests/src/basic/test_cls.c | 2 +- tests/scalar_tests/src/basic/test_hr.c | 2 +- 
.../scalar_tests/src/float/test_fixed_trig.c | 16 +- .../src/float/test_float_convert.c | 2 +- tests/scalar_tests/src/float/test_float_exp.c | 6 +- tests/scalar_tests/src/float/test_float_log.c | 2 +- .../src/float/test_float_logistic.c | 2 +- .../src/float/test_float_s32_sqrt.c | 2 +- .../src/float/test_float_sXX_abs.c | 2 +- .../src/float/test_float_sXX_add.c | 2 +- .../src/float/test_float_sXX_div.c | 2 +- .../src/float/test_float_sXX_ema.c | 2 +- .../src/float/test_float_sXX_gt.c | 2 +- .../src/float/test_float_sXX_mul.c | 2 +- .../src/float/test_float_sXX_sub.c | 2 +- .../scalar_tests/src/float/test_float_trig.c | 2 +- .../scalar_tests/src/float/test_q30_powers.c | 4 +- tests/scalar_tests/src/main.c | 2 +- tests/scalar_tests/src/tst_asserts.h | 2 +- tests/scalar_tests/src/tst_common.c | 2 +- tests/scalar_tests/src/tst_common.h | 2 +- tests/scalar_tests/src/unity_config.h | 2 +- tests/scalar_tests/src/util/test_s32_sqrt.c | 2 +- .../scalar_tests/src/util/test_sXX_inverse.c | 2 +- tests/scalar_tests/src/util/test_sXX_mul.c | 6 +- tests/shared/floating_fft/floating_dct.c | 2 +- tests/shared/floating_fft/floating_dct.h | 2 +- tests/shared/floating_fft/floating_fft.h | 2 +- .../shared/floating_fft/floating_fft_double.c | 2 +- .../shared/floating_fft/floating_fft_float.c | 2 +- tests/shared/floating_fft/floating_fft_util.c | 2 +- tests/shared/pseudo_rand/pseudo_rand.c | 2 +- tests/shared/pseudo_rand/pseudo_rand.h | 2 +- tests/shared/pseudo_rand/rand_frame.c | 2 +- tests/shared/pseudo_rand/rand_frame.h | 2 +- tests/shared/testing/testing.h | 2 +- tests/shared/testing/testing_conv.c | 2 +- tests/shared/testing/testing_diff.c | 2 +- tests/shared/testing/testing_misc.c | 2 +- tests/shared/testing/testing_print.c | 2 +- tests/vect_tests/src/main.c | 2 +- .../matrix/test_mat_mul_s8_x_s16_yield_s32.c | 2 +- .../src/matrix/test_mat_mul_s8_x_s8.c | 2 +- tests/vect_tests/src/tst_asserts.h | 2 +- tests/vect_tests/src/tst_common.c | 2 +- tests/vect_tests/src/tst_common.h | 2 +- 
tests/vect_tests/src/unity_config.h | 2 +- .../src/vect/complex/test_vect_complex_add.c | 2 +- .../complex/test_vect_complex_add_scalar.c | 2 +- .../complex/test_vect_complex_complex_scale.c | 40 +- .../complex/test_vect_complex_conj_macc.c | 2 +- .../complex/test_vect_complex_conjugate.c | 2 +- .../complex/test_vect_complex_conjugate_mul.c | 41 +- .../src/vect/complex/test_vect_complex_macc.c | 2 +- .../src/vect/complex/test_vect_complex_mag.c | 6 +- .../src/vect/complex/test_vect_complex_mul.c | 39 +- .../vect/complex/test_vect_complex_real_mul.c | 50 +- .../complex/test_vect_complex_real_scale.c | 6 +- .../test_vect_complex_s16_to_complex_s32.c | 2 +- .../test_vect_complex_s32_to_complex_s16.c | 2 +- .../complex/test_vect_complex_squared_mag.c | 17 +- .../src/vect/complex/test_vect_complex_sub.c | 2 +- .../src/vect/complex/test_vect_sum_complex.c | 22 +- .../vect/float/test_vect_complex_f32_macc.c | 2 +- .../vect/float/test_vect_complex_f32_mul.c | 2 +- .../src/vect/float/test_vect_f32_add.c | 2 +- .../src/vect/float/test_vect_f32_dot.c | 2 +- .../vect/float/test_vect_f32_max_exponent.c | 2 +- .../src/vect/float/test_vect_f32_to_s32.c | 2 +- .../src/vect/float/test_vect_s32_to_f32.c | 2 +- .../src/vect/stat/test_vect_abs_sum.c | 24 +- .../src/vect/stat/test_vect_argmax.c | 2 +- .../src/vect/stat/test_vect_argmin.c | 2 +- .../src/vect/stat/test_vect_energy.c | 9 +- .../vect_tests/src/vect/stat/test_vect_max.c | 2 +- .../vect_tests/src/vect/stat/test_vect_min.c | 2 +- .../src/vect/test_chunk_s16_accumulate.c | 2 +- tests/vect_tests/src/vect/test_vect_abs.c | 22 +- tests/vect_tests/src/vect/test_vect_add.c | 6 +- .../src/vect/test_vect_add_scalar.c | 2 +- .../src/vect/test_vect_bitdepth_convert.c | 4 +- tests/vect_tests/src/vect/test_vect_clip.c | 30 +- tests/vect_tests/src/vect/test_vect_copy.c | 2 +- tests/vect_tests/src/vect/test_vect_dot.c | 2 +- tests/vect_tests/src/vect/test_vect_exp.c | 6 +- .../vect_tests/src/vect/test_vect_headroom.c | 2 +- 
tests/vect_tests/src/vect/test_vect_inverse.c | 2 +- tests/vect_tests/src/vect/test_vect_log.c | 10 +- tests/vect_tests/src/vect/test_vect_macc.c | 2 +- .../src/vect/test_vect_max_elementwise.c | 3 +- .../src/vect/test_vect_min_elementwise.c | 2 +- tests/vect_tests/src/vect/test_vect_mul.c | 65 +- tests/vect_tests/src/vect/test_vect_rect.c | 2 +- .../src/vect/test_vect_s16_extract.c | 2 +- .../src/vect/test_vect_s32_convolve.c | 2 +- .../src/vect/test_vect_s8_boolean.c | 2 +- tests/vect_tests/src/vect/test_vect_scale.c | 25 +- tests/vect_tests/src/vect/test_vect_set.c | 2 +- tests/vect_tests/src/vect/test_vect_shl.c | 10 +- tests/vect_tests/src/vect/test_vect_shr.c | 10 +- tests/vect_tests/src/vect/test_vect_sqrt.c | 8 +- tests/vect_tests/src/vect/test_vect_sub.c | 8 +- tests/vect_tests/src/vect/test_vect_sum.c | 17 +- tests/vect_tests/src/vect/test_vect_zip.c | 2 +- tests/xs3_tests/src/dummy.xc | 2 +- tests/xs3_tests/src/main.c | 2 +- tests/xs3_tests/src/test_vpu_scalar_ops_s16.c | 4 +- tests/xs3_tests/src/test_vpu_scalar_ops_s32.c | 6 +- tests/xs3_tests/src/test_vpu_scalar_ops_s8.c | 2 +- tests/xs3_tests/src/tst_asserts.h | 2 +- tests/xs3_tests/src/tst_common.c | 2 +- tests/xs3_tests/src/tst_common.h | 2 +- tests/xs3_tests/src/unity_config.h | 2 +- 634 files changed, 21295 insertions(+), 662 deletions(-) create mode 100644 lib_xcore_math/src/arch/vx4b/NOTES.rst create mode 100644 lib_xcore_math/src/arch/vx4b/asm_helper.h create mode 100644 lib_xcore_math/src/arch/vx4b/chunk_s16/chunk_s16_accumulate.almost create mode 100644 lib_xcore_math/src/arch/vx4b/chunk_s16/chunk_s16_accumulate.c create mode 100644 lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_dot.S create mode 100644 lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_log.S create mode 100644 lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_power_series.S create mode 100644 lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_power_series_v2.S create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/dct12_s32.S 
create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/dct16_s32.S create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/dct24_s32.S create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/dct6_s32.S create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/dct8_s32.S create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/dct_adsb_s32.S create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/dct_deconvolve_s32.S create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/idct6_s32.S create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/idct8_s32.S create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/idct_adsb.S create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/idct_convolve.S create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s32/idct_scale.S create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s8/dct8x8_stageA.S create mode 100644 lib_xcore_math/src/arch/vx4b/dct/s8/dct8x8_stageB.S create mode 100644 lib_xcore_math/src/arch/vx4b/dct/vect_s32_flip.S create mode 100644 lib_xcore_math/src/arch/vx4b/fft/dif_fft.S create mode 100644 lib_xcore_math/src/arch/vx4b/fft/dit_fft.S create mode 100644 lib_xcore_math/src/arch/vx4b/fft/fft_hr_lut.S create mode 100644 lib_xcore_math/src/arch/vx4b/fft/fft_index_bit_reversal.S create mode 100644 lib_xcore_math/src/arch/vx4b/fft/fft_mono_adjust.S create mode 100644 lib_xcore_math/src/arch/vx4b/fft/fft_spectra_merge.S create mode 100644 lib_xcore_math/src/arch/vx4b/fft/fft_spectra_split.S create mode 100644 lib_xcore_math/src/arch/vx4b/fft/tail_reverse_complex_s32.S create mode 100644 lib_xcore_math/src/arch/vx4b/filter/filter_biquad_s32.S create mode 100644 lib_xcore_math/src/arch/vx4b/filter/filter_biquad_sat_s32.S create mode 100644 lib_xcore_math/src/arch/vx4b/filter/filter_fir_s16.S create mode 100644 lib_xcore_math/src/arch/vx4b/filter/filter_fir_s32.S create mode 100644 lib_xcore_math/src/arch/vx4b/filter/push_sample_down_s16.S create mode 100644 lib_xcore_math/src/arch/vx4b/filter/push_sample_up_s16.S create mode 
100644 lib_xcore_math/src/arch/vx4b/filter/vect_s32_convolve_valid.S create mode 100644 lib_xcore_math/src/arch/vx4b/matrix/mat_mul_s8_x_s8_yield_s32.S create mode 100644 lib_xcore_math/src/arch/vx4b/misc/chunk_float_s32_log.S create mode 100644 lib_xcore_math/src/arch/vx4b/misc/util.S create mode 100644 lib_xcore_math/src/arch/vx4b/misc/vect_copy.S create mode 100644 lib_xcore_math/src/arch/vx4b/misc/vect_float_s32_ln_prepare.S create mode 100644 lib_xcore_math/src/arch/vx4b/misc/xs3_memcpy.S create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/f32_log2.S create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/f32_norm.S create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/f32_power_series.S create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/f32_sin.S create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/float_s32.c create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/float_s32_exp.almost create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/q24_logistic_fast.S create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/q30_exp_small.S create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/q30_odd_powers.S create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/q30_powers.S create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/radians_to_sbrads.S create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/sbrad_sin.S create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/sbrad_tan.S create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s16.S create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s32.S create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s8.S create mode 100644 lib_xcore_math/src/arch/vx4b/scalar/sqrt_s32.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_complex_scale.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conj_macc.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conj_nmacc.S create mode 100644 
lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conjugate_mul.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_macc.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_mag.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_mul.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_nmacc.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_real_mul.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_squared_mag.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_sum.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_to_complex_s32.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_complex_scale.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conj_macc.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conj_nmacc.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conjugate.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conjugate_mul.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_macc.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_mag.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_mul.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_nmacc.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_real_mul.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_squared_mag.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_sum.S create mode 100644 
lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_to_complex_s16.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_conj_macc.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_conj_mul.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_macc.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_mul.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_add.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_dot.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_max_exponent.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_to_s32.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_abs.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_abs_sum.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_argmax.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_argmin.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_clip.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_dot.c create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_energy.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_extract_high_byte.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_extract_low_byte.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_inverse.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_macc.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_max.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_min.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_mul.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_nmacc.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_scale.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_sqrt.S create mode 
100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_sum.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_to_s32.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/s32_to_chunk_s32.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_abs.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_abs_sum.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_argmax.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_argmin.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_clip.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_dot.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_energy.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_inverse.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_macc.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_max.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_merge_accs.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_min.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_mul.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_nmacc.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_scale.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_split_accs.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_sqrt.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_sum.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_to_f32.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_to_s16.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_unzip.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_zip.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_s8/vect_s8_is_negative.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_sXX/vect_add.S create mode 
100644 lib_xcore_math/src/arch/vx4b/vect_sXX/vect_headroom.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_sXX/vect_rect.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_add_scalar.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_max_elementwise.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_min_elementwise.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_sXX/vect_set.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_sXX/vect_shl.S create mode 100644 lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sub.S create mode 100644 tests/Makefile create mode 100644 tests/config.xml diff --git a/doc/rst/src/reference/notes.h b/doc/rst/src/reference/notes.h index 5aa326ef..67390f10 100644 --- a/doc/rst/src/reference/notes.h +++ b/doc/rst/src/reference/notes.h @@ -1,4 +1,4 @@ -// Copyright 2021-2024 XMOS LIMITED. +// Copyright 2021-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. // This file exists as a compatibility work-around between vanilla Doxygen and diff --git a/examples/app_bfp_demo/src/main.c b/examples/app_bfp_demo/src/main.c index 276df4f5..d1acacd1 100644 --- a/examples/app_bfp_demo/src/main.c +++ b/examples/app_bfp_demo/src/main.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/examples/app_fft_demo/src/main.c b/examples/app_fft_demo/src/main.c index a3649ea5..97ea7da8 100644 --- a/examples/app_fft_demo/src/main.c +++ b/examples/app_fft_demo/src/main.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
diff --git a/examples/app_filter_demo/src/filter_16bit_fir.c b/examples/app_filter_demo/src/filter_16bit_fir.c index 3aab0391..f05b1ad6 100644 --- a/examples/app_filter_demo/src/filter_16bit_fir.c +++ b/examples/app_filter_demo/src/filter_16bit_fir.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/examples/app_filter_demo/src/filter_32bit_biquad.c b/examples/app_filter_demo/src/filter_32bit_biquad.c index 34188b80..c38876e3 100644 --- a/examples/app_filter_demo/src/filter_32bit_biquad.c +++ b/examples/app_filter_demo/src/filter_32bit_biquad.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/examples/app_filter_demo/src/filter_32bit_fir.c b/examples/app_filter_demo/src/filter_32bit_fir.c index c986d4c8..8c5b496c 100644 --- a/examples/app_filter_demo/src/filter_32bit_fir.c +++ b/examples/app_filter_demo/src/filter_32bit_fir.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/examples/app_filter_demo/src/main.c b/examples/app_filter_demo/src/main.c index 499274e4..04c12fc7 100644 --- a/examples/app_filter_demo/src/main.c +++ b/examples/app_filter_demo/src/main.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/examples/app_vect_demo/src/main.c b/examples/app_vect_demo/src/main.c index 73a59c04..7b870389 100644 --- a/examples/app_vect_demo/src/main.c +++ b/examples/app_vect_demo/src/main.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. 
// This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/examples/app_vect_demo/src/vect_complex_s16_example.c b/examples/app_vect_demo/src/vect_complex_s16_example.c index 39277583..dcfd3578 100644 --- a/examples/app_vect_demo/src/vect_complex_s16_example.c +++ b/examples/app_vect_demo/src/vect_complex_s16_example.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/examples/app_vect_demo/src/vect_s32_example.c b/examples/app_vect_demo/src/vect_s32_example.c index 5edcfeb4..d52c603b 100644 --- a/examples/app_vect_demo/src/vect_s32_example.c +++ b/examples/app_vect_demo/src/vect_s32_example.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/lib_xcore_math/CMakeLists.txt b/lib_xcore_math/CMakeLists.txt index be41759e..299b2df6 100644 --- a/lib_xcore_math/CMakeLists.txt +++ b/lib_xcore_math/CMakeLists.txt @@ -15,7 +15,8 @@ file( GLOB_RECURSE SOURCES_C "src/vect/*.c" "src/filter/*.c" "src/scalar/*.c" ) file( GLOB_RECURSE SOURCES_CPP "src/*.cpp" ) -file( GLOB_RECURSE SOURCES_ASM "src/*.S" ) +file( GLOB_RECURSE SOURCES_ASM_XS3 "src/arch/xs3/*.S" ) +file( GLOB_RECURSE SOURCES_ASM_VX4B "src/arch/vx4b/*.S" ) file( GLOB_RECURSE SOURCES_REF "src/arch/ref/*.c" ) add_library( ${LIB_NAME} STATIC ) @@ -24,7 +25,8 @@ target_sources( ${LIB_NAME} PRIVATE ${SOURCES_C} ${SOURCES_CPP} - $<$:${SOURCES_ASM}> + $<$:${SOURCES_ASM_XS3}> + $<$:${SOURCES_ASM_VX4B}> $<$:${SOURCES_REF}> $<$:${SOURCES_REF}> $<$:${SOURCES_REF}> diff --git a/lib_xcore_math/api/xcore_math.h b/lib_xcore_math/api/xcore_math.h index f20c9065..35206223 100644 --- a/lib_xcore_math/api/xcore_math.h +++ b/lib_xcore_math/api/xcore_math.h @@ -1,4 +1,4 @@ -// Copyright 2024 XMOS LIMITED. 
+// Copyright 2024-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #ifndef _XCORE_MATH_H_ diff --git a/lib_xcore_math/api/xmath/_support/dct_impl.h b/lib_xcore_math/api/xmath/_support/dct_impl.h index c1947d37..7e0f01b3 100644 --- a/lib_xcore_math/api/xmath/_support/dct_impl.h +++ b/lib_xcore_math/api/xmath/_support/dct_impl.h @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/_support/fft_impl.h b/lib_xcore_math/api/xmath/_support/fft_impl.h index 7ab4eae8..66e35af8 100644 --- a/lib_xcore_math/api/xmath/_support/fft_impl.h +++ b/lib_xcore_math/api/xmath/_support/fft_impl.h @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/api.h b/lib_xcore_math/api/xmath/api.h index 9e51af19..80273a0f 100644 --- a/lib_xcore_math/api/xmath/api.h +++ b/lib_xcore_math/api/xmath/api.h @@ -1,4 +1,4 @@ -// Copyright 2021-2022 XMOS LIMITED. +// Copyright 2021-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/bfp/bfp.h b/lib_xcore_math/api/xmath/bfp/bfp.h index 98b61fa4..3baa654a 100644 --- a/lib_xcore_math/api/xmath/bfp/bfp.h +++ b/lib_xcore_math/api/xmath/bfp/bfp.h @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#pragma once diff --git a/lib_xcore_math/api/xmath/bfp/bfp_complex_s16.h b/lib_xcore_math/api/xmath/bfp/bfp_complex_s16.h index ce9200a6..3cab8fb6 100644 --- a/lib_xcore_math/api/xmath/bfp/bfp_complex_s16.h +++ b/lib_xcore_math/api/xmath/bfp/bfp_complex_s16.h @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/bfp/bfp_complex_s32.h b/lib_xcore_math/api/xmath/bfp/bfp_complex_s32.h index 2047a509..89590cd3 100644 --- a/lib_xcore_math/api/xmath/bfp/bfp_complex_s32.h +++ b/lib_xcore_math/api/xmath/bfp/bfp_complex_s32.h @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/bfp/bfp_misc.h b/lib_xcore_math/api/xmath/bfp/bfp_misc.h index d01a6f23..d35003c8 100644 --- a/lib_xcore_math/api/xmath/bfp/bfp_misc.h +++ b/lib_xcore_math/api/xmath/bfp/bfp_misc.h @@ -1,4 +1,4 @@ -// Copyright 2021-2022 XMOS LIMITED. +// Copyright 2021-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/bfp/bfp_s16.h b/lib_xcore_math/api/xmath/bfp/bfp_s16.h index fb2f7f12..0230d4c8 100644 --- a/lib_xcore_math/api/xmath/bfp/bfp_s16.h +++ b/lib_xcore_math/api/xmath/bfp/bfp_s16.h @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/bfp/bfp_s32.h b/lib_xcore_math/api/xmath/bfp/bfp_s32.h index 8c48a747..2402916a 100644 --- a/lib_xcore_math/api/xmath/bfp/bfp_s32.h +++ b/lib_xcore_math/api/xmath/bfp/bfp_s32.h @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. 
// This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/dct.h b/lib_xcore_math/api/xmath/dct.h index ace528cb..60cadbd2 100644 --- a/lib_xcore_math/api/xmath/dct.h +++ b/lib_xcore_math/api/xmath/dct.h @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/fft.h b/lib_xcore_math/api/xmath/fft.h index 46a5dd2f..e1f27682 100644 --- a/lib_xcore_math/api/xmath/fft.h +++ b/lib_xcore_math/api/xmath/fft.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/filter.h b/lib_xcore_math/api/xmath/filter.h index 2b029d5e..7e8f0247 100644 --- a/lib_xcore_math/api/xmath/filter.h +++ b/lib_xcore_math/api/xmath/filter.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/q_format.h b/lib_xcore_math/api/xmath/q_format.h index e41af168..d7980a01 100644 --- a/lib_xcore_math/api/xmath/q_format.h +++ b/lib_xcore_math/api/xmath/q_format.h @@ -1,4 +1,4 @@ -// Copyright 2022 XMOS LIMITED. +// Copyright 2022-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/scalar/f32.h b/lib_xcore_math/api/xmath/scalar/f32.h index fdee5657..dfb63db9 100644 --- a/lib_xcore_math/api/xmath/scalar/f32.h +++ b/lib_xcore_math/api/xmath/scalar/f32.h @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#pragma once diff --git a/lib_xcore_math/api/xmath/scalar/float_complex_s16.h b/lib_xcore_math/api/xmath/scalar/float_complex_s16.h index dc7fe911..7a9a0749 100644 --- a/lib_xcore_math/api/xmath/scalar/float_complex_s16.h +++ b/lib_xcore_math/api/xmath/scalar/float_complex_s16.h @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/scalar/float_complex_s32.h b/lib_xcore_math/api/xmath/scalar/float_complex_s32.h index 4ca1a817..d51de583 100644 --- a/lib_xcore_math/api/xmath/scalar/float_complex_s32.h +++ b/lib_xcore_math/api/xmath/scalar/float_complex_s32.h @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/scalar/float_s32.h b/lib_xcore_math/api/xmath/scalar/float_s32.h index 12dd2ddb..8a691f83 100644 --- a/lib_xcore_math/api/xmath/scalar/float_s32.h +++ b/lib_xcore_math/api/xmath/scalar/float_s32.h @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/scalar/s16.h b/lib_xcore_math/api/xmath/scalar/s16.h index 981b626c..5c2c3379 100644 --- a/lib_xcore_math/api/xmath/scalar/s16.h +++ b/lib_xcore_math/api/xmath/scalar/s16.h @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/scalar/s32.h b/lib_xcore_math/api/xmath/scalar/s32.h index 141330f5..f9939a13 100644 --- a/lib_xcore_math/api/xmath/scalar/s32.h +++ b/lib_xcore_math/api/xmath/scalar/s32.h @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. 
+// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/scalar/scalar.h b/lib_xcore_math/api/xmath/scalar/scalar.h index 4a6c12ca..f4284765 100644 --- a/lib_xcore_math/api/xmath/scalar/scalar.h +++ b/lib_xcore_math/api/xmath/scalar/scalar.h @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/scalar/scalar_misc.h b/lib_xcore_math/api/xmath/scalar/scalar_misc.h index ce4c217e..48cc2620 100644 --- a/lib_xcore_math/api/xmath/scalar/scalar_misc.h +++ b/lib_xcore_math/api/xmath/scalar/scalar_misc.h @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/types.h b/lib_xcore_math/api/xmath/types.h index 847145af..05064704 100644 --- a/lib_xcore_math/api/xmath/types.h +++ b/lib_xcore_math/api/xmath/types.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/util.h b/lib_xcore_math/api/xmath/util.h index 1cd5828e..35ac2127 100644 --- a/lib_xcore_math/api/xmath/util.h +++ b/lib_xcore_math/api/xmath/util.h @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#pragma once @@ -238,12 +238,16 @@ void xs3_memcpy( static inline unsigned cls( const int32_t a) { -#ifdef __XS3A__ +#if defined(__XS3A__) unsigned res; asm( "cls %0, %1" : "=r"(res) : "r"(a) ); return res; +#elif defined(__VX4B__) + unsigned res; + asm( "xm.cls %0, %1" : "=r"(res) : "r"(a) ); + return res; #else if(a == 0 || a == -1) diff --git a/lib_xcore_math/api/xmath/vect/chunk_s32.h b/lib_xcore_math/api/xmath/vect/chunk_s32.h index 3bfcdbde..58668541 100644 --- a/lib_xcore_math/api/xmath/vect/chunk_s32.h +++ b/lib_xcore_math/api/xmath/vect/chunk_s32.h @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/vect/vect.h b/lib_xcore_math/api/xmath/vect/vect.h index aee286d3..69d370f9 100644 --- a/lib_xcore_math/api/xmath/vect/vect.h +++ b/lib_xcore_math/api/xmath/vect/vect.h @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/vect/vect_complex_s16.h b/lib_xcore_math/api/xmath/vect/vect_complex_s16.h index 3cf9b9d6..f23825e1 100644 --- a/lib_xcore_math/api/xmath/vect/vect_complex_s16.h +++ b/lib_xcore_math/api/xmath/vect/vect_complex_s16.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/vect/vect_complex_s16_prepare.h b/lib_xcore_math/api/xmath/vect/vect_complex_s16_prepare.h index 138a387e..4cc7175c 100644 --- a/lib_xcore_math/api/xmath/vect/vect_complex_s16_prepare.h +++ b/lib_xcore_math/api/xmath/vect/vect_complex_s16_prepare.h @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. 
// This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/vect/vect_complex_s32.h b/lib_xcore_math/api/xmath/vect/vect_complex_s32.h index 082012ac..c3c13912 100644 --- a/lib_xcore_math/api/xmath/vect/vect_complex_s32.h +++ b/lib_xcore_math/api/xmath/vect/vect_complex_s32.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/vect/vect_complex_s32_prepare.h b/lib_xcore_math/api/xmath/vect/vect_complex_s32_prepare.h index e402c256..802904cd 100644 --- a/lib_xcore_math/api/xmath/vect/vect_complex_s32_prepare.h +++ b/lib_xcore_math/api/xmath/vect/vect_complex_s32_prepare.h @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/vect/vect_f32.h b/lib_xcore_math/api/xmath/vect/vect_f32.h index af30f2af..c6fd5786 100644 --- a/lib_xcore_math/api/xmath/vect/vect_f32.h +++ b/lib_xcore_math/api/xmath/vect/vect_f32.h @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/vect/vect_mixed.h b/lib_xcore_math/api/xmath/vect/vect_mixed.h index adcab5d4..35eac7e6 100644 --- a/lib_xcore_math/api/xmath/vect/vect_mixed.h +++ b/lib_xcore_math/api/xmath/vect/vect_mixed.h @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#pragma once diff --git a/lib_xcore_math/api/xmath/vect/vect_s16.h b/lib_xcore_math/api/xmath/vect/vect_s16.h index efb387f7..a272c3ad 100644 --- a/lib_xcore_math/api/xmath/vect/vect_s16.h +++ b/lib_xcore_math/api/xmath/vect/vect_s16.h @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/vect/vect_s16_prepare.h b/lib_xcore_math/api/xmath/vect/vect_s16_prepare.h index 8ce17e5c..21e85d03 100644 --- a/lib_xcore_math/api/xmath/vect/vect_s16_prepare.h +++ b/lib_xcore_math/api/xmath/vect/vect_s16_prepare.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/vect/vect_s32.h b/lib_xcore_math/api/xmath/vect/vect_s32.h index 8a3666e9..e880550a 100644 --- a/lib_xcore_math/api/xmath/vect/vect_s32.h +++ b/lib_xcore_math/api/xmath/vect/vect_s32.h @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/vect/vect_s32_prepare.h b/lib_xcore_math/api/xmath/vect/vect_s32_prepare.h index a80cb47f..8117c82b 100644 --- a/lib_xcore_math/api/xmath/vect/vect_s32_prepare.h +++ b/lib_xcore_math/api/xmath/vect/vect_s32_prepare.h @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/vect/vect_s8.h b/lib_xcore_math/api/xmath/vect/vect_s8.h index b4e9e26e..37a051cb 100644 --- a/lib_xcore_math/api/xmath/vect/vect_s8.h +++ b/lib_xcore_math/api/xmath/vect/vect_s8.h @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. 
+// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/xmath.h b/lib_xcore_math/api/xmath/xmath.h index b6b0fa8f..1857e314 100644 --- a/lib_xcore_math/api/xmath/xmath.h +++ b/lib_xcore_math/api/xmath/xmath.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/xmath_conf.h b/lib_xcore_math/api/xmath/xmath_conf.h index c6b44616..868d39f9 100644 --- a/lib_xcore_math/api/xmath/xmath_conf.h +++ b/lib_xcore_math/api/xmath/xmath_conf.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/api/xmath/xs3/vpu_info.h b/lib_xcore_math/api/xmath/xs3/vpu_info.h index d219977d..a69aa2db 100644 --- a/lib_xcore_math/api/xmath/xs3/vpu_info.h +++ b/lib_xcore_math/api/xmath/xs3/vpu_info.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#pragma once @@ -72,7 +72,11 @@ enum { * * @ingroup xs3_vpu_info */ -#define VPU_INT8_MIN ( -0x7F ) +#if defined(__VX4B__) +#define VPU_INT8_MIN ( INT8_MIN) +#else +#define VPU_INT8_MIN ( -0x7F ) +#endif /** The upper saturation bound for 16-bit elements * * @ingroup xs3_vpu_info @@ -82,7 +86,11 @@ enum { * * @ingroup xs3_vpu_info */ +#if defined(__VX4B__) +#define VPU_INT16_MIN ( INT16_MIN) +#else #define VPU_INT16_MIN ( -0x7FFF ) +#endif /** The upper saturation bound for 32-bit elements and 32-bit accumulators * * @ingroup xs3_vpu_info @@ -92,7 +100,11 @@ enum { * * @ingroup xs3_vpu_info */ +#if defined(__VX4B__) +#define VPU_INT32_MIN ( INT32_MIN) +#else #define VPU_INT32_MIN ( -0x7FFFFFFF ) +#endif /** The upper saturation bound for 40-bit accumulators * * @ingroup xs3_vpu_info @@ -102,7 +114,11 @@ enum { * * @ingroup xs3_vpu_info */ +#if defined(__VX4B__) +#define VPU_INT40_MIN ( -0x8000000000LL) +#else #define VPU_INT40_MIN ( -0x7FFFFFFFFFLL ) +#endif /** * Number of accumulator bits in each operating mode. diff --git a/lib_xcore_math/api/xmath/xs3/vpu_scalar_ops.h b/lib_xcore_math/api/xmath/xs3/vpu_scalar_ops.h index aa1f7615..9ec9cb1a 100644 --- a/lib_xcore_math/api/xmath/xs3/vpu_scalar_ops.h +++ b/lib_xcore_math/api/xmath/xs3/vpu_scalar_ops.h @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once @@ -149,6 +149,13 @@ int16_t vlmul16( const int16_t x, const int16_t y); +/** + * Implements the logic of the VX4B form of the VLMUL instruction in 16-bit mode: the 32-bit product of x and y is rounded, shifted right by 15 bits and saturated to 16 bits. + */ +int16_t vlmul16_vx4b( + const int16_t x, + const int16_t y); + /** * Implements the logic of the VLMACC instruction in 16-bit mode. 
*/ @@ -170,7 +177,11 @@ vpu_int16_acc_t vlmaccr16( */ int16_t vlsat16( const vpu_int16_acc_t acc, + #if defined(__VX4B__) + const right_shift_t sat); + #else const unsigned sat); + #endif /** * Implements the logic of the VADDDR instruction in 16-bit mode. 
diff --git a/lib_xcore_math/python/gen_biquad_filter_s32.py b/lib_xcore_math/python/gen_biquad_filter_s32.py index 11972696..d9654402 100644 --- a/lib_xcore_math/python/gen_biquad_filter_s32.py +++ b/lib_xcore_math/python/gen_biquad_filter_s32.py @@ -1,4 +1,4 @@ -# Copyright 2021-2024 XMOS LIMITED. +# Copyright 2021-2026 XMOS LIMITED. # This Software is subject to the terms of the XMOS Public Licence: Version 1. # This Software is subject to the terms of the XMOS Public Licence: Version 1. import numpy as np diff --git a/lib_xcore_math/python/gen_fft_table.py b/lib_xcore_math/python/gen_fft_table.py index 5029491d..56258ba2 100644 --- a/lib_xcore_math/python/gen_fft_table.py +++ b/lib_xcore_math/python/gen_fft_table.py @@ -1,4 +1,4 @@ -# Copyright 2020-2024 XMOS LIMITED. +# Copyright 2020-2026 XMOS LIMITED. # This Software is subject to the terms of the XMOS Public Licence: Version 1. import numpy as np import argparse diff --git a/lib_xcore_math/python/gen_fir_filter_s16.py b/lib_xcore_math/python/gen_fir_filter_s16.py index e8c9bcc5..a9134958 100644 --- a/lib_xcore_math/python/gen_fir_filter_s16.py +++ b/lib_xcore_math/python/gen_fir_filter_s16.py @@ -1,4 +1,4 @@ -# Copyright 2021-2024 XMOS LIMITED. +# Copyright 2021-2026 XMOS LIMITED. # This Software is subject to the terms of the XMOS Public Licence: Version 1. # This Software is subject to the terms of the XMOS Public Licence: Version 1. import numpy as np diff --git a/lib_xcore_math/python/gen_fir_filter_s32.py b/lib_xcore_math/python/gen_fir_filter_s32.py index 27ea7ab2..a6b998d7 100644 --- a/lib_xcore_math/python/gen_fir_filter_s32.py +++ b/lib_xcore_math/python/gen_fir_filter_s32.py @@ -1,4 +1,4 @@ -# Copyright 2021-2024 XMOS LIMITED. +# Copyright 2021-2026 XMOS LIMITED. # This Software is subject to the terms of the XMOS Public Licence: Version 1. # This Software is subject to the terms of the XMOS Public Licence: Version 1. 
import numpy as np diff --git a/lib_xcore_math/python/gen_rot_table.py b/lib_xcore_math/python/gen_rot_table.py index d4377153..bb191c38 100644 --- a/lib_xcore_math/python/gen_rot_table.py +++ b/lib_xcore_math/python/gen_rot_table.py @@ -1,4 +1,4 @@ -# Copyright 2020-2024 XMOS LIMITED. +# Copyright 2020-2026 XMOS LIMITED. # This Software is subject to the terms of the XMOS Public Licence: Version 1. import numpy as np diff --git a/lib_xcore_math/python/xmath_script.py b/lib_xcore_math/python/xmath_script.py index c91a4429..2718c691 100644 --- a/lib_xcore_math/python/xmath_script.py +++ b/lib_xcore_math/python/xmath_script.py @@ -1,4 +1,4 @@ -# Copyright 2021-2024 XMOS LIMITED. +# Copyright 2021-2026 XMOS LIMITED. # This Software is subject to the terms of the XMOS Public Licence: Version 1. import numpy as np diff --git a/lib_xcore_math/src/arch/ref/bool/vect_s8_is_negative.c b/lib_xcore_math/src/arch/ref/bool/vect_s8_is_negative.c index 944a48b5..285fd7fe 100644 --- a/lib_xcore_math/src/arch/ref/bool/vect_s8_is_negative.c +++ b/lib_xcore_math/src/arch/ref/bool/vect_s8_is_negative.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/lib_xcore_math/src/arch/ref/chunk.c b/lib_xcore_math/src/arch/ref/chunk.c index ff89f546..84774c0a 100644 --- a/lib_xcore_math/src/arch/ref/chunk.c +++ b/lib_xcore_math/src/arch/ref/chunk.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/chunk_s16_accumulate.c b/lib_xcore_math/src/arch/ref/chunk_s16_accumulate.c index 57a4cbe0..6695d511 100644 --- a/lib_xcore_math/src/arch/ref/chunk_s16_accumulate.c +++ b/lib_xcore_math/src/arch/ref/chunk_s16_accumulate.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. 
+// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/complex/vect_complex_conj_macc.c b/lib_xcore_math/src/arch/ref/complex/vect_complex_conj_macc.c index 454079c9..55b7e546 100644 --- a/lib_xcore_math/src/arch/ref/complex/vect_complex_conj_macc.c +++ b/lib_xcore_math/src/arch/ref/complex/vect_complex_conj_macc.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. // XMOS Public License: Version 1 diff --git a/lib_xcore_math/src/arch/ref/complex/vect_complex_conjugate.c b/lib_xcore_math/src/arch/ref/complex/vect_complex_conjugate.c index cd36ae79..46c4dba3 100644 --- a/lib_xcore_math/src/arch/ref/complex/vect_complex_conjugate.c +++ b/lib_xcore_math/src/arch/ref/complex/vect_complex_conjugate.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/complex/vect_complex_depth_convert.c b/lib_xcore_math/src/arch/ref/complex/vect_complex_depth_convert.c index 774b5db6..eb7568d8 100644 --- a/lib_xcore_math/src/arch/ref/complex/vect_complex_depth_convert.c +++ b/lib_xcore_math/src/arch/ref/complex/vect_complex_depth_convert.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/complex/vect_complex_macc.c b/lib_xcore_math/src/arch/ref/complex/vect_complex_macc.c index 7f7751c2..6a00a946 100644 --- a/lib_xcore_math/src/arch/ref/complex/vect_complex_macc.c +++ b/lib_xcore_math/src/arch/ref/complex/vect_complex_macc.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. 
// This Software is subject to the terms of the XMOS Public Licence: Version 1. // XMOS Public License: Version 1 diff --git a/lib_xcore_math/src/arch/ref/complex/vect_complex_mag.c b/lib_xcore_math/src/arch/ref/complex/vect_complex_mag.c index cf993a68..bc22f1e9 100644 --- a/lib_xcore_math/src/arch/ref/complex/vect_complex_mag.c +++ b/lib_xcore_math/src/arch/ref/complex/vect_complex_mag.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/complex/vect_complex_mul.c b/lib_xcore_math/src/arch/ref/complex/vect_complex_mul.c index f171a413..5f81024c 100644 --- a/lib_xcore_math/src/arch/ref/complex/vect_complex_mul.c +++ b/lib_xcore_math/src/arch/ref/complex/vect_complex_mul.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/complex/vect_complex_sum.c b/lib_xcore_math/src/arch/ref/complex/vect_complex_sum.c index 6c50d30e..3d89a07c 100644 --- a/lib_xcore_math/src/arch/ref/complex/vect_complex_sum.c +++ b/lib_xcore_math/src/arch/ref/complex/vect_complex_sum.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/dct/dct.c b/lib_xcore_math/src/arch/ref/dct/dct.c index d84ac42c..2c266868 100644 --- a/lib_xcore_math/src/arch/ref/dct/dct.c +++ b/lib_xcore_math/src/arch/ref/dct/dct.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/lib_xcore_math/src/arch/ref/dct/dct8x8.c b/lib_xcore_math/src/arch/ref/dct/dct8x8.c index 8d92d2a3..8e1a45f4 100644 --- a/lib_xcore_math/src/arch/ref/dct/dct8x8.c +++ b/lib_xcore_math/src/arch/ref/dct/dct8x8.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/dct/idct.c b/lib_xcore_math/src/arch/ref/dct/idct.c index 65e603d8..25bfef17 100644 --- a/lib_xcore_math/src/arch/ref/dct/idct.c +++ b/lib_xcore_math/src/arch/ref/dct/idct.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/f32.c b/lib_xcore_math/src/arch/ref/f32.c index 147d5dbc..4778f2dd 100644 --- a/lib_xcore_math/src/arch/ref/f32.c +++ b/lib_xcore_math/src/arch/ref/f32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/fft/fft_dif.c b/lib_xcore_math/src/arch/ref/fft/fft_dif.c index 1a84a8e7..b7fb9143 100644 --- a/lib_xcore_math/src/arch/ref/fft/fft_dif.c +++ b/lib_xcore_math/src/arch/ref/fft/fft_dif.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/fft/fft_dit.c b/lib_xcore_math/src/arch/ref/fft/fft_dit.c index 7a30470a..fe7869fd 100644 --- a/lib_xcore_math/src/arch/ref/fft/fft_dit.c +++ b/lib_xcore_math/src/arch/ref/fft/fft_dit.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/lib_xcore_math/src/arch/ref/fft/fft_util.c b/lib_xcore_math/src/arch/ref/fft/fft_util.c index 73f2fd2f..82353959 100644 --- a/lib_xcore_math/src/arch/ref/fft/fft_util.c +++ b/lib_xcore_math/src/arch/ref/fft/fft_util.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/filter/filter_biquad_s32.c b/lib_xcore_math/src/arch/ref/filter/filter_biquad_s32.c index 750d00bf..191cb2d7 100644 --- a/lib_xcore_math/src/arch/ref/filter/filter_biquad_s32.c +++ b/lib_xcore_math/src/arch/ref/filter/filter_biquad_s32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/lib_xcore_math/src/arch/ref/filter/filter_biquad_sat_s32.c b/lib_xcore_math/src/arch/ref/filter/filter_biquad_sat_s32.c index 23490e6d..62133156 100644 --- a/lib_xcore_math/src/arch/ref/filter/filter_biquad_sat_s32.c +++ b/lib_xcore_math/src/arch/ref/filter/filter_biquad_sat_s32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/lib_xcore_math/src/arch/ref/filter/filter_fir_s16.c b/lib_xcore_math/src/arch/ref/filter/filter_fir_s16.c index 42c924e5..b91a2e03 100644 --- a/lib_xcore_math/src/arch/ref/filter/filter_fir_s16.c +++ b/lib_xcore_math/src/arch/ref/filter/filter_fir_s16.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
diff --git a/lib_xcore_math/src/arch/ref/filter/filter_fir_s32.c b/lib_xcore_math/src/arch/ref/filter/filter_fir_s32.c index e4756a02..e597b147 100644 --- a/lib_xcore_math/src/arch/ref/filter/filter_fir_s32.c +++ b/lib_xcore_math/src/arch/ref/filter/filter_fir_s32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/lib_xcore_math/src/arch/ref/float_s32.c b/lib_xcore_math/src/arch/ref/float_s32.c index bf1641cd..5cc324f1 100644 --- a/lib_xcore_math/src/arch/ref/float_s32.c +++ b/lib_xcore_math/src/arch/ref/float_s32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/matrix/mat_mul_s8_x_s8_yield_s32.c b/lib_xcore_math/src/arch/ref/matrix/mat_mul_s8_x_s8_yield_s32.c index bf9f77ca..193f0fa9 100644 --- a/lib_xcore_math/src/arch/ref/matrix/mat_mul_s8_x_s8_yield_s32.c +++ b/lib_xcore_math/src/arch/ref/matrix/mat_mul_s8_x_s8_yield_s32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/misc.c b/lib_xcore_math/src/arch/ref/misc.c index 01cff09d..18ce4536 100644 --- a/lib_xcore_math/src/arch/ref/misc.c +++ b/lib_xcore_math/src/arch/ref/misc.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/qXX.c b/lib_xcore_math/src/arch/ref/qXX.c index 27b4fdd6..69d13f5b 100644 --- a/lib_xcore_math/src/arch/ref/qXX.c +++ b/lib_xcore_math/src/arch/ref/qXX.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. 
// This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/s32_sqrt.c b/lib_xcore_math/src/arch/ref/s32_sqrt.c index ac6a21c2..1033297f 100644 --- a/lib_xcore_math/src/arch/ref/s32_sqrt.c +++ b/lib_xcore_math/src/arch/ref/s32_sqrt.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/vect_abs_clip_rect.c b/lib_xcore_math/src/arch/ref/vect_abs_clip_rect.c index 3a21d3e3..082d72df 100644 --- a/lib_xcore_math/src/arch/ref/vect_abs_clip_rect.c +++ b/lib_xcore_math/src/arch/ref/vect_abs_clip_rect.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/vect_add_sub.c b/lib_xcore_math/src/arch/ref/vect_add_sub.c index 87550144..98f188e6 100644 --- a/lib_xcore_math/src/arch/ref/vect_add_sub.c +++ b/lib_xcore_math/src/arch/ref/vect_add_sub.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/vect_convolve.c b/lib_xcore_math/src/arch/ref/vect_convolve.c index ef542e75..6f2de966 100644 --- a/lib_xcore_math/src/arch/ref/vect_convolve.c +++ b/lib_xcore_math/src/arch/ref/vect_convolve.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/lib_xcore_math/src/arch/ref/vect_copy.c b/lib_xcore_math/src/arch/ref/vect_copy.c index 2a17e762..1b38dd05 100644 --- a/lib_xcore_math/src/arch/ref/vect_copy.c +++ b/lib_xcore_math/src/arch/ref/vect_copy.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/vect_depth_convert.c b/lib_xcore_math/src/arch/ref/vect_depth_convert.c index 69f63a0c..19f85000 100644 --- a/lib_xcore_math/src/arch/ref/vect_depth_convert.c +++ b/lib_xcore_math/src/arch/ref/vect_depth_convert.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/vect_dot.c b/lib_xcore_math/src/arch/ref/vect_dot.c index 0b98ebaf..700fd2b8 100644 --- a/lib_xcore_math/src/arch/ref/vect_dot.c +++ b/lib_xcore_math/src/arch/ref/vect_dot.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/vect_f32.c b/lib_xcore_math/src/arch/ref/vect_f32.c index 26aaa178..7543c66e 100644 --- a/lib_xcore_math/src/arch/ref/vect_f32.c +++ b/lib_xcore_math/src/arch/ref/vect_f32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/vect_headroom.c b/lib_xcore_math/src/arch/ref/vect_headroom.c index 4700f1d3..e8e77e8d 100644 --- a/lib_xcore_math/src/arch/ref/vect_headroom.c +++ b/lib_xcore_math/src/arch/ref/vect_headroom.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. 
// This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/vect_inverse.c b/lib_xcore_math/src/arch/ref/vect_inverse.c index 656fe774..d8845d86 100644 --- a/lib_xcore_math/src/arch/ref/vect_inverse.c +++ b/lib_xcore_math/src/arch/ref/vect_inverse.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/vect_macc.c b/lib_xcore_math/src/arch/ref/vect_macc.c index 8e3c0ef3..b9069f41 100644 --- a/lib_xcore_math/src/arch/ref/vect_macc.c +++ b/lib_xcore_math/src/arch/ref/vect_macc.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. // XMOS Public License: Version 1 diff --git a/lib_xcore_math/src/arch/ref/vect_mul.c b/lib_xcore_math/src/arch/ref/vect_mul.c index e4871391..fa9ea579 100644 --- a/lib_xcore_math/src/arch/ref/vect_mul.c +++ b/lib_xcore_math/src/arch/ref/vect_mul.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/vect_s16_extract.c b/lib_xcore_math/src/arch/ref/vect_s16_extract.c index d4d8153a..a257c884 100644 --- a/lib_xcore_math/src/arch/ref/vect_s16_extract.c +++ b/lib_xcore_math/src/arch/ref/vect_s16_extract.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/lib_xcore_math/src/arch/ref/vect_sXX.c b/lib_xcore_math/src/arch/ref/vect_sXX.c index 1eefc892..0e6508c3 100644 --- a/lib_xcore_math/src/arch/ref/vect_sXX.c +++ b/lib_xcore_math/src/arch/ref/vect_sXX.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/vect_set.c b/lib_xcore_math/src/arch/ref/vect_set.c index 08269146..3e9ca808 100644 --- a/lib_xcore_math/src/arch/ref/vect_set.c +++ b/lib_xcore_math/src/arch/ref/vect_set.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/vect_shl.c b/lib_xcore_math/src/arch/ref/vect_shl.c index 853ca7ad..5672ec1f 100644 --- a/lib_xcore_math/src/arch/ref/vect_shl.c +++ b/lib_xcore_math/src/arch/ref/vect_shl.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/vect_sqrt.c b/lib_xcore_math/src/arch/ref/vect_sqrt.c index 883a1815..0416aa06 100644 --- a/lib_xcore_math/src/arch/ref/vect_sqrt.c +++ b/lib_xcore_math/src/arch/ref/vect_sqrt.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/vect_stats.c b/lib_xcore_math/src/arch/ref/vect_stats.c index c937480c..c31fb5ce 100644 --- a/lib_xcore_math/src/arch/ref/vect_stats.c +++ b/lib_xcore_math/src/arch/ref/vect_stats.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/lib_xcore_math/src/arch/ref/vect_sum.c b/lib_xcore_math/src/arch/ref/vect_sum.c index a0683d8f..4a750db1 100644 --- a/lib_xcore_math/src/arch/ref/vect_sum.c +++ b/lib_xcore_math/src/arch/ref/vect_sum.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/vect_zip.c b/lib_xcore_math/src/arch/ref/vect_zip.c index 62455503..f6b75b8e 100644 --- a/lib_xcore_math/src/arch/ref/vect_zip.c +++ b/lib_xcore_math/src/arch/ref/vect_zip.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/arch/ref/vpu_scalar_ops.c b/lib_xcore_math/src/arch/ref/vpu_scalar_ops.c index 346d4214..15817ad8 100644 --- a/lib_xcore_math/src/arch/ref/vpu_scalar_ops.c +++ b/lib_xcore_math/src/arch/ref/vpu_scalar_ops.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -82,7 +82,12 @@ int8_t vlmul8( const int8_t y) { int32_t p = ((int32_t)x)*y; + + #if defined(__VX4B__) + p = ROUND_SHR32(p, 7); + #else p = ROUND_SHR32(p, 6); + #endif return SAT(8)(p); } @@ -197,6 +202,15 @@ int16_t vlmul16( return SAT(16)(p); } +int16_t vlmul16_vx4b( + const int16_t x, + const int16_t y) +{ + int32_t p = ((int32_t)x)*y; + p = ROUND_SHR32(p, 15); + return SAT(16)(p); +} + vpu_int16_acc_t vlmacc16( const vpu_int16_acc_t acc, @@ -225,11 +239,20 @@ vpu_int16_acc_t vlmaccr16( int16_t vlsat16( const vpu_int16_acc_t acc, + #if defined(__VX4B__) + const right_shift_t sat) + #else const unsigned sat) + #endif { - vpu_int16_acc_t s = acc; + int64_t s = acc; - if(sat >= 32) return (acc >= 0)? 
0 : -1; + #if defined(__VX4B__) + if(sat < 0) + s = s << (-sat); + #else + if(sat >= 32) return (acc >= 0)? 0 : -1; + #endif if(sat > 0) s = ((acc >> (sat-1)) + 1) >> 1; diff --git a/lib_xcore_math/src/arch/vx4b/NOTES.rst b/lib_xcore_math/src/arch/vx4b/NOTES.rst new file mode 100644 index 00000000..30be7231 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/NOTES.rst @@ -0,0 +1,5 @@ +The following functions have not been vectorised: + +chunk_s16_accumulate +vect_s16_dot +float_s32 \ No newline at end of file diff --git a/lib_xcore_math/src/arch/vx4b/asm_helper.h b/lib_xcore_math/src/arch/vx4b/asm_helper.h new file mode 100644 index 00000000..43d5bfc6 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/asm_helper.h @@ -0,0 +1,25 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#ifndef ASM_HELPER_H_ +#define ASM_HELPER_H_ + +#include "xmath/xmath_conf.h" + +#define EPV_LOG2_S8 5 +#define EPV_LOG2_S16 4 +#define EPV_LOG2_S32 3 +#define EPV_LOG2_C32 2 + +#define SIZEOF_LOG2_S8 0 +#define SIZEOF_LOG2_S16 1 +#define SIZEOF_LOG2_S32 2 +#define SIZEOF_LOG2_C32 3 + +#define HR_SUB_S8 7 +#define HR_SUB_S16 15 +#define HR_SUB_S32 31 + + + +#endif // ASM_HELPER_H_ diff --git a/lib_xcore_math/src/arch/vx4b/chunk_s16/chunk_s16_accumulate.almost b/lib_xcore_math/src/arch/vx4b/chunk_s16/chunk_s16_accumulate.almost new file mode 100644 index 00000000..67b8beb9 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/chunk_s16/chunk_s16_accumulate.almost @@ -0,0 +1,97 @@ +// Copyright 2020-2022 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + +#include "../asm_helper.h" + +.text +.align 16; /* Translation error on this line: unexpected token at position 9. */ + +/* + +The first time this is called on a vector, `vpu_ctrl` should be set +to 0x0100 (16-bit mode with no headroom mask). 
This function will +vsetc with that value, and the result of vgetc will be returned at +the end of this function. This way the caller need not repeatedly +compare headroom for each chunk with the minimum found so far. + +Instead, after all chunks have been processed, the headroom can be +computed from the final value returned. + +unsigned chunk_s16_accumulate( + split_acc_s32_t* acc, + const int16_t b[VPU_INT16_EPV], + const right_shift_t b_shr, + const unsigned vpu_ctrl); +*/ +#define FUNCTION_NAME chunk_s16_accumulate +#define NSTACKWORDS (4 + 8+4) + +#define STACK_VEC_C (NSTACKWORDS - 8-4) + +#define acc x10 +#define b x11 +#define b_shr x12 +#define vec_c x13 + + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + +{ mv t3, a3 ; xm.vldd acc} +{ nop ; xm.vsetc t3} +{ xm.cls t3, b_shr ; nop} +{ addi vec_c,sp, (STACK_VEC_C)*4 ; xm.brff t3, .L_b_shr_neg }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + +.L_b_shr_pos: + // non-neg b_shr means we want vlashr + la t3, vpu_vec_0x0001 + xm.vlashr b, b_shr + { xm.mkmski b_shr, 32 ; xm.vldc t3} + xm.vstrpv vec_c, b_shr + addi t3, acc, 32 + { nop ; xm.vldd acc} + { nop ; xm.vldr t3} + { nop ; xm.vlmacc0 vec_c} + xm.vlmacc1 vec_c + { nop ; xm.vstd acc} + xm.vstrpv t3, b_shr + { nop ; xm.vgetc t3} + { mv a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + +.L_b_shr_neg: + // neg b_shr means we want to set c[] to a power of 2 + la t3, vpu_vec_0x0001 + xm.vlashr t3, b_shr + { xm.mkmski b_shr, 32 ; xm.vldd acc} + xm.vstrpv vec_c, b_shr + { nop ; xm.vldc vec_c} + addi t3, acc, 32 + { nop ; xm.vldr t3} + { nop ; xm.vlmacc0 b} + xm.vlmacc1 vec_c + { nop ; xm.vstd acc} + xm.vstrpv t3, b_shr + { nop ; xm.vgetc t3} + 
{ mv a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + +.L_func_end_unpack: + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME,.L_func_end_unpack - FUNCTION_NAME + +#undef NSTACKWORDS +#undef FUNCTION_NAME + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/chunk_s16/chunk_s16_accumulate.c b/lib_xcore_math/src/arch/vx4b/chunk_s16/chunk_s16_accumulate.c new file mode 100644 index 00000000..3670acde --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/chunk_s16/chunk_s16_accumulate.c @@ -0,0 +1,47 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+#if defined (__VX4B__)
+#include <stdint.h>
+#include <stdio.h>
+
+#include "xmath/xmath.h"
+#include "vpu_helper.h"
+#include "xmath/xs3/vpu_scalar_ops.h"
+
+/* Scalar C stand-in for the VX4B kernel: adds b[k], shifted right by b_shr (left when b_shr < 0),
+ * into each 32-bit accumulator held split as vD (upper 16 bits) and vR (lower 16 bits).
+ * Returns the largest of (vpu_ctrl & 0x1F) and 15 - HR_S16(acc->vD[k]) over all k.
+ */
+unsigned chunk_s16_accumulate(
+    split_acc_s32_t* acc,
+    const int16_t b[VPU_INT16_EPV],
+    const right_shift_t b_shr,
+    const unsigned vpu_ctrl)
+{
+  unsigned vc = vpu_ctrl & 0x1F;
+
+  for(int k = 0; k < VPU_INT16_EPV; k++){
+    int32_t hi = acc->vD[k];
+    uint32_t lo = acc->vR[k];
+    // Work in uint32_t: '<<' on a negative signed value and signed overflow are undefined in C.
+    uint32_t acc32 = ((uint32_t)hi << 16) | lo;
+    int32_t b_mod = b[k];
+    if(b_shr >= 0)
+      b_mod = b_mod >> b_shr;
+    else
+      b_mod = (int32_t)((uint32_t)b_mod << (-b_shr));
+
+    acc32 += (uint32_t)b_mod;
+
+    acc->vD[k] = (acc32 >> 16) & 0xFFFF;
+    acc->vR[k] = acc32 & 0xFFFF;
+
+    unsigned tmp = 15 - HR_S16(acc->vD[k]);
+    vc = MAX(vc, tmp);
+  }
+  return vc;
+}
+
+
+#endif
diff --git a/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_dot.S b/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_dot.S
new file mode 100644
index 00000000..d01b2f99
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_dot.S
@@ -0,0 +1,50 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+.text
+.align 16; /* Translation error on this line: unexpected token at position 9.
*/ + +/* +int32_t chunk_s32_dot( + const int32_t b[VPU_INT32_EPV], + const q2_30 c[VPU_INT32_EPV]); +*/ +#define FUNCTION_NAME chunk_s32_dot +#define NSTACKWORDS (4) + +#define b x10 +#define c x11 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ +{ li t3, 0 ; xm.vldc b} +{ xm.mkmski t3, 4 ; xm.vsetc t3} +{ nop ; xm.vclrdr } +{ addi a2,sp, 0 ; xm.vlmaccr0 c} + xm.vstrpv a2, t3 +{ nop ; lw a0, 0 (sp)} + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + +.L_func_end_unpack: + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME,.L_func_end_unpack - FUNCTION_NAME + +#undef NSTACKWORDS +#undef FUNCTION_NAME + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_log.S b/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_log.S new file mode 100644 index 00000000..33023259 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_log.S @@ -0,0 +1,176 @@ +// Copyright 2020-2026 XMOS LIMITED. 
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+/*
+Writes one q8_24 value per element of b[]; judging by the name and the .L_ln_2 table this is presumably a natural log -- TODO confirm.
+Condition: 0 < ldexp(b[k], -30) < 2
+
+
+void chunk_s32_log(
+    q8_24 a[],
+    const int32_t b[],
+    const exponent_t b_exp);
+*/
+
+
+#define NSTACKWORDS (8+48+4) // 60-word frame; the SP_VEC_X* values below are word offsets into it
+
+#define FUNCTION_NAME chunk_s32_log
+
+#define SP_VEC_X1 ((NSTACKWORDS) - 8 -4) // X1..X6 are spaced 8 words apart and each is scaled by 4 (bytes) where used
+#define SP_VEC_X2 ((NSTACKWORDS) - 16-4)
+#define SP_VEC_X3 ((NSTACKWORDS) - 24-4)
+#define SP_VEC_X4 ((NSTACKWORDS) - 32-4)
+#define SP_VEC_X5 ((NSTACKWORDS) - 40-4)
+#define SP_VEC_X6 ((NSTACKWORDS) - 48-4)
+
+
+.text
+.p2align 2
+
+.L_ps_coef1: .word -0x800000, -0x800000, -0x800000, -0x800000, -0x800000, -0x800000, -0x800000, -0x800000 /* Translation error on this line: unexpected token at position 13. */
+.L_ps_coef2: .word 0x555555, 0x555555, 0x555555, 0x555555, 0x555555, 0x555555, 0x555555, 0x555555 /* Translation error on this line: unexpected token at position 13. */
+.L_ps_coef3: .word -0x400000, -0x400000, -0x400000, -0x400000, -0x400000, -0x400000, -0x400000, -0x400000 /* Translation error on this line: unexpected token at position 13. */
+.L_ps_coef4: .word 0x333333, 0x333333, 0x333333, 0x333333, 0x333333, 0x333333, 0x333333, 0x333333 /* Translation error on this line: unexpected token at position 13. */
+.L_ps_coef5: .word -0x2aaaab, -0x2aaaab, -0x2aaaab, -0x2aaaab, -0x2aaaab, -0x2aaaab, -0x2aaaab, -0x2aaaab /* Translation error on this line: unexpected token at position 13. */
+
+.L_ln_2: .word 0x2c5c85fe,0x2c5c85fe,0x2c5c85fe,0x2c5c85fe,0x2c5c85fe,0x2c5c85fe,0x2c5c85fe,0x2c5c85fe /* Translation error on this line: unexpected token at position 9. */
+
+#define a x10
+#define b x11
+#define b_exp x12
+#define mantB x13
+#define tmpA x18
+#define tmpB x19
+#define tmpC x20
+#define vec_x x21
+#define mantA x28 // NOTE(review): x28 appears to be t3, which is also used for table addresses; mantA is #undef'd before the power-series section
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp s3,s2,0 // save s2..s5, restored at .L_finish
+  xm.stdsp s5,s4,8
+// Four unrolled groups follow, each handling two words of b[] (byte offsets 0, 8, 16, 24).
+{ addi vec_x,sp, (SP_VEC_X1)*4 ; li t3, 0 }
+{nop; ; xm.vsetc t3}
+  xm.lddi mantA,mantB, 0(b) // two input words
+  { xm.cls tmpA, mantA ; nop } // tmpA/tmpB: shift amounts (presumably count of leading sign bits)
+  { nop ; xm.cls tmpB, mantB }
+{ xm.shl mantA, mantA, tmpA ; xm.shl mantB, mantB, tmpB }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantA, mantA, tmpA \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantB, mantB, tmpB \nMessage: The shift amount is not 32" */
+  xm.stdi mantA,mantB, 0(vec_x) // shifted inputs go to stack vector X1
+{ sub mantA, b_exp, tmpA ; sub mantB, b_exp, tmpB } // b_exp minus the shift applied
+  xm.stdi mantA,mantB, 0(a) // parked in a[] until the vector stage below reads it back
+
+  xm.lddi mantA,mantB, 8(b)
+  { xm.cls tmpA, mantA ; nop }
+  { nop ; xm.cls tmpB, mantB }
+{ xm.shl mantA, mantA, tmpA ; xm.shl mantB, mantB, tmpB }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantA, mantA, tmpA \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantB, mantB, tmpB \nMessage: The shift amount is not 32" */
+  xm.stdi mantA,mantB, 8(vec_x)
+{ sub mantA, b_exp, tmpA ; sub mantB, b_exp, tmpB }
+  xm.stdi mantA,mantB, 8(a)
+
+  xm.lddi mantA,mantB, 16(b)
+  { xm.cls tmpA, mantA ; nop }
+  { nop ; xm.cls tmpB, mantB }
+{ xm.shl mantA, mantA, tmpA ; xm.shl mantB, mantB, tmpB }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantA, mantA, tmpA \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantB, mantB, tmpB \nMessage: The shift amount is not 32" */
+  xm.stdi mantA,mantB, 16(vec_x)
+{ sub mantA, b_exp, tmpA ; sub mantB, b_exp, tmpB }
+  xm.stdi mantA,mantB, 16(a)
+
+  xm.lddi mantA,mantB, 24(b)
+  { xm.cls tmpA, mantA ; nop }
+  { nop ; xm.cls tmpB, mantB }
+{ xm.shl mantA, mantA, tmpA ; xm.shl mantB, mantB, tmpB }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantA, mantA, tmpA \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantB, mantB, tmpB \nMessage: The shift amount is not 32" */
+  xm.stdi mantA,mantB, 24(vec_x)
+{ sub mantA, b_exp, tmpA ; sub mantB, b_exp, tmpB }
+  xm.stdi mantA,mantB, 24(a)
+
+{ li tmpA, 24 ;nop}
+// Vector stage on the values parked in a[]: shift by -24, add a constant vector, multiply by .L_ln_2, store back to a[].
+  la t3, vpu_vec_0x20000000
+{ nop ; xm.vclrdr }
+{ xm.neg tmpA, tmpA ; nop } // tmpA = -24
+  xm.vlashr a, tmpA
+{ xm.ldap t3, .L_ln_2 ; xm.vladd t3}
+{ nop ; xm.vlmul0 t3}
+
+{ mv t3, vec_x ; xm.vstr a}
+{ nop ; xm.vldr t3} // reload the shifted inputs from stack vector X1
+  la t3, vpu_vec_0x00000002
+xm.vlsat t3
+  la t3, vpu_vec_neg_0x40000000
+{ addi tmpB,sp, (SP_VEC_X1)*4 ; xm.vladd t3} // the comments below call this quantity (x-1.0)
+
+#undef mantA
+#undef mantB
+// Successive powers are stored to stack vectors X1..X6, one per vstr below.
+{ addi vec_x,sp, (SP_VEC_X2)*4 ; xm.vstr vec_x}
+{ nop ; xm.vlmul0 tmpB} // (x-1.0)^2
+{ addi vec_x,sp, (SP_VEC_X3)*4 ; xm.vstr vec_x}
+{ nop ; xm.vlmul0 tmpB} // (x-1.0)^3
+{ addi vec_x,sp, (SP_VEC_X4)*4 ; xm.vstr vec_x}
+{ nop ; xm.vlmul0 tmpB} // (x-1.0)^4
+{ addi vec_x,sp, (SP_VEC_X5)*4 ; xm.vstr vec_x}
+{ li tmpA, 6 ; xm.vlmul0 tmpB} // (x-1.0)^5
+{ addi vec_x,sp, (SP_VEC_X6)*4 ; xm.vstr vec_x}
+{ xm.ldap t3, .L_ps_coef5 ; xm.vlmul0 tmpB} // (x-1.0)^6
+{ addi tmpB,sp, (SP_VEC_X1)*4 ; xm.vstr vec_x}
+
+  xm.vlashr tmpB, tmpA // vR[] = coef[0] * x
+{ xm.ldap t3, .L_ps_coef4 ; xm.vldc t3} // vC[] = coef[5]
+{ addi vec_x,sp, (SP_VEC_X5)*4 ; xm.vlmacc0 vec_x} // vR[] += coef[5] * x^6
+{ xm.ldap t3, .L_ps_coef3 ; xm.vldc t3} // vC[] = coef[4]
+{ addi vec_x,sp, (SP_VEC_X4)*4 ; xm.vlmacc0 vec_x} // vR[] += coef[4] * x^5
+{ xm.ldap t3, .L_ps_coef2 ; xm.vldc t3} // vC[] = coef[3]
+{ addi vec_x,sp, (SP_VEC_X3)*4 ; xm.vlmacc0 vec_x} // vR[] += coef[3] * x^4
+{ xm.ldap t3, .L_ps_coef1 ; xm.vldc t3} // vC[] = coef[2]
+{ addi vec_x,sp, (SP_VEC_X2)*4 ; xm.vlmacc0 vec_x} // vR[] += coef[2] * x^3
+{ nop ; xm.vldc t3} // vC[] = coef[1]
+{ addi vec_x,sp, (SP_VEC_X1)*4 ; xm.vlmacc0 vec_x} // vR[] += coef[1] * x^2
+
+{ nop ; xm.vladd a} // add the term previously stored in a[]
+{ nop ; xm.vstr a} // final result for non-zero inputs
+
+// Any inputs that were 0 should become INT32_MIN
+  la t3, vpu_vec_0x00000001
+{ nop ; xm.vldr t3}
+{ nop ; xm.vlsub b}
+{ nop ; xm.vdepth1 }
+{ nop ; xm.vstr vec_x}
+{ nop ; lw tmpA,0 ( vec_x)} // first word of the vdepth1 result
+{ mv tmpB, tmpA ; nop }
+  xm.zip tmpB, tmpA, 0 // two zip passes build the mask used by vstrpv below (presumably widening per-element bits to per-byte bits)
+  mv tmpB, tmpA
+  xm.zip tmpB, tmpA, 0
+  la t3, vpu_vec_0x80000000
+{ nop ; xm.vldr t3}
+  xm.vstrpv a, tmpA // masked store of 0x80000000 into a[]
+
+
+.L_finish:
+  xm.lddsp s3,s2,0
+  xm.lddsp s5,s4,8
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+
+.L_func_end:
+
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */
+.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29.
*/
+.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */
+.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_power_series.S b/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_power_series.S
new file mode 100644
index 00000000..27a8421a
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_power_series.S
@@ -0,0 +1,87 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+.text
+.align 4; /* Translation error on this line: unexpected token at position 8. */
+// Accumulates coef-vector * (running power of b[]) over term_count terms and writes the sum to a[]; coef advances 32 bytes per term.
+/*
+    void chunk_q30_power_series(
+        int32_t a[VPU_INT32_EPV],
+        const q2_30 b[VPU_INT32_EPV],
+        const int32_t coef[],
+        const unsigned term_count);
+*/
+#define FUNCTION_NAME chunk_q30_power_series
+#define NSTACKWORDS (8 + 2 * 8 + 4)
+
+#define VEC_POW (NSTACKWORDS - 8-4) // word offset of the stack vector holding the running power
+#define VEC_ACC (NSTACKWORDS - 16-4) // word offset of the stack vector holding the running sum
+
+#define a x10
+#define b x11
+#define coef x12
+#define len x13
+#define vec_pow x18
+#define vec_acc x19
+#define tmp x20
+#define _32 x21
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp s3,s2,0
+  xm.stdsp s5,s4,8
+{ addi len, len, -1 ; nop } // the first term is handled before the loop
+  xm.stdsp s7,s6,16
+
+{ li t3, 0 ; xm.vldc coef} // first coefficient vector
+{ nop ; xm.vsetc t3}
+lui t3, %hi(vpu_vec_0x40000000)
+  addi t3,t3, %lo(vpu_vec_0x40000000)
+{ addi vec_pow,sp, (VEC_POW)*4 ; xm.vldr t3}
+{ addi vec_acc,sp, (VEC_ACC)*4 ; xm.vstr vec_pow} // initial power = the 0x40000000 vector
+{ li _32, 32 ; xm.vclrdr }
+{ add coef, coef, _32 ; xm.vlmacc0 vec_pow} // acc = first coefficient * initial power
+{ mv t3, vec_pow ; xm.vstr vec_acc}
+// NOTE(review): the loop body runs before len is tested, so term_count < 2 looks unsupported -- confirm with callers.
+  .L_loop_top:
+  { nop ; xm.vldr t3} // reload the running power
+  { nop ; xm.vlmul0 b} // power *= b[]
+  { mv t3, vec_acc ; xm.vstr vec_pow}
+  { nop ; xm.vldr t3} // reload the running sum
+  { add coef, coef, _32 ; xm.vldc coef} // next coefficient vector
+  { addi len, len, -1 ; xm.vlmacc0 vec_pow} // sum += coefficient * power
+  { nop ; xm.vstr vec_acc}
+  { mv t3, vec_pow ; xm.bt len, .L_loop_top } // presumably loops while len != 0
+
+{ nop ; xm.vstr a} // the last sum is still in the VPU; write it to a[]
+
+.L_finish:
+  xm.lddsp s3,s2,0
+  xm.lddsp s5,s4,8
+  xm.lddsp s7,s6,16
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+.L_func_end_unpack:
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */
+.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */
+.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */
+.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */
+.size FUNCTION_NAME,.L_func_end_unpack - FUNCTION_NAME
+
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_power_series_v2.S b/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_power_series_v2.S
new file mode 100644
index 00000000..3e4d48a5
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/chunk_s32/chunk_s32_power_series_v2.S
@@ -0,0 +1,108 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+
+#include "../asm_helper.h"
+
+.text
+.align 4; /* Translation error on this line: unexpected token at position 8. */
+
+/*
+  The difference between this and chunk_q30_power_series() is that this one doesn't require
+  the coefficient vector to contain redundant elements for each vector index. Instead, this version
+  will broadcast the coefficient to a chunk. For this reason it is significantly slower, but it is
+  also less wasteful of memory.
+
+  NOTE: This hasn't (yet) been officially added to the API
+
+    void chunk_q30_power_series_v2(
+        int32_t a[],
+        const q2_30 b[],
+        const int32_t coef[],
+        const unsigned term_count);
+*/
+#define FUNCTION_NAME chunk_q20_power_series // NOTE(review): exported symbol differs from the chunk_q30_power_series_v2 prototype above -- confirm which name callers link against
+#define NSTACKWORDS (12 + 4 * 8 + 4)
+
+#define VEC_POW (NSTACKWORDS - 8-4)
+#define VEC_ACC (NSTACKWORDS - 16-4)
+#define VEC_TMP (NSTACKWORDS - 24-4) // --> [coef, coef, 0, 0, 0, 0, 0, 0]
+                                        // (last six elements must stay zeros)
+#define VEC_COEF (NSTACKWORDS - 30-4) // --> [coef, coef, coef, coef, coef, coef, coef, coef]
+                                        // (overlaps VEC_TMP and that's fine)
+
+#define a x10
+#define b x11
+#define coef x12
+#define len x13
+#define vec_pow x18
+#define vec_acc x19
+#define vec_coef x20
+#define tmp x21
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp s3,s2,0
+  xm.stdsp s5,s4,8
+{ addi tmp,sp, (VEC_TMP)*4 ; xm.vclrdr }
+  xm.stdsp s7,s6,16
+{ li t3, 0 ; xm.vstd tmp} // zero VEC_TMP so its last six words stay 0
+{ addi len, len, -1 ; xm.vsetc t3} // the first term is handled before the loop
+{ addi vec_pow,sp, (VEC_POW)*4 ; lw t3,0 ( coef)} // first scalar coefficient
+  xm.stdi t3,t3, 0(tmp) // VEC_TMP = [coef, coef, 0, ...]
+{ addi vec_acc,sp, (VEC_ACC)*4 ; xm.vldd tmp}
+{ addi vec_coef,sp, (VEC_COEF)*4 ; xm.vfttf } // set vec_coef (it was never initialised before); tmp must keep pointing at VEC_TMP for the loop
+{ addi coef, coef, 4 ; xm.vstd vec_coef} // broadcast coefficient -> VEC_COEF
+{ nop ; xm.vldc vec_coef}
+lui t3, %hi(vpu_vec_0x40000000)
+  addi t3,t3, %lo(vpu_vec_0x40000000)
+{ nop ; xm.vldr t3}
+{ nop ; xm.vstr vec_pow} // initial power = the 0x40000000 vector
+{ nop ; xm.vclrdr }
+{ nop ; xm.vlmacc0 vec_pow} // acc = first coefficient * initial power
+{ mv t3, vec_pow ; xm.vstr vec_acc}
+// NOTE(review): the loop body runs before len is tested, so term_count < 2 looks unsupported -- confirm with callers.
+  .L_loop_top:
+  { nop ; xm.vldr t3} // reload the running power
+  { nop ; xm.vlmul0 b} // power *= b[]
+  { mv t3, vec_acc ; xm.vstr vec_pow}
+  { nop ; xm.vldr t3} // reload the running sum
+  { addi coef, coef, 4 ; lw t3,0 ( coef)} // next scalar coefficient
+  xm.stdi t3,t3, 0(tmp) // refresh the first two words of VEC_TMP
+  { nop ; xm.vldd tmp}
+  { nop ; xm.vfttf }
+  { nop ; xm.vstd vec_coef}
+  { nop ; xm.vldc vec_coef}
+  { addi len, len, -1 ; xm.vlmacc0 vec_pow} // sum += coefficient * power
+  { nop ; xm.vstr vec_acc}
+  { mv t3, vec_pow ; xm.bt len, .L_loop_top } // presumably loops while len != 0
+
+{ nop ; xm.vstr a} // write the final sum to a[]
+
+.L_finish:
+  xm.lddsp s3,s2,0
+  xm.lddsp s5,s4,8
+  xm.lddsp s7,s6,16
+  xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
+
+.L_func_end_unpack:
+
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */
+.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */
+.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */
+.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */
+.size FUNCTION_NAME,.L_func_end_unpack - FUNCTION_NAME
+
+#undef NSTACKWORDS
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/dct12_s32.S b/lib_xcore_math/src/arch/vx4b/dct/s32/dct12_s32.S
new file mode 100644
index 00000000..034055d7
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/dct/s32/dct12_s32.S
@@ -0,0 +1,140 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined(__VX4B__)
+
+
+/*
+
+Perform a 12-point forward DCT.
+
+void dct12_forward(
+    int32_t y[12],
+    const int32_t x[12]);
+
+*/
+
+#define FUNCTION_NAME dct12_forward
+#define NSTACKWORDS 12
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 2
+
+#define VEC_TMP (NSTACKWORDS - 8 - 2) // word offset of the scratch vector, above the saved s3/s2 pair
+
+#define y x10
+#define x x11
+
+#define a x12
+#define b x13
+#define c x18
+#define d x19
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp s3,s2,0 // c and d live in x18/x19, so s2/s3 are saved here and restored before return
+
+  // Reverse the tail half of x[], placing it in y[]
+  // leave the head half where it is
+  xm.lddi a,b, 24(x)
+  xm.lddi c,d, 40 (x)// Load these in case x and y are the same address
+  xm.stdi b,a, 40(y)
+  xm.lddi a,b, 32(x)
+  li t3, 0x80 // value handed to vsetc below
+  xm.stdi b,a, 32(y)
+  xm.stdi d,c, 24(y)
+
+// Take the sum and difference of the head and (flipped) tail
+// also dividing by 2 so that we don't saturate.
+{ li a, 24 ; nop }
+{ add t3, y, a ; xm.vsetc t3} // t3 <-- &y[6] (the flipped tail)
+{ nop ; xm.vldr t3}
+{ addi t3,sp, (VEC_TMP)*4 ; xm.vladsb x}
+{ add x, y, a ; xm.vstd t3} // one result vector goes to the stack scratch vector; x11 now holds &y[6]
+
+#undef x //no longer needed
+#undef y
+// now x10 points at the first half of y and x11 at the second half
+#define left x10
+#define right x11
+{ nop ; xm.vstr left}
+{ xm.mkmski a, 24 ; xm.vldr t3} // a = mask for the vstrpv below
+lui t3, %hi(dct12_lut)
+  addi t3,t3, %lo(dct12_lut)
+{ addi t3,sp, (VEC_TMP)*4 ; xm.vlmul0 t3} // scale by dct12_lut
+  xm.vstrpv t3, a // masked store back to the scratch vector
+
+
+// DCT the sum of the head and tail, placing the result in
+// the second half of y[] (for now)
+{ li b , 32 ; xm.vldc left} // b = 32, the byte step added to t3 between the vlmaccr0 bundles below
+lui t3, %hi(dct6_matrix)
+  addi t3,t3, %lo(dct6_matrix)
+{ mv a, t3 ; xm.vclrdr } // keep &dct6_matrix in a for the second pass
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ nop ; xm.vlmaccr0 t3}
+lui t3, %hi(vpu_vec_0x10000000)
+  addi t3,t3, %lo(vpu_vec_0x10000000) // ashr vR[] right 2 bits
+{ xm.mkmski t3, 24 ; xm.vlmul0 t3}
+  xm.vstrpv right, t3 // put in right half so left half is clear
+                                        // when we start interleaving them
+
+// DCT the difference of head and tail, placing the result
+// on the stack
+{ addi t3,sp, (VEC_TMP)*4 ; xm.vclrdr }
+{ mv t3, a ; xm.vldc t3} // DCT right half (from stack vec)
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ nop ; xm.vlmaccr0 t3}
+lui t3, %hi(vpu_vec_0x20000000)
+  addi t3,t3, %lo(vpu_vec_0x20000000) // shr vR[] right 1 bit (to simplify deconvolution)
+{ addi t3,sp, (VEC_TMP)*4 ; xm.vlmul0 t3}
+{ nop ; xm.vstr t3} // store on stack so we don't clobber
+                                        // anything when we interleave
+
+// Now simultaneously rearrange stuff in memory while deconvolving the
+// second DCT that we did
+  xm.lddi b,d, 0(t3)
+  srai b, b, 1 // first deconvolved value is half of the first stack word
+{ nop ; lw a,0 ( right)}
+  xm.stdi a,b, 0(left) // each stdi writes one word from right[] and one deconvolved word as a pair
+{ sub d, d, b ; lw a,4 ( right)} // each later value = next stack word minus the previous deconvolved value
+  xm.stdi a,d, 8(left)
+  xm.lddi b,c, 8(t3)
+{ sub b, b, d ; lw a,8 ( right)}
+  xm.stdi a,b, 16(left)
+{ sub c, c, b ; lw a,12 ( right)}
+  xm.stdi a,c, 24(left)
+  xm.lddi b,d, 16 (t3)
+{ sub b, b, c ; lw a,16 ( right)}
+  xm.stdi a,b, 32(left)
+{ sub d, d, b ; lw a,20 ( right)}
+  xm.stdi a,d, 40(left)
+
+  xm.lddsp s3,s2,0
+{ nop ; xm.retsp (NSTACKWORDS)*4 }
+
+
+
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS
+.globl FUNCTION_NAME.nstackwords
+.set FUNCTION_NAME.maxcores,1
+.globl FUNCTION_NAME.maxcores
+.set FUNCTION_NAME.maxtimers,0
+.globl FUNCTION_NAME.maxtimers
+.set FUNCTION_NAME.maxchanends,0
+.globl FUNCTION_NAME.maxchanends
+.Ltmp0:
+  .size FUNCTION_NAME, .Ltmp0-FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/dct16_s32.S b/lib_xcore_math/src/arch/vx4b/dct/s32/dct16_s32.S
new file mode 100644
index 00000000..1820d37e
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/dct/s32/dct16_s32.S
@@ -0,0 +1,157 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined(__VX4B__)
+
+
+/*
+
+Perform a 16-point forward DCT.
+
+void dct16_forward(
+    int32_t y[16],
+    const int32_t x[16]);
+
+*/
+
+#define FUNCTION_NAME dct16_forward
+#define NSTACKWORDS 12
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 2
+
+#define VEC_TMP (NSTACKWORDS - 8 - 2) // word offset of the scratch vector, above the saved s3/s2 pair
+
+#define y x10
+#define x x11
+
+#define a x12
+#define b x13
+#define c x18
+#define d x19
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp s3,s2,0 // c and d live in x18/x19, so s2/s3 are saved here and restored before return
+
+  // Reverse the tail half of x[], placing it in y[]
+  // leave the head half where it is
+  xm.lddi a,b, 32(x)
+  xm.lddi c,d, 56(x)
+  xm.stdi b,a, 56(y)
+  xm.stdi d,c, 32(y)
+
+  xm.lddi a,b, 40(x)
+  xm.lddi c,d, 48(x)
+  xm.stdi b,a, 48(y)
+  xm.stdi d,c, 40(y)
+
+  li t3, 0x80 // value handed to vsetc below
+
+// Take the sum and difference of the head and (flipped) tail
+// also dividing by 2 so that we don't saturate.
+{ li a, 32 ; nop }
+{ add t3, y, a ; xm.vsetc t3} // t3 <-- &y[8] (the flipped tail)
+{ nop ; xm.vldr t3}
+{ addi t3,sp, (VEC_TMP)*4 ; xm.vladsb x}
+{ add x, y, a ; xm.vstd t3} // one result vector goes to the stack scratch vector; x11 now holds &y[8]
+
+#undef x //no longer needed
+#undef y
+// now x10 points at the first half of y and x11 at the second half
+#define left x10
+#define right x11
+{ nop ; xm.vstr left}
+{ nop ; xm.vldr t3}
+lui t3, %hi(dct16_lut)
+  addi t3,t3, %lo(dct16_lut)
+{ addi t3,sp, (VEC_TMP)*4 ; xm.vlmul0 t3} // scale by dct16_lut
+{ nop ; xm.vstr t3}
+
+
+// DCT the sum of the head and tail, placing the result in
+// the second half of y[] (for now)
+{ li b, 32 ; xm.vldc left} // b = 32, the byte step added to t3 between the vlmaccr0 bundles below
+lui t3, %hi(dct8_matrix)
+  addi t3,t3, %lo(dct8_matrix)
+{ mv a, t3 ; xm.vclrdr } // keep &dct8_matrix in a for the second pass
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+xm.vlsat t3 // NOTE(review): t3 is now dct8_matrix + 8*32 bytes -- confirm a suitable vector lives there
+lui t3, %hi(vpu_vec_0x10000000)
+  addi t3,t3, %lo(vpu_vec_0x10000000) // ashr vR[] right 2 bits
+{ xm.mkmski t3, 24 ; xm.vlmul0 t3}
+{ nop ; xm.vstr right}
+
+// DCT the difference of head and tail, placing the result
+// on the stack
+{ addi t3,sp, (VEC_TMP)*4 ; xm.vclrdr }
+{ mv t3, a ; xm.vldc t3} // DCT right half (from stack vec)
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+{ add t3, t3, b ; xm.vlmaccr0 t3}
+xm.vlsat t3
+lui t3, %hi(vpu_vec_0x20000000)
+  addi t3,t3, %lo(vpu_vec_0x20000000) // shr vR[] right 1 bit (to simplify deconvolution)
+{ addi t3,sp, (VEC_TMP)*4 ; xm.vlmul0 t3}
+{ nop ; xm.vstr t3} // store on stack so we don't clobber
+                                        // anything when we interleave
+
+// Now simultaneously rearrange stuff in memory while deconvolving the
+// second DCT that we did
+  xm.lddi b,d, 0(t3)
+  srai c, b, 1 // first deconvolved value is half of the first stack word
+{ nop ; lw a,0 ( right)}
+  xm.stdi a,c, 0(left) // each stdi writes one word from right[] and one deconvolved word as a pair
+{ sub c, d, c ; lw a,4 ( right)} // c carries the running value: next stack word minus the previous c
+  xm.stdi a,c, 8(left)
+
+  xm.lddi b,d, 8(t3)
+{ sub c, b, c ; lw a,8 ( right)}
+  xm.stdi a,c, 16(left)
+{ sub c, d, c ; lw a,12 ( right)}
+  xm.stdi a,c, 24(left)
+
+  xm.lddi b,d, 16(t3)
+{ sub c, b, c ; lw a,16 ( right)}
+  xm.stdi a,c, 32(left)
+{ sub c, d, c ; lw a,20 ( right)}
+  xm.stdi a,c, 40(left)
+
+  xm.lddi b,d, 24(t3)
+{ sub c, b, c ; lw a,24 ( right)}
+  xm.stdi a,c, 48(left)
+{ sub c, d, c ; lw a,28 ( right)}
+  xm.stdi a,c, 56(left)
+
+  xm.lddsp s3,s2,0
+{ nop ; xm.retsp (NSTACKWORDS)*4 }
+
+
+
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS
+.globl FUNCTION_NAME.nstackwords
+.set FUNCTION_NAME.maxcores,1
+.globl FUNCTION_NAME.maxcores
+.set FUNCTION_NAME.maxtimers,0
+.globl FUNCTION_NAME.maxtimers
+.set FUNCTION_NAME.maxchanends,0
+.globl FUNCTION_NAME.maxchanends
+.Ltmp0:
+  .size FUNCTION_NAME, .Ltmp0-FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/dct24_s32.S b/lib_xcore_math/src/arch/vx4b/dct/s32/dct24_s32.S
new file mode 100644
index 00000000..21156961
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/dct/s32/dct24_s32.S
@@ -0,0 +1,178 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined(__VX4B__)
+
+
+/*
+
+Perform a 24-point forward DCT.
+
+Computed directly by multiplying by the DCT matrix. The output has elements ordered
+so that when used in recursive DCT computation the bit-reversed indexing can be used
+to deconvolve those that need it.
+
+void dct24_forward(
+    int32_t y[24],
+    const int32_t x[24]);
+
+*/
+
+#define FUNCTION_NAME dct24_forward
+#define NSTACKWORDS 44
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 2
+
+#define STK_VEC_HEAD (NSTACKWORDS - 16-2) // word offset of tmp[0:8]
+#define STK_VEC_TAIL (NSTACKWORDS - 8-2) // word offset of tmp[8:...], directly after STK_VEC_HEAD
+
+#define y x10
+#define x x11
+
+#define a x12
+#define b x13
+#define c x18
+#define d x19
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+  xm.stdsp s3,s2,8 // c and d live in x18/x19, so s2/s3 are saved here and restored before return
+
+  // Reverse the tail half of x[], placing it in y[]
+  // leave the head half where it is
+  xm.lddi a,b, 48(x)
+  xm.lddi c,d, 88(x)
+  xm.stdi b,a, 88(y)
+  xm.stdi d,c, 48(y)
+
+  xm.lddi a,b, 56(x)
+  xm.lddi c,d, 80(x)
+  xm.stdi b,a, 80(y)
+  xm.stdi d,c, 56(y)
+
+  li t3, 0x80 // value handed to vsetc below
+
+  xm.lddi a,b, 64(x)
+  xm.lddi c,d, 72(x)
+  xm.stdi b,a, 72(y)
+  xm.stdi d,c, 64(y)
+
+// Take the sum and difference between the head and (flipped) tail
+// the sum goes into y[0:12], the difference goes into tmp[0:12]
+{ li a, 48 ; addi d,sp, (STK_VEC_HEAD)*4 }
+{ add t3, y, a ; xm.vsetc t3} // x28 <-- &y[12]
+{ li b, 32 ; xm.vldr t3} // vR[] <-- y[12:20]
+{ nop ; xm.vladsb x} // vR[] <-- sum; vD[] <-- diff
+{ nop ; xm.vstd d} // tmp[0:8] <-- diff[0:8]
+{ add t3, t3, b ; xm.vstr y} // y[0:8] <-- sum[0:8]
+{ add x, x, b ; xm.vldr t3} // vR[] <-- y[20:24]
+{ addi t3,sp, (STK_VEC_TAIL)*4 ; xm.vladsb x} // sum/diff; orig x no longer needed
+{ add t3, y, b ; xm.vstd t3} // tmp[8:12] <-- diff[8:12]
+{ add x, y, a ; xm.vstr t3} // y[8:12] <-- sum[8:12]
+
+// multiply tail component by DCT LUT
+lui t3, %hi(dct24_lut)
+  addi t3,t3, %lo(dct24_lut)
+{ nop ; xm.vldr t3}
+{ add a, d, b ; xm.vlmul0 d} // a <-- d + 32 bytes, the second vector of tmp[]
+{ add t3, t3, b ; xm.vstr d}
+{ nop ; xm.vldr t3}
+{ nop ; xm.vlmul0 a}
+{ nop ; xm.vstr a}
+
+#define left x10 // Contains &y[0]
+#define right x11 // Contains &y[12]
+
+// perform 12-point DCTs on the head and tail sub-sequences.
+// y[0:12] (head) --> DCT12 --> y[12:24]
+// tmp[0:12] (tail) --> DCT12 --> tmp[0:12]
+// The head is being moved to the end of y so that it isn't in
+// the way when we need to do deconvolution
+  xm.stdsp a1,a0,16 // keep left/right (a0/a1) and a/b (a2/a3) across the two calls
+  xm.stdsp a3,a2,24
+
+// DCT12(head[])
+{ mv a0, right ; mv a1, left }
+lui t3, %hi(dct12_forward)
+  addi t3,t3, %lo(dct12_forward)
+{ nop ; jalr t3 }
+// DCT12(tail[])
+{ xm.ldawsp a0, STK_VEC_HEAD*4 ; nop} // same address for output and input, i.e. in place
+{ xm.ldawsp a1, STK_VEC_HEAD*4 ; nop}
+lui t3, %hi(dct12_forward)
+  addi t3,t3, %lo(dct12_forward)
+{ nop ; jalr t3 }
+  xm.lddsp a1,a0,16
+  xm.lddsp a3,a2,24
+
+// Before deconvolution, right-shift the head vector 2 bits, and
+// right-shift the tail vector 1 bit
+{ li a, 1 ; xm.mkmski c, 16 } // c = mask for the final partial store
+  xm.vlashr d, a
+{ addi t3,sp, (STK_VEC_TAIL)*4 ; xm.vstr d}
+  xm.vlashr t3, a
+{ li a, 2 ; xm.vstr t3}
+  xm.vlashr right, a
+{ add t3, right, b ; xm.vstr right}
+  xm.vlashr t3, a
+  xm.vstrpv t3, c
+
+// Finally, begin deconvolving and interleaving
+
+{ mv t3, d ; nop } // t3 <-- tmp[]; d is reused as a data register below
+  xm.lddi b,d, 0(t3)
+  srai b, b, 1 // first deconvolved value is half of the first tmp word
+{ nop ; lw a,0 ( right)}
+  xm.stdi a,b, 0(left) // each stdi writes one word from right[] and one deconvolved word as a pair
+{ sub d, d, b ; lw a,4 ( right)} // each later value = next tmp word minus the previous deconvolved value
+  xm.stdi a,d, 8(left)
+  xm.lddi b,c, 8(t3)
+{ sub b, b, d ; lw a,8 ( right)}
+  xm.stdi a,b, 16(left)
+{ sub c, c, b ; lw a,12 ( right)}
+  xm.stdi a,c, 24(left)
+  xm.lddi b,d, 16 (t3)
+{ sub b, b, c ; lw a,16 ( right)}
+  xm.stdi a,b, 32(left)
+{ sub d, d, b ; lw a,20 ( right)}
+  xm.stdi a,d, 40(left)
+
+  xm.lddi b,c, 24(t3)
+{ sub b, b, d ; lw a,24 ( right)}
+  xm.stdi a,b, 48(left)
+{ sub c, c, b ; lw a,28 ( right)}
+  xm.stdi a,c, 56(left)
+
+  xm.lddi b,d, 32(t3)
+{ sub b, b, c ; lw a,32 ( right)}
+  xm.stdi a,b, 64(left)
+{ sub d, d, b ; lw a,36 ( right)}
+  xm.stdi a,d, 72(left)
+
+  xm.lddi b,c, 40(t3)
+{ sub b, b, d ; lw a,40 ( right)}
+  xm.stdi a,b, 80(left)
+{ sub c, c, b ; lw a,44 ( right)}
+  xm.stdi a,c, 88(left)
+
+  xm.lddsp s3,s2,8
+{ nop ; xm.retsp (NSTACKWORDS)*4 }
+
+
+
+.set FUNCTION_NAME.nstackwords,(NSTACKWORDS+12) // dct24_forward's own frame plus 12 words, matching the NSTACKWORDS of the dct12_forward it calls
+.globl FUNCTION_NAME.nstackwords
+.set FUNCTION_NAME.maxcores,1
+.globl FUNCTION_NAME.maxcores
+.set FUNCTION_NAME.maxtimers,0
+.globl FUNCTION_NAME.maxtimers
+.set FUNCTION_NAME.maxchanends,0
+.globl FUNCTION_NAME.maxchanends
+.Ltmp0:
+  .size FUNCTION_NAME, .Ltmp0-FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/dct6_s32.S b/lib_xcore_math/src/arch/vx4b/dct/s32/dct6_s32.S
new file mode 100644
index 00000000..0d57c0f6
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/dct/s32/dct6_s32.S
@@ -0,0 +1,65 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined(__VX4B__)
+
+
+/*
+
+Perform a 6-point forward DCT.
+
+Computed directly by multiplying by the DCT matrix. The output has elements ordered
+so that when used in recursive DCT computation the bit-reversed indexing can be used
+to deconvolve those that need it.
+
+void dct6_forward(
+    int32_t y[6],
+    const int32_t x[6]);
+
+*/
+
+#define FUNCTION_NAME dct6_forward
+#define NSTACKWORDS 0
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 2
+
+#define y x10
+#define x x11
+#define mask x12
+#define _32 x13
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+{ li t3, 0 ; li _32, 32 } // _32 = byte step added to t3 between the vlmaccr0 bundles below
+{ xm.mkmski mask, 24 ; xm.vsetc t3} // mask for the final store (presumably 24 bytes = the 6 output words)
+{ nop ; xm.vldc x} // load the input vector
+lui t3, %hi(dct6_matrix)
+  addi t3,t3, %lo(dct6_matrix)
+{ nop ; xm.vclrdr }
+{ add t3, t3, _32 ; xm.vlmaccr0 t3} // six multiply-accumulates, stepping through dct6_matrix
+{ add t3, t3, _32 ; xm.vlmaccr0 t3}
+{ add t3, t3, _32 ; xm.vlmaccr0 t3}
+{ add t3, t3, _32 ; xm.vlmaccr0 t3}
+{ add t3, t3, _32 ; xm.vlmaccr0 t3}
+{ nop ; xm.vlmaccr0 t3}
+  xm.vstrpv y, mask // masked store of the result to y[]
+{ nop ; xm.retsp (NSTACKWORDS)*4 }
+
+
+
+.set
FUNCTION_NAME.nstackwords,NSTACKWORDS
+.globl FUNCTION_NAME.nstackwords
+.set FUNCTION_NAME.maxcores,1
+.globl FUNCTION_NAME.maxcores
+.set FUNCTION_NAME.maxtimers,0
+.globl FUNCTION_NAME.maxtimers
+.set FUNCTION_NAME.maxchanends,0
+.globl FUNCTION_NAME.maxchanends
+.Ltmp0:
+  .size FUNCTION_NAME, .Ltmp0-FUNCTION_NAME
+
+
+#endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/dct8_s32.S b/lib_xcore_math/src/arch/vx4b/dct/s32/dct8_s32.S
new file mode 100644
index 00000000..806ad559
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/dct/s32/dct8_s32.S
@@ -0,0 +1,67 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if defined(__VX4B__)
+
+
+/*
+
+Perform an 8-point forward DCT.
+
+Computed directly by multiplying by the DCT matrix.
+
+headroom_t dct8_forward(
+    int32_t y[8],
+    const int32_t x[8]);
+
+*/
+
+#define FUNCTION_NAME dct8_forward
+#define NSTACKWORDS 0
+
+.text
+.global FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.p2align 4
+
+#define y x10
+#define x x11
+#define tmp x12
+#define _32 x13
+
+
+FUNCTION_NAME:
+  xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+{ li t3, 0 ; li _32, 32 } // _32 = byte step added to t3 between the vlmaccr0 bundles below
+{ nop ; xm.vsetc t3}
+lui t3, %hi(dct8_matrix)
+  addi t3,t3, %lo(dct8_matrix)
+{ nop ; xm.vclrdr }
+{ nop ; xm.vldc x} // load the input vector
+{ add t3, t3, _32 ; xm.vlmaccr0 t3} // eight multiply-accumulates, stepping through dct8_matrix
+{ add t3, t3, _32 ; xm.vlmaccr0 t3}
+{ add t3, t3, _32 ; xm.vlmaccr0 t3}
+{ add t3, t3, _32 ; xm.vlmaccr0 t3}
+{ add t3, t3, _32 ; xm.vlmaccr0 t3}
+{ add t3, t3, _32 ; xm.vlmaccr0 t3}
+{ add t3, t3, _32 ; xm.vlmaccr0 t3}
+{ add t3, t3, _32 ; xm.vlmaccr0 t3}
+xm.vlsat t3 // NOTE(review): t3 is now dct8_matrix + 8*32 bytes -- confirm a suitable vector lives there
+{ nop ; xm.vstr y} // write the result to y[]
+{ li a0, 31 ; xm.vgetc t3} // return value = 31 - (low 5 bits of the VPU control word)
+{ xm.zexti t3, 5 ; nop }
+{ sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 }
+
+
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS
+.globl FUNCTION_NAME.nstackwords +.set FUNCTION_NAME.maxcores,1 +.globl FUNCTION_NAME.maxcores +.set FUNCTION_NAME.maxtimers,0 +.globl FUNCTION_NAME.maxtimers +.set FUNCTION_NAME.maxchanends,0 +.globl FUNCTION_NAME.maxchanends +.Ltmp0: + .size FUNCTION_NAME, .Ltmp0-FUNCTION_NAME + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/dct_adsb_s32.S b/lib_xcore_math/src/arch/vx4b/dct/s32/dct_adsb_s32.S new file mode 100644 index 00000000..287c18b6 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/dct/s32/dct_adsb_s32.S @@ -0,0 +1,78 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +#if defined(__VX4B__) + + +/* + +headroom_t dct_adsb_s32( + int32_t sums[], + int32_t diffs[], + const int32_t head[], + const int32_t tail[], + const unsigned chunks, + const int32_t dct_lut[]); + +*/ + +#define FUNCTION_NAME dct_adsb_s32 +#define NSTACKWORDS 8 + +.text +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.p2align 2 + +#define sums x10 +#define diffs x11 +#define head x12 +#define tail x13 +#define chunks x18 +#define lut x19 +#define _32 x20 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + +{ li _32, 32 ; nop} +mv chunks, a4 + +{ xm.shli t3, _32, 2 ; nop} +mv lut, a5 + +{ mv t3, tail ; xm.vsetc t3} + +.L_loop_top: + { addi chunks, chunks, -1 ; xm.vldr t3} + { add tail, tail, _32 ; xm.vladsb head} + { add head, head, _32 ; xm.vstr sums} + { mv t3, lut ; xm.vstd diffs} + { add lut, lut, _32 ; xm.vldr t3} + { add sums, sums, _32 ; xm.vlmul0 diffs} + { add diffs, diffs, _32 ; xm.vstr diffs} + { mv t3, tail ; xm.bt chunks, .L_loop_top } +.L_loop_bot: + + xm.lddsp s5,s4,8 + xm.lddsp s3,s2,0 +{ li a0, 31 ; xm.vgetc t3} +{ 
xm.zexti t3, 5 ; nop } +{ sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.set FUNCTION_NAME.nstackwords,NSTACKWORDS +.globl FUNCTION_NAME.nstackwords +.set FUNCTION_NAME.maxcores,1 +.globl FUNCTION_NAME.maxcores +.set FUNCTION_NAME.maxtimers,0 +.globl FUNCTION_NAME.maxtimers +.set FUNCTION_NAME.maxchanends,0 +.globl FUNCTION_NAME.maxchanends +.Ltmp0: + .size FUNCTION_NAME, .Ltmp0-FUNCTION_NAME + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/dct_deconvolve_s32.S b/lib_xcore_math/src/arch/vx4b/dct/s32/dct_deconvolve_s32.S new file mode 100644 index 00000000..f55c7f00 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/dct/s32/dct_deconvolve_s32.S @@ -0,0 +1,81 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +#if defined(__VX4B__) + + +/* + +length must be a multiple of 8 + +void dct_deconvolve_s32( + int32_t res[], + const int32_t B[], + const int32_t D[], + const unsigned length); + +*/ + +#define FUNCTION_NAME dct_deconvolve_s32 +#define NSTACKWORDS 8 + +.text +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.p2align 2 + +#define res x10 +#define B x11 +#define D x12 +#define len x13 +#define even x18 +#define a x19 +#define b x20 +#define c x21 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + +// Just set it up so that c contains half D[0], so that when +// it's subtracted from D[0] we get (D[0] >> 1) + { srli len, len, 3 ; lw c,0 ( D)} + srai c, c, 1 + { li t3, 16 ; xm.bu .L_loop_top } + +.p2align 4 +.L_loop_top: + xm.lddi a,b, 0(D) + { sub a, a, c ; lw even,0 ( B)} + xm.stdi even,a, 0(res) + { sub b, b, a ; lw even,4 ( B)} + xm.stdi even,b, 8(res) + xm.lddi a,c, 8(D) + { sub a, a, b ; lw even,8 ( 
B)} + xm.stdi even,a, 16(res) + { sub c, c, a ; lw even,12 ( B)} + xm.stdi even,c, 24(res) + { add D, D, t3 ; addi len, len, -1 } + { add res, res, t3 ; add B, B, t3 } + { add res, res, t3 ; xm.bt len, .L_loop_top } +.L_loop_bot: + +.L_finish: + xm.lddsp s5,s4,8 + xm.lddsp s3,s2,0 + xm.retsp (NSTACKWORDS)*4 /* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS \nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + +.set FUNCTION_NAME.nstackwords,NSTACKWORDS +.globl FUNCTION_NAME.nstackwords +.set FUNCTION_NAME.maxcores,1 +.globl FUNCTION_NAME.maxcores +.set FUNCTION_NAME.maxtimers,0 +.globl FUNCTION_NAME.maxtimers +.set FUNCTION_NAME.maxchanends,0 +.globl FUNCTION_NAME.maxchanends +.Ltmp0: + .size FUNCTION_NAME, .Ltmp0-FUNCTION_NAME + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/idct6_s32.S b/lib_xcore_math/src/arch/vx4b/dct/s32/idct6_s32.S new file mode 100644 index 00000000..0187b7b9 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/dct/s32/idct6_s32.S @@ -0,0 +1,64 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +#if defined(__VX4B__) + + +/* + +Perform a 6-point inverse DCT. + +Computed directly by multiplying by the IDCT matrix. 
+ +headroom_t dct6_inverse( + int32_t y[6], + const int32_t x[6]); + +*/ + +#define FUNCTION_NAME dct6_inverse +#define NSTACKWORDS 0 + +.text +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.p2align 2 + +#define y x10 +#define x x11 +#define mask x12 +#define _32 x13 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ +{ li t3, 0 ; li _32, 32 } +{ xm.mkmski mask, 24 ; xm.vsetc t3} +{ nop ; xm.vldc x} +lui t3, %hi(idct6_matrix) + addi t3,t3, %lo(idct6_matrix) +{ nop ; xm.vclrdr } +{ add t3, t3, _32 ; xm.vlmaccr0 t3} +{ add t3, t3, _32 ; xm.vlmaccr0 t3} +{ add t3, t3, _32 ; xm.vlmaccr0 t3} +{ add t3, t3, _32 ; xm.vlmaccr0 t3} +{ add t3, t3, _32 ; xm.vlmaccr0 t3} +{ add t3, t3, _32 ; xm.vlmaccr0 t3} +xm.vlsat t3 + xm.vstrpv y, mask +{ nop ; xm.retsp (NSTACKWORDS)*4 } + + + +.set FUNCTION_NAME.nstackwords,NSTACKWORDS +.globl FUNCTION_NAME.nstackwords +.set FUNCTION_NAME.maxcores,1 +.globl FUNCTION_NAME.maxcores +.set FUNCTION_NAME.maxtimers,0 +.globl FUNCTION_NAME.maxtimers +.set FUNCTION_NAME.maxchanends,0 +.globl FUNCTION_NAME.maxchanends +.Ltmp0: + .size FUNCTION_NAME, .Ltmp0-FUNCTION_NAME + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/idct8_s32.S b/lib_xcore_math/src/arch/vx4b/dct/s32/idct8_s32.S new file mode 100644 index 00000000..09cfa0df --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/dct/s32/idct8_s32.S @@ -0,0 +1,65 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +#if defined(__VX4B__) + + +/* + +Perform an 8-point inverse DCT. + +Computed directly by multiplying by the IDCT matrix. 
+ +headroom_t dct8_inverse( + int32_t y[8], + const int32_t x[8]); + +*/ + +#define FUNCTION_NAME dct8_inverse +#define NSTACKWORDS 0 + +.text +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.p2align 4 + +#define y x10 +#define x x11 +#define tmp x12 +#define _32 x13 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ +{ li t3, 0 ; li _32, 32 } +{ nop ; xm.vsetc t3} +lui t3, %hi(idct8_matrix) + addi t3,t3, %lo(idct8_matrix) +{ nop ; xm.vclrdr } +{ nop ; xm.vldc x} +{ add t3, t3, _32 ; xm.vlmaccr0 t3} +{ add t3, t3, _32 ; xm.vlmaccr0 t3} +{ add t3, t3, _32 ; xm.vlmaccr0 t3} +{ add t3, t3, _32 ; xm.vlmaccr0 t3} +{ add t3, t3, _32 ; xm.vlmaccr0 t3} +{ add t3, t3, _32 ; xm.vlmaccr0 t3} +{ add t3, t3, _32 ; xm.vlmaccr0 t3} +{ add t3, t3, _32 ; xm.vlmaccr0 t3} +xm.vlsat t3 +{ nop ; xm.vstr y} +{ nop ; xm.retsp (NSTACKWORDS)*4 } + + +.set FUNCTION_NAME.nstackwords,NSTACKWORDS +.globl FUNCTION_NAME.nstackwords +.set FUNCTION_NAME.maxcores,1 +.globl FUNCTION_NAME.maxcores +.set FUNCTION_NAME.maxtimers,0 +.globl FUNCTION_NAME.maxtimers +.set FUNCTION_NAME.maxchanends,0 +.globl FUNCTION_NAME.maxchanends +.Ltmp0: + .size FUNCTION_NAME, .Ltmp0-FUNCTION_NAME + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/idct_adsb.S b/lib_xcore_math/src/arch/vx4b/dct/s32/idct_adsb.S new file mode 100644 index 00000000..63f3bafa --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/dct/s32/idct_adsb.S @@ -0,0 +1,73 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+#if defined(__VX4B__) + + +/* + +void idct_adsb( + int32_t sums[], + int32_t diffs[], + const int32_t head[], + const int32_t tail[], + const unsigned chunks); + +*/ + +#define FUNCTION_NAME idct_adsb +#define NSTACKWORDS 8 + +.text +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.p2align 2 + +#define STK_CHUNKS (NSTACKWORDS+1) + +#define sums x10 +#define diffs x11 +#define s x12 +#define t_tilde x13 +#define chunks x18 +#define _32 x19 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + +//{ li _32, 32 ; lw chunks, (STK_CHUNKS)*4 (sp)} + { li _32, 32 ; nop } + mv chunks, a4 + + +{ li t3, 0 ; nop } +{ mv t3, t_tilde ; xm.vsetc t3} + +.L_loop_top: + { addi chunks, chunks, -1 ; xm.vldr t3} + { add t3, t3, _32 ; xm.vladsb s} + { add s, s, _32 ; xm.vstr sums} + { add sums, sums, _32 ; xm.vstd diffs} + { add diffs, diffs, _32 ; xm.bt chunks, .L_loop_top } +.L_loop_bot: + + xm.lddsp s5,s4,8 + xm.lddsp s3,s2,0 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + + +.set FUNCTION_NAME.nstackwords,NSTACKWORDS +.globl FUNCTION_NAME.nstackwords +.set FUNCTION_NAME.maxcores,1 +.globl FUNCTION_NAME.maxcores +.set FUNCTION_NAME.maxtimers,0 +.globl FUNCTION_NAME.maxtimers +.set FUNCTION_NAME.maxchanends,0 +.globl FUNCTION_NAME.maxchanends +.Ltmp0: + .size FUNCTION_NAME, .Ltmp0-FUNCTION_NAME + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/idct_convolve.S b/lib_xcore_math/src/arch/vx4b/dct/s32/idct_convolve.S new file mode 100644 index 00000000..612b94b7 --- 
/dev/null +++ b/lib_xcore_math/src/arch/vx4b/dct/s32/idct_convolve.S @@ -0,0 +1,80 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +#if defined(__VX4B__) + + +/* + +Applies the convolution needed for the recursive IDCT. + +given x[], the result is + + y[0] = x[0] + y[1:] = 0.5*(x[1:] + x[0:-1]) + +Each "chunk" is 8 elements, so if the data isn't a multiple of 8 elements +you'll need buffer space at the end of the data that can be safely clobbered. + +void idct_convolve( + int32_t y[], + const int32_t x[], + const unsigned chunks); + +*/ + +#define FUNCTION_NAME idct_convolve +#define NSTACKWORDS 4 + +.text +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.p2align 2 + +#define y x10 +#define x x11 +#define chunks x12 +#define _32 x13 +#define tmp x18 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + + // This has to start at the end or else values will get clobbered + // early if it's done in-place + + li t3, 0x80 // 32-bit mode with SHR=1 on VLADSB +{ xm.shli s3, chunks, 3 ; xm.vsetc t3} +{ li _32, 32 ; lw tmp,0 ( x)} + sh2add x, s3, x + sh2add y, s3, y +{ sub x, x, _32 ; sub y, y, _32 } +{ addi t3, x, -4 ; nop } + +.L_loop_top: + { addi chunks, chunks, -1 ; xm.vldr t3} + { sub x, x, _32 ; xm.vladsb x} + { sub y, y, _32 ; xm.vstr y} + { addi t3, x, -4 ; xm.bt chunks, .L_loop_top } +.L_loop_bot: + + sw tmp,32 ( y)// y is pointing 8 words before where it started + + xm.lddsp s3,s2,0 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need 
scaling' */ + + +.set FUNCTION_NAME.nstackwords,NSTACKWORDS +.globl FUNCTION_NAME.nstackwords +.set FUNCTION_NAME.maxcores,1 +.globl FUNCTION_NAME.maxcores +.set FUNCTION_NAME.maxtimers,0 +.globl FUNCTION_NAME.maxtimers +.set FUNCTION_NAME.maxchanends,0 +.globl FUNCTION_NAME.maxchanends +.Ltmp0: + .size FUNCTION_NAME, .Ltmp0-FUNCTION_NAME + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/dct/s32/idct_scale.S b/lib_xcore_math/src/arch/vx4b/dct/s32/idct_scale.S new file mode 100644 index 00000000..22110856 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/dct/s32/idct_scale.S @@ -0,0 +1,66 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +#if defined(__VX4B__) + + +/* + + + +void idct_scale( + int32_t x[], + const int32_t idct_lut[], + const unsigned chunks, + const right_shift_t shr); + +*/ + +#define FUNCTION_NAME idct_scale +#define NSTACKWORDS 4 + +.text +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.p2align 2 + + +#define x x10 +#define lut x11 +#define chunks x12 +#define shr x13 +#define _32 x18 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + +{ li t3, 0 ; li _32, 32 } +{ mv t3, lut ; xm.vsetc t3} + +.L_loop_top: + { add t3, t3, _32 ; xm.vldr t3} + { nop ; xm.vlmul0 x} + { nop ; xm.vstr x} + xm.vlashr x, shr + { addi chunks, chunks, -1 ; xm.vstr x} + { add x, x, _32 ; xm.bt chunks, .L_loop_top } +.L_loop_bot: + + xm.lddsp s3,s2,0 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + + +.set 
FUNCTION_NAME.nstackwords,NSTACKWORDS +.globl FUNCTION_NAME.nstackwords +.set FUNCTION_NAME.maxcores,1 +.globl FUNCTION_NAME.maxcores +.set FUNCTION_NAME.maxtimers,0 +.globl FUNCTION_NAME.maxtimers +.set FUNCTION_NAME.maxchanends,0 +.globl FUNCTION_NAME.maxchanends +.Ltmp0: + .size FUNCTION_NAME, .Ltmp0-FUNCTION_NAME + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/dct/s8/dct8x8_stageA.S b/lib_xcore_math/src/arch/vx4b/dct/s8/dct8x8_stageA.S new file mode 100644 index 00000000..8c0ff228 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/dct/s8/dct8x8_stageA.S @@ -0,0 +1,187 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +#if defined(__VX4B__) + + +/* + +Perform the first step of a 2D 8-by-8 forward or inverse DCT on 8-bit data. + +The first step takes an 8-bit tensor x[8][8] as input and populates a 16-bit +tensor y[8][8] as output. + +The operation is to perform an 8-point DCT on each row of x[][] to get +an intermediate tensor tmp[][], and then populate y[][] with the TRANSPOSE of +tmp[][]. + +Whether the forward or inverse DCT is performed depends on whether the +matrix[][] argument points to dct8_matrix_16bit[][] or +idct8_matrix_16bit[][]. 
+ +headroom_t dct8x8_stageA( + int16_t y[8][8], + const int8_t x[8][8], + const int16_t matrix[8][16]); + +*/ + +#define FUNCTION_NAME dct8x8_stageA +#define NSTACKWORDS 36 + +.text +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.p2align 4 + +#define STK_BUFF (NSTACKWORDS - 32-1) +#define STK_LAST_ROW (NSTACKWORDS - 4-1) // will point to last row of 16-bit buffered input matrix + +#define y x10 +#define x x11 +#define mat x12 +# define _16 mat +#define buff x13 +#define count x18 +#define _32 x19 + +// Because a 16-bit DCT matrix is used and 8-bit inputs, the maximum accumulator value is +// 2^24, and we don't want to output anything larger than 2^14 (otherwise dct8x8_stageB() +// could saturate the accumulators) so we down-shift the accumulators 10 bits. +.L_sat_vec: .short 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10 /* Translation error on this line: unexpected token at position 12. */ + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + +////// Expand to 16-bits + + li t3, 0x200 // 8-bit mode +{ addi buff,sp, (STK_BUFF)*4 ; xm.vsetc t3} +lui t3, %hi(vpu_vec_0x01) + addi t3,t3, %lo(vpu_vec_0x01) +{ li t3, 16 ; xm.vldc t3} + +{ li _32, 32 ; xm.vclrdr } +{ add count, x, t3 ; xm.vlmacc0 x} +{ add buff, buff, _32 ; xm.vstr buff} + +{ nop ; xm.vclrdr } +{ add count, count, t3 ; xm.vlmacc0 count} +{ add buff, buff, _32 ; xm.vstr buff} + +{ nop ; xm.vclrdr } +{ add count, count, t3 ; xm.vlmacc0 count} +{ add buff, buff, _32 ; xm.vstr buff} + +{ nop ; xm.vclrdr } +{ nop ; xm.vlmacc0 count} +{ nop ; xm.vstr buff} + +////// Perform eight 8-point, 16-bit DCTs + +// The trick here is that we'll transpose while computing the +// output. 
Instead of loading the row from x[] into vC[], we'll +// load a row from the DCT matrix, and each vlmaccr will apply +// to a different row of x[]. +// Then when we saturate and store that in y[], we'll have +// what would have been the first COLUMN of output as the first +// ROW of output. + +// The other catch is that the data needs to be masked to avoid +// including the wrong stuff in the accumulators. This is easily +// handled by just padding the matrix with 0's (then it will be +// the same size as the 32-bit DCT8 matrix). + +// Finally, we'll compute two rows of output per loop iteration, +// since we have enough accumulators to do so. + +// (also, we don't need the original x[] pointer anymore, so we'll +// put something else in there) +#undef x +#define sat x11 + + li t3, 0x100 // 16-bit mode +{ nop ; xm.vsetc t3} +lui t3, %hi(.L_sat_vec) + addi t3,t3, %lo(.L_sat_vec) +{ li count, 4 ; mv sat, t3 } +{ li _16, 16 ; mv t3, mat } // NOTE: _16 and mat are the same register! +.L_loop_top: + { add t3, t3, _32 ; xm.vclrdr } + { addi buff,sp, (STK_LAST_ROW)*4 ; xm.vldc t3} + + { nop ; xm.vlmaccr0 buff} + xm.vlmaccr1 buff + { sub buff, buff, _16 ; nop} + { nop ; xm.vlmaccr0 buff} + xm.vlmaccr1 buff + { sub buff, buff, _16 ; nop} + { nop ; xm.vlmaccr0 buff} + xm.vlmaccr1 buff + { sub buff, buff, _16 ; nop} + { nop ; xm.vlmaccr0 buff} + xm.vlmaccr1 buff + { sub buff, buff, _16 ; nop} + { nop ; xm.vlmaccr0 buff} + xm.vlmaccr1 buff + { sub buff, buff, _16 ; nop} + { nop ; xm.vlmaccr0 buff} + xm.vlmaccr1 buff + { sub buff, buff, _16 ; nop} + { nop ; xm.vlmaccr0 buff} + xm.vlmaccr1 buff + { sub buff, buff, _16 ; nop} + { sub t3, t3, _32 ; xm.vlmaccr0 buff} + xm.vlmaccr1 buff + { addi buff,sp, (STK_LAST_ROW)*4 ; xm.vldc t3} + { nop ; xm.vlmaccr0 buff} + xm.vlmaccr1 buff + { sub buff, buff, _16 ; nop} + { nop ; xm.vlmaccr0 buff} + xm.vlmaccr1 buff + { sub buff, buff, _16 ; nop} + { nop ; xm.vlmaccr0 buff} + xm.vlmaccr1 buff + { sub buff, buff, _16 ; nop} + { nop ; 
xm.vlmaccr0 buff} + xm.vlmaccr1 buff + { sub buff, buff, _16 ; nop} + { nop ; xm.vlmaccr0 buff} + xm.vlmaccr1 buff + { sub buff, buff, _16 ; nop} + { nop ; xm.vlmaccr0 buff} + xm.vlmaccr1 buff + { sub buff, buff, _16 ; nop} + { nop ; xm.vlmaccr0 buff} + xm.vlmaccr1 buff + { sub buff, buff, _16 ; nop} + + { addi count, count, -1 ; xm.vlmaccr0 buff} + xm.vlmaccr1 buff + { add t3, t3, _32 ; nop} + xm.vlsat sat + { add t3, t3, _32 ; xm.vstr y} + { add y, y, _32 ; nop} + bnez count, .L_loop_top +.L_loop_bot: + + xm.lddsp s3,s2,0 + +{ li a0, 15 ; xm.vgetc t3} +{ xm.zexti t3, 5 ; nop } +{ sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.set FUNCTION_NAME.nstackwords,NSTACKWORDS +.globl FUNCTION_NAME.nstackwords +.set FUNCTION_NAME.maxcores,1 +.globl FUNCTION_NAME.maxcores +.set FUNCTION_NAME.maxtimers,0 +.globl FUNCTION_NAME.maxtimers +.set FUNCTION_NAME.maxchanends,0 +.globl FUNCTION_NAME.maxchanends +.Ltmp0: + .size FUNCTION_NAME, .Ltmp0-FUNCTION_NAME + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/dct/s8/dct8x8_stageB.S b/lib_xcore_math/src/arch/vx4b/dct/s8/dct8x8_stageB.S new file mode 100644 index 00000000..23b13fb4 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/dct/s8/dct8x8_stageB.S @@ -0,0 +1,191 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +#if defined(__VX4B__) + + +/* + +Perform the final step of a 2D 8-by-8 forward or inverse DCT on 8-bit data. + +The first step takes an 8-bit tensor x[8][8] as input and populates a 16-bit +tensor y[8][8] as output. The first step is implemented as dct8x8_stageA(). + +The final step takes a 16-bit tensor x[8][8] as input and populates an 8-bit +tensor y[8][8] as output. + +The operation is to perform an 8-point DCT on each row of x[][] to get +an intermediate tensor tmp[][], and then populate y[][] with the TRANSPOSE of +tmp[][]. 
+ +Whether the forward or inverse DCT is performed depends on whether the +matrix[][] argument points to dct8_matrix_16bit[][] or +idct8_matrix_16bit[][]. + +headroom_t dct8x8_stageB( + int8_t y[8][8], + const int16_t x[8][8], + const int16_t matrix[8][16], + const right_shift_t sat); + +*/ + +#define FUNCTION_NAME dct8x8_stageB +#define NSTACKWORDS 40 + +.text +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.p2align 4 + +#define STK_BUFF (NSTACKWORDS - 32-1) + +#define y x10 +#define x x11 +#define mat x12 +# define _32 mat +#define buff x13 +#define count x18 +#define A x19 +#define mask x20 +#define _16 x21 +#define sat x22 + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + xm.stdsp s7,s6,16 + + li t3, 0x100 // 16-bit mode +{ li _16, 16 ; xm.vsetc t3} +{ add a3, a3, _16 ; add t3, a3, _16 } +xm.zip t3, a3, 4 + +// Store VLSAT argument vector in y[] (which won't be needed +// until after all VLSATs are done). + xm.stdi a3,a3, 0(y) + xm.stdi a3,a3, 8(y) + xm.stdi a3,a3, 16(y) + xm.stdi a3,a3, 24(y) + +////// Perform eight 8-point, 16-bit DCTs + +// We'll place the result on the stack as 16-bit values because it +// will be faster than switching between modes while DCTing. +// We'll again do the transpose in-flight. +// The stack space doesn't matter because stageA uses the same amount + +{ li count, 4 ; addi buff,sp, (STK_BUFF)*4 } +{ nop ; li t3, 28 } +// We need to traverse the rows of x[] backwards to get elements +// in the right output order. + sh2add x, t3, x +{ li _32, 32 ; mv t3, mat } // NOTE: _32 and mat are the same register! 
+ +.L_loop_top: + { add t3, t3, _32 ; xm.vclrdr } + { mv A, x ; xm.vldc t3} + + { nop ; xm.vlmaccr0 A} + xm.vlmaccr1 A + { sub A, A, _16 ; nop} + { nop ; xm.vlmaccr0 A} + xm.vlmaccr1 A + { sub A, A, _16 ; nop} + { nop ; xm.vlmaccr0 A} + xm.vlmaccr1 A + { sub A, A, _16 ; nop} + { nop ; xm.vlmaccr0 A} + xm.vlmaccr1 A + { sub A, A, _16 ; nop} + { nop ; xm.vlmaccr0 A} + xm.vlmaccr1 A + { sub A, A, _16 ; nop} + { nop ; xm.vlmaccr0 A} + xm.vlmaccr1 A + { sub A, A, _16 ; nop} + { nop ; xm.vlmaccr0 A} + xm.vlmaccr1 A + { sub A, A, _16 ; nop} + { sub t3, t3, _32 ; xm.vlmaccr0 A} + xm.vlmaccr1 A + { mv A, x ; xm.vldc t3} + { nop ; xm.vlmaccr0 A} + xm.vlmaccr1 A + { sub A, A, _16 ; nop} + { nop ; xm.vlmaccr0 A} + xm.vlmaccr1 A + { sub A, A, _16 ; nop} + { nop ; xm.vlmaccr0 A} + xm.vlmaccr1 A + { sub A, A, _16 ; nop} + { nop ; xm.vlmaccr0 A} + xm.vlmaccr1 A + { sub A, A, _16 ; nop} + { nop ; xm.vlmaccr0 A} + xm.vlmaccr1 A + { sub A, A, _16 ; nop} + { nop ; xm.vlmaccr0 A} + xm.vlmaccr1 A + { sub A, A, _16 ; nop} + { nop ; xm.vlmaccr0 A} + xm.vlmaccr1 A + { sub A, A, _16 ; nop} + + { add t3, t3, _32 ; xm.vlmaccr0 A} + xm.vlmaccr1 A + { add t3, t3, _32 ; nop} + xm.vlsat y + { addi count, count, -1 ; xm.vstr buff} + { add buff, buff, _32 ; nop} + bnez count, .L_loop_top +.L_loop_bot: + +// We could get the headroom right now on the 16-bit values, but +// there's a chance that VDEPTH8 causes a value to round away from +// zero in a way that decreases headroom. + +// Reduce depth to 8 bits, moving to y[]. 
+{ addi t3,sp, (STK_BUFF)*4 ; nop } +{ add t3, t3, _32 ; xm.vldr t3} +{ nop ; xm.vdepth8 } +{ add y, y, _16 ; xm.vstr y} +{ add t3, t3, _32 ; xm.vldr t3} +{ nop ; xm.vdepth8 } +{ add y, y, _16 ; xm.vstr y} +{ add t3, t3, _32 ; xm.vldr t3} +{ xm.mkmski mask, 16 ; xm.vdepth8 } +{ add y, y, _16 ; xm.vstr y} +{ xm.shli t3, _32, 4 /*8-bit mode*/; xm.vldr t3} +{ add _16, _32, _16 ; xm.vdepth8 } + xm.vstrpv y, mask + +// Load/store one last time to get headroom +{ sub y, y, _16 ; xm.vsetc t3} +{ nop ; xm.vldd y} +{ add y, y, _32 ; xm.vstd y} +{ nop ; xm.vldd y} +{ nop ; xm.vstd y} + + xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 + xm.lddsp s7,s6,16 + +{ li a0, 7 ; xm.vgetc t3} +{ xm.zexti t3, 5 ; nop } +{ sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.set FUNCTION_NAME.nstackwords,NSTACKWORDS +.globl FUNCTION_NAME.nstackwords +.set FUNCTION_NAME.maxcores,1 +.globl FUNCTION_NAME.maxcores +.set FUNCTION_NAME.maxtimers,0 +.globl FUNCTION_NAME.maxtimers +.set FUNCTION_NAME.maxchanends,0 +.globl FUNCTION_NAME.maxchanends +.Ltmp0: + .size FUNCTION_NAME, .Ltmp0-FUNCTION_NAME + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/dct/vect_s32_flip.S b/lib_xcore_math/src/arch/vx4b/dct/vect_s32_flip.S new file mode 100644 index 00000000..71d5eb32 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/dct/vect_s32_flip.S @@ -0,0 +1,54 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+#if defined(__VX4B__) + + +/* + +void vect_s32_flip( + int32_t y[], + const int32_t x[], + const unsigned length); + +*/ + +#define FUNCTION_NAME vect_s32_flip +#define NSTACKWORDS 0 + +.text +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.p2align 4 + +#define y x10 +#define x x11 +#define len x12 +#define a x13 +#define b x28 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ +{ addi x, x, -4 ; addi y, y, -8 } +.L_loop_top: + { addi y, y, 4 ; lw a,4 ( x)} + { addi x, x, 4 ; xm.ldw b, len ( x)} + xm.stw a,len(y) /* XAT Warning: "Falling back on assumption: the int < 12 for the integer value of the item at position 2 in the instruction's operands in stwi a, y,len \nMessage: The offset can be encoded in s2rus immediate" */ + { addi len, len, -2 ; sw b,4 ( y)} + { nop ; xm.bt len, .L_loop_top } +.L_loop_bottom: + xm.retsp (NSTACKWORDS)*4 /* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS \nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + +.set FUNCTION_NAME.nstackwords,NSTACKWORDS +.globl FUNCTION_NAME.nstackwords +.set FUNCTION_NAME.maxcores,1 +.globl FUNCTION_NAME.maxcores +.set FUNCTION_NAME.maxtimers,0 +.globl FUNCTION_NAME.maxtimers +.set FUNCTION_NAME.maxchanends,0 +.globl FUNCTION_NAME.maxchanends +.Ltmp0: + .size FUNCTION_NAME, .Ltmp0-FUNCTION_NAME + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/fft/dif_fft.S b/lib_xcore_math/src/arch/vx4b/fft/dif_fft.S new file mode 100644 index 00000000..43c4bcac --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/fft/dif_fft.S @@ -0,0 +1,285 @@ +// Copyright 2020-2026 XMOS LIMITED. 
+// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +/* + void fft_dif_forward ( + complex_s32_t * x, + unsigned n, + headroom_t* hr, + exponent_t* exp); + + void fft_dif_inverse ( + complex_s32_t* x, + unsigned n, + headroom_t* hr, + exponent_t* exp); +*/ + + +#define NSTACKWORDS (64) + +#define STACK_N (8) +#define STACK_EXP (9) + +#define x_p x10 +#define n x11 +#define hr_p x12 +#define exp_minus_one hr_p +#define twiddle_lut_p x13 +#define _32 x18 +#define j x19 +#define k x20 +#define a x21 +#define b x22 +#define exp_modifier x23 +#define s x24 + + +.text +.globl fft_dif_forward +.type fft_dif_forward,@function + +.p2align 2 +fft_dif_forward: + + xm.entsp (NSTACKWORDS)*4 + xm.stdsp x18,x12,0*8 + xm.stdsp x19,x20,1*8 + xm.stdsp x21,x22,2*8 + xm.stdsp x23,x24,3*8 + + { nop ; sw a3, (STACK_EXP)*4 (sp)} + la t3, xmath_dif_fft_lut_size + { li s3, 32 ; lw s2,0 ( t3)} + la t3, xmath_dif_fft_lut + { add twiddle_lut_p, t3, s2 ; xm.shli s2, n, 3 } + { add twiddle_lut_p, twiddle_lut_p, s3 ; nop } + { sub twiddle_lut_p, twiddle_lut_p, s2 ; nop } + +dif_fft_impl_start: + { li s, 31 ; lw t3,0 ( hr_p)} + { sub s, s, t3 ; srli j, n, 2 } + + la t3, fft_hr_lut + + { li _32, 32 ; xm.ldw t3,s(t3)} + { mv exp_modifier, t3 ; xm.vsetc t3} + + { srli s, n, 3 ; sw n, (STACK_N)*4 (sp)} + { mv t3, x_p; ; xm.brff s, dif_fft_last_two_rounds_4_point } + + mul b, n, _32 + { srli b, b, 3 ; mv a, _32 } //astew: `shl b, n, 2` + { srli n, n, 4 ; nop } + + la t3, fft_hr_lut + + + { mv s, t3 ; sub k, b, _32 } + +dif_fft_round_loop: + dif_fft_outer_loop: + { add t3, x_p, k ; mv j, a } + { add twiddle_lut_p, twiddle_lut_p, _32 ; xm.vldc twiddle_lut_p} + + dif_fft_inner_loop: + { add t3, t3, b ; xm.vldr t3} + { sub t3, t3, b ; xm.vladsb t3} + { add t3, t3, b ; xm.vstr t3} + { sub j, j, _32 ; xm.vcmr0 } + { nop ; xm.vcmi0 } + { nop ; xm.vstr t3} + { add t3, t3, b ; xm.bt j, dif_fft_inner_loop } + + { sub k, k, _32 ; xm.bt k, 
dif_fft_outer_loop } + + { srli b, b, 1 ; xm.vgetc t3} + { xm.shli a, a, 1 ; xm.zexti t3, 5 } + { sub k, b, _32 ; xm.ldw t3, t3 (s)} + { add exp_modifier, exp_modifier, t3 ; xm.vsetc t3} + + { srli n, n, 1 ; xm.bt n, dif_fft_round_loop } + +dif_fft_last_two_rounds: + { mv t3, x_p ; lw n, (STACK_N)*4 (sp)} + { srli j, n, 2 ; nop } + +dif_fft_last_two_rounds_loop: + { nop ; xm.vldr t3} + { addi j, j, -1 ; xm.vftff } + { add t3, t3, _32 ; xm.vstr t3} + + dif_fft_last_two_rounds_4_point: + { nop ; xm.vldr t3} + { addi j, j, -1 ; xm.vftff } + { add t3, t3, _32 ; xm.vstr t3} + { nop ; xm.bt j, dif_fft_last_two_rounds_loop } + +dif_fft_done: + + //update the hr + { li s, 31 ; xm.vgetc t3} + { xm.zexti t3, 5 ; nop } + { sub s, s, t3 ; nop } + xm.lddsp x18,x12,0*8 + { nop ; sw s,0 ( hr_p)} + + //update the exponent + { nop ; lw t3, (STACK_EXP)*4 (sp)} + { nop ; lw s,0 ( t3)} + + srai exp_modifier, exp_modifier, 16 + { add s, s, exp_modifier ; nop } + { nop ; sw s,0 ( t3)} + + + + xm.lddsp x19,x20,1*8 + xm.lddsp x21,x22,2*8 + xm.lddsp x23,x24,3*8 + + xm.retsp (NSTACKWORDS)*4 + + + .set fft_dif_forward.nstackwords,NSTACKWORDS + .globl fft_dif_forward.nstackwords + .set fft_dif_forward.maxcores,1 + .globl fft_dif_forward.maxcores + .set fft_dif_forward.maxtimers,0 + .globl fft_dif_forward.maxtimers + .set fft_dif_forward.maxchanends,0 + .globl fft_dif_forward.maxchanends +.L_fft_dif_forward: + .size fft_dif_forward, .L_fft_dif_forward-fft_dif_forward + + + + + + + + + + + .text + .globl fft_dif_inverse + .type fft_dif_inverse, @function + +.p2align 2 +fft_dif_inverse: + xm.entsp (NSTACKWORDS)*4 + xm.stdsp x18,x12,0*8 + xm.stdsp x19,x20,1*8 + xm.stdsp x21,x22,2*8 + xm.stdsp x23,x24,3*8 + + { nop ; sw a3, (STACK_EXP)*4 (sp)} + la t3, xmath_dif_fft_lut_size + { li s3, 32 ; lw s2,0 ( t3)} + la t3, xmath_dif_fft_lut + { add twiddle_lut_p, t3, s2 ; xm.shli s2, n, 3 } + { add twiddle_lut_p, twiddle_lut_p, s3 ; nop } + { sub twiddle_lut_p, twiddle_lut_p, s2 ; nop } + 
+dif_ifft_impl_start: + { li s, 31 ; lw t3,0 ( hr_p)} + { sub s, s, t3 ; srli j, n, 2 } + + la t3, fft_hr_lut + + { li _32, 32 ; xm.ldw t3,s( t3)} + { mv exp_modifier, t3 ; xm.vsetc t3} + + { srli s, n, 3 ; sw n, (STACK_N)*4 (sp)} + { mv t3, x_p; ; xm.brff s, dif_ifft_last_two_rounds_4_point } /* Translation error on this line: unexpected token at position 45. */ + + mul b, n, _32 /* Translation error on this line: unexpected token at position 89. */ + { srli b, b, 3 ; mv a, _32 } + { sub k, b, _32 ; srli n, n, 4 } + + la t3, fft_hr_lut + + { mv s, t3 ; lw exp_minus_one,0 ( t3)} + +dif_ifft_round_loop: + dif_ifft_outer_loop: + { add t3, x_p, k ; mv j, a } + { add twiddle_lut_p, twiddle_lut_p, _32 ; xm.vldc twiddle_lut_p} + + dif_ifft_inner_loop: + { add t3, t3, b ; xm.vldr t3} + { sub t3, t3, b ; xm.vladsb t3} + { add t3, t3, b ; xm.vstr t3 } + { sub j, j, _32 ; xm.vcmcr0 } + { nop ; xm.vcmci0 } + { nop ; xm.vstr t3 } + { add t3, t3, b ; xm.bt j, dif_ifft_inner_loop } + + { sub k, k, _32 ; xm.bt k, dif_ifft_outer_loop } + + { add exp_modifier, exp_modifier, exp_minus_one ;nop } /* Translation error on this line: unexpected token at position 89. */ + { srli b, b, 1 ; xm.vgetc t3} + { xm.shli a, a, 1 ; xm.zexti t3, 5 } + { sub k, b, _32 ; xm.ldw t3, t3 (s)} + { add exp_modifier, exp_modifier, t3 ; xm.vsetc t3} + + { srli n, n, 1 ; xm.bt n, dif_ifft_round_loop } + +dif_ifft_last_two_rounds: + { nop;nop /*TODO make this an align*/ } /* Translation error on this line: unexpected token at position 89. 
*/ + { mv t3, x_p ; lw n, (STACK_N)*4 (sp)} + { srli j, n, 2 ; nop } + +dif_ifft_last_two_rounds_loop: + { nop ; xm.vldr t3} + { addi j, j, -1 ; xm.vftfb } + { add t3, t3, _32 ; xm.vstr t3} + + dif_ifft_last_two_rounds_4_point: + { nop ; xm.vldr t3} + { addi j, j, -1 ; xm.vftfb } + { add t3, t3, _32 ; xm.vstr t3} + { nop ; xm.bt j, dif_ifft_last_two_rounds_loop } + +dif_ifft_done: + //update the hr + { li s, 31 ; xm.vgetc t3} + { xm.zexti t3, 5 ; nop } + { sub s, s, t3 ; nop } + xm.lddsp x18,x12,0*8 /* Translation error on this line: unexpected token at position 92. */ + { nop ; sw s,0 ( hr_p)} + + //update the exponent + { nop ; lw t3, (STACK_EXP)*4 (sp)} + { nop ; lw s,0 ( t3)} + srai exp_modifier, exp_modifier, 16 + //{ ashr exp_modifier, exp_modifier, 16 } /* Translation error on this line: unexpected token at position 89. */ + addi exp_modifier, exp_modifier, -2 /* Translation error on this line: unexpected token at position 89. */ + { add s, s, exp_modifier ; nop } + { nop ; sw s,0 ( t3)} + + //restore the regs + xm.lddsp x19,x20,1*8 + xm.lddsp x21,x22,2*8 + xm.lddsp x23,x24,3*8 + + xm.retsp (NSTACKWORDS)*4 + + .set fft_dif_inverse.nstackwords,NSTACKWORDS + .globl fft_dif_inverse.nstackwords + .set fft_dif_inverse.maxcores,1 + .globl fft_dif_inverse.maxcores + .set fft_dif_inverse.maxtimers,0 + .globl fft_dif_inverse.maxtimers + .set fft_dif_inverse.maxchanends,0 + .globl fft_dif_inverse.maxchanends +.L_fft_dif_inverse: + .size fft_dif_inverse, .L_fft_dif_inverse-fft_dif_inverse + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/fft/dit_fft.S b/lib_xcore_math/src/arch/vx4b/fft/dit_fft.S new file mode 100644 index 00000000..b1aacd9c --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/fft/dit_fft.S @@ -0,0 +1,316 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ +#if defined(__VX4B__) + + +/* + void fft_dit_forward ( + complex_s32_t * x, + unsigned n, + headroom_t* hr, + exponent_t* exp); + + void fft_dit_inverse ( + complex_s32_t* x, + unsigned n, + headroom_t* hr, + exponent_t* exp); +*/ + +#define NSTACKWORDS (32) + +#define STACK_EXP (8) + +#define x_p x10 //astew: Value is constant. Could be thrown on stack to free up a register. +#define n x11 +#define hr_p x12 //astew: register currently only used at very beginning and end. +#define twiddle_lut_p x13 +// #define M x18 +#define _32 x18 + +#define j x19 +#define k x20 + +#define a x21 +#define b x22 + +#define exp_modifier x23 + +#define s x24 +// #define t x28 + +.text +.globl fft_dit_forward +.type fft_dit_forward,@function + +.p2align 2 +fft_dit_forward: + + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp hr_p,s2,0 + xm.stdsp s4,s3,8 + xm.stdsp s6,s5,16 + xm.stdsp s8,s7,24 + +lui t3, %hi(xmath_dit_fft_lut) + addi t3,t3, %lo(xmath_dit_fft_lut) + { mv twiddle_lut_p, t3 ; sw a3, (STACK_EXP)*4 (sp)} + + { li exp_modifier, 0 ; lw t3,0 ( hr_p)} + { addi t3, t3, -1 ; xm.brff t3, dit_fft_impl_0_bits_hr }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { addi t3, t3, -1 ; xm.brff t3, dit_fft_impl_1_bits_hr }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { addi t3, t3, -1 ; xm.brff t3, dit_fft_impl_2_bits_hr }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { addi t3, t3, -1 ; xm.brff t3, dit_fft_impl_3_bits_hr }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.bu dit_fft_impl_4_bits_hr } + +#define VEC_SHR 0x80 +#define VEC_SHL 0x40 +#define VEC_SH0 0x00 + 
+dit_fft_impl_0_bits_hr: + li x28, VEC_SHR /* Translation error on this line: unexpected token at position 48. */ //VEC_SHR + { addi exp_modifier, exp_modifier, 1 ; xm.bu dit_fft_impl_start } + +dit_fft_impl_1_bits_hr: + li x28, VEC_SHR /* Translation error on this line: unexpected token at position 48. */ //VEC_SHR + { addi exp_modifier, exp_modifier, 1 ; xm.bu dit_fft_impl_start } + +dit_fft_impl_2_bits_hr: + li x28, VEC_SHR /* Translation error on this line: unexpected token at position 48. */ //VEC_SHR + { addi exp_modifier, exp_modifier, 1 ; xm.bu dit_fft_impl_start } + +dit_fft_impl_3_bits_hr: + li x28, VEC_SH0 /* Translation error on this line: unexpected token at position 48. */ //VEC_SH0 + { addi exp_modifier, exp_modifier, 0 ; xm.bu dit_fft_impl_start } + +dit_fft_impl_4_bits_hr: + li x28, VEC_SHL /* Translation error on this line: unexpected token at position 48. */ //VEC_SHL + { addi exp_modifier, exp_modifier, -1 ; xm.bu dit_fft_impl_start } + + +dit_fft_impl_start: + // Iterate the dit_fft_first_two_rounds_loop loop n/4 times (via j) because vD holds 4 complex elements + { srli j, n, 2 ; xm.vsetc t3} + // have x28 point at the beginning of the data vector + { mv t3, x_p ; li _32, 32 } + + + dit_fft_first_two_rounds_loop: + // Load 4 complex elements from the data vector (already have indexes bit-reversed) + { nop ; xm.vldd t3} + // Do FFT thing and decrement loop counter + { addi j, j, -1 ; xm.vfttf } + // Write back to data vector, and move to point at next 4 elements + { add t3, t3, _32 ; xm.vstd t3} + // Loop if there's more. Set s to n/4 + { srli s, n, 2 ; xm.bt j, dit_fft_first_two_rounds_loop } + + // s = (n/4)-1; if n == 4, skip the main loop. 
+ { addi s, s, -1 ; nop } + { nop ; xm.brff s, dit_fft_done }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + // b = 32 ; b will shift left in each iteration of the `dit_fft_round_loop` loop + // a = n / 8 ; a will shift right in each iteration of the `dit_fft_round_loop` loop + // n = n / 16 ; after this we'll do log2(n)+1 executions of `dit_fft_round_loop` + { mv b, _32 ; nop } // <-- astew: seems unnecessary. Can probably drop an instruction here. + { srli n, n, 4 ; srli a, n, 3 } + + dit_fft_round_loop: + la x28, fft_hr_lut // x28 is the same register as t3 (RISC-V ABI name) + { mv s, t3 ; xm.vgetc t3} + { nop ; xm.zexti t3, 5 } + { sub k, b, _32 ; xm.ldw t3, t3 (s)} + srai s, t3, 16 // upper 16 bits of the LUT entry are the exponent adjustment + { add exp_modifier, exp_modifier, s ; xm.vsetc t3} + + dit_fft_outer_loop: + // j is our inner loop iterator variable + // set s to point k bytes into the data buffer + { mv j, a ; add s, x_p, k } + { add twiddle_lut_p, twiddle_lut_p, _32 ; xm.vldc twiddle_lut_p} + { add t3, s, b ; nop } // NOTE(review): this might be able to go + + dit_fft_inner_loop: + { nop ; xm.vldd t3} + { nop ; xm.vcmr0 } + { nop ; xm.vcmi0 } + { addi j, j, -1 ; xm.vladsb s} + { add s, s, b ; xm.vstr s } + { add s, s, b ; xm.vstd t3} + { add t3, s, b ; xm.bt j, dit_fft_inner_loop } + + { sub k, k, _32 ; xm.bt k, dit_fft_outer_loop } + + { xm.shli b, b, 1 ; srli a, a, 1 } + { srli n, n, 1 ; xm.bt n, dit_fft_round_loop } + +dit_fft_done: + + // update the hr (headroom): 31 minus the low 5 bits read back by xm.vgetc + { xm.vgetc t3; li s, 31 } + xm.zexti x28, 5 /* Translation error on this line: unexpected token at position 48.
*/ + sub s, s, t3 + xm.lddsp hr_p,s2,0 + sw s,0( hr_p) + + //update the exponent + { nop ; lw t3, (STACK_EXP)*4 (sp)} + { nop ; lw s,0 ( t3)} + { add s, s, exp_modifier ; nop } + { nop ; sw s,0 ( t3)} + + //restore the regs + xm.lddsp s4,s3,8 + xm.lddsp s6,s5,16 + xm.lddsp s8,s7,24 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + + .set fft_dit_forward.nstackwords,NSTACKWORDS + .globl fft_dit_forward.nstackwords + .set fft_dit_forward.maxcores,1 + .globl fft_dit_forward.maxcores + .set fft_dit_forward.maxtimers,0 + .globl fft_dit_forward.maxtimers + .set fft_dit_forward.maxchanends,0 + .globl fft_dit_forward.maxchanends + +.Ltmp0: + .size fft_dit_forward, .Ltmp0-fft_dit_forward + + + + + + + + + + + + + + + .text + .globl fft_dit_inverse + .type fft_dit_inverse, @function + +.p2align 2 +fft_dit_inverse: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp hr_p,s2,0 + xm.stdsp s4,s3,8 + xm.stdsp s6,s5,16 + xm.stdsp s8,s7,24 + +lui t3, %hi(xmath_dit_fft_lut) + addi t3,t3, %lo(xmath_dit_fft_lut) + { mv twiddle_lut_p, t3 ; sw a3, (STACK_EXP)*4 (sp)} + + { li exp_modifier, 0 ; lw t3,0 ( hr_p)} + { addi t3, t3, -1 ; xm.brff t3, dit_ifft_impl_0_bits_hr }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { addi t3, t3, -1 ; xm.brff t3, dit_ifft_impl_1_bits_hr }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { addi t3, t3, -1 ; xm.brff t3, dit_ifft_impl_2_bits_hr }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch 
may need revising' */ + { addi t3, t3, -1 ; xm.brff t3, dit_ifft_impl_3_bits_hr }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.bu dit_ifft_impl_4_bits_hr } + + dit_ifft_impl_0_bits_hr: + li x28, 0x80 /* Translation error on this line: unexpected token at position 52. */ //VEC_SHR + { addi exp_modifier, exp_modifier, 1 ; xm.bu dit_ifft_impl_start } + + dit_ifft_impl_1_bits_hr: + li x28, 0x80 /* Translation error on this line: unexpected token at position 52. */ //VEC_SHR + { addi exp_modifier, exp_modifier, 1 ; xm.bu dit_ifft_impl_start } + + dit_ifft_impl_2_bits_hr: + li x28, 0x80 /* Translation error on this line: unexpected token at position 52. */ //VEC_SHR + { addi exp_modifier, exp_modifier, 1 ; xm.bu dit_ifft_impl_start } + + dit_ifft_impl_3_bits_hr: + li x28, 0x00 /* Translation error on this line: unexpected token at position 52. */ //VEC_SH0 + { addi exp_modifier, exp_modifier, 0 ; xm.bu dit_ifft_impl_start } + + dit_ifft_impl_4_bits_hr: + li x28, 0x40 /* Translation error on this line: unexpected token at position 52. 
*/ //VEC_SHL + { addi exp_modifier, exp_modifier, -1 ; xm.bu dit_ifft_impl_start } + +dit_ifft_impl_start: + { srli j, n, 2 ; xm.vsetc t3} + { mv t3, x_p ; xm.ldcu _32, 8*4 } + + +dit_ifft_first_two_rounds_loop: + { nop ; xm.vldd t3} + { addi j, j, -1 ; xm.vfttb } + { add t3, t3, _32 ; xm.vstd t3} + { srli s, n, 2 ; xm.bt j, dit_ifft_first_two_rounds_loop } + + { addi s, s, -1 ; nop } + { addi exp_modifier, exp_modifier, -2 ; xm.brff s, dit_ifft_done }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + { mv a, n ; mv b, _32 } + { srli a, a, 3 ; srli n, n, 4 } + +dit_ifft_round_loop: + la x28, fft_hr_lut + { mv s, t3 ; xm.vgetc t3} + { xm.zexti t3, 5 ; addi exp_modifier, exp_modifier, -1 } + { sub k, b, _32 ; xm.ldw t3, t3 (s)} + srai s, t3, 16 + { add exp_modifier, exp_modifier, s ; xm.vsetc t3} + + dit_ifft_outer_loop: + { add s, x_p, k ; mv j, a } + { add twiddle_lut_p, twiddle_lut_p, _32 ; xm.vldc twiddle_lut_p} + { add t3, s, b ; nop } ////this might be able to go + + dit_ifft_inner_loop: + { nop ; xm.vldd t3} + { nop ; xm.vcmcr0 } + { nop ; xm.vcmci0 } + { addi j, j, -1 ; xm.vladsb s} + { add s, s, b ; xm.vstr s } + { add s, s, b ; xm.vstd t3} + { add t3, s, b ; xm.bt j, dit_ifft_inner_loop } + + { sub k, k, _32 ; xm.bt k, dit_ifft_outer_loop } + + { srli a, a, 1 ; xm.shli b, b, 1 } + { srli n, n, 1 ; xm.bt n, dit_ifft_round_loop } + +dit_ifft_done: + + //update the hr + { li s, 31 ; xm.vgetc t3} + xm.zexti x28, 5 /* Translation error on this line: unexpected token at position 48. 
*/ + sub s, s, t3 + xm.lddsp hr_p,s2,0 + sw s,0( hr_p) + + //update the exponent + { nop ; lw t3, (STACK_EXP)*4 (sp)} + { nop ; lw s,0 ( t3)} + { add s, s, exp_modifier ; nop } + { nop ; sw s,0 ( t3)} + + //restore the regs + xm.lddsp s4,s3,8 + xm.lddsp s6,s5,16 + xm.lddsp s8,s7,24 + xm.retsp (NSTACKWORDS)*4 /* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS \nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + + .set fft_dit_inverse.nstackwords,NSTACKWORDS + .globl fft_dit_inverse.nstackwords + .set fft_dit_inverse.maxcores,1 + .globl fft_dit_inverse.maxcores + .set fft_dit_inverse.maxtimers,0 + .globl fft_dit_inverse.maxtimers + .set fft_dit_inverse.maxchanends,0 + .globl fft_dit_inverse.maxchanends +.Ltmp1: + .size fft_dit_inverse, .Ltmp1-fft_dit_inverse + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/fft/fft_hr_lut.S b/lib_xcore_math/src/arch/vx4b/fft/fft_hr_lut.S new file mode 100644 index 00000000..72237451 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/fft/fft_hr_lut.S @@ -0,0 +1,24 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +//.section .cp.rodata, "ac", @progbits +.p2align 2 +// In the table below the LSByte indicates the shift behavior +// 0x00 - no shift. 0x40 - left shift. 0x80 - right shift. 
+ +#define NEG1_SHL 0xffff0040 // exponent adjustment -1 in the upper 16 bits, LSByte 0x40 = left shift +#define ZERO_SH0 0x00000000 // exponent adjustment 0, LSByte 0x00 = no shift +#define POS1_SHR 0x00010080 // exponent adjustment +1 in the upper 16 bits, LSByte 0x80 = right shift +.section .data.fft_hr_lut, "aw" +.p2align 2 // word-align the table inside its own section: it is read with word loads (xm.ldw), and an alignment directive placed before .section would apply to the previous section instead +.global fft_hr_lut +fft_hr_lut: + .word NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL // 0 - 7 + .word NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL // 8 - 15 + .word NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL // 16 - 23 + .word NEG1_SHL, NEG1_SHL, NEG1_SHL, NEG1_SHL, ZERO_SH0, POS1_SHR, POS1_SHR, POS1_SHR // 24 - 31 + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/fft/fft_index_bit_reversal.S b/lib_xcore_math/src/arch/vx4b/fft/fft_index_bit_reversal.S new file mode 100644 index 00000000..27712a18 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/fft/fft_index_bit_reversal.S @@ -0,0 +1,65 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + +/* + +void fft_index_bit_reversal( + complex_s32_t* a, + const unsigned length); +*/ + +#define FUNCTION_NAME fft_index_bit_reversal +#define NSTACKWORDS 8 + +.text +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.p2align 4 + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4 + xm.stdsp x20, x21, 0 + xm.stdsp x18, x19, 8 + xm.clz a2, a1 + { addi a2, a2, 1 ; srli a1, a1, 1 } + { slli a1, a1, 1 ; xm.bu .L_loop } + +.p2align 4 +.L_loop: + { xm.bitrev a3, a1 ; xm.shl t3, a1, a2 } + { xm.sltu t3, a3, t3 ; xm.shr a3, a3, a2 } + { addi t3, a1, -1 ; xm.brff t3, .L_dontswap } + xm.ldd x18, x19, a1(a0) + xm.ldd x20, x21, a3(a0) + xm.std x18, x19, a3(a0) + xm.std x20, x21, a1(a0) +.L_dontswap: + { xm.bitrev a3, t3 ; xm.shl a1, t3, a2 } + { xm.sltu a1, a3, a1 ; xm.shr a3, a3, a2 } + { addi a1, t3, -1 ; xm.brff a1, .L_dontswap2 } + xm.ldd x18, x19, t3(a0) + xm.ldd x20, x21, a3(a0) + xm.std x18, x19, a3(a0) + xm.std x20, x21, t3(a0) +.L_dontswap2: + { xm.bt a1, .L_loop ; nop } + + xm.lddsp x20, x21, 0 +
xm.lddsp x18, x19, 8 + xm.retsp (NSTACKWORDS)*4 + + // RETURN_REG_HOLDER +.set FUNCTION_NAME.nstackwords,NSTACKWORDS +.globl FUNCTION_NAME.nstackwords +.set FUNCTION_NAME.maxcores,1 +.globl FUNCTION_NAME.maxcores +.set FUNCTION_NAME.maxtimers,0 +.globl FUNCTION_NAME.maxtimers +.set FUNCTION_NAME.maxchanends,0 +.globl FUNCTION_NAME.maxchanends +.Ltmp0: + .size FUNCTION_NAME, .Ltmp0-FUNCTION_NAME + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/fft/fft_mono_adjust.S b/lib_xcore_math/src/arch/vx4b/fft/fft_mono_adjust.S new file mode 100644 index 00000000..e9c4f1c3 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/fft/fft_mono_adjust.S @@ -0,0 +1,218 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +/* +void fft_mono_adjust( + complex_s32_t* X, + const unsigned N, + const unsigned inverse); +*/ + + +#define FUNCTION_NAME fft_mono_adjust + +#define NSTACKVECTS (4) +#define NSTACKWORDS (32 + 8*(NSTACKVECTS)) + +#define STACK_VEC_TMP_A (NSTACKWORDS-(8*2)) +#define STACK_VEC_TMP_B (NSTACKWORDS-(8*3)) +#define STACK_VEC_TMP_B_CONJ (NSTACKWORDS-(8*4)) +#define STACK_VEC_TMP (NSTACKWORDS-(8*5)) + +#define STACK_X0 (4) +#define STACK_XQ (5) +#define STACK_X (12) +#define STACK_N (13) +#define STACK_W (14) +#define STACK_INV (15) + +#define X x10 +#define N x11 +#define W x12 +#define X_lo x13 +#define X_hi x18 +#define _32 x19 +#define i x20 +#define pos_j_vect x21 +#define ones_vect x22 +#define conj_vect x23 + +.text +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +//.call FUNCTION_NAME, vect_complex_s32_tail_reverse + +.p2align 4 + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4 + xm.stdsp x18,x19,8 + xm.stdsp x20,x21,16 + xm.stdsp x22,x23,24 + + { li t3, 0 ; sw s8, 4 (sp)} + { addi s2, N, -8 ; sw a2, (STACK_INV)*4 (sp)} + { slli s2, s2, 3 ; xm.vsetc t3} + + // W <-- &xmath_dit_fft_lut[N - 8] + // W <-- xmath_dit_fft_lut + ((N-8)<<3) +lui t3, 
%hi(xmath_dit_fft_lut) + addi t3,t3, %lo(xmath_dit_fft_lut) + { srli a3, N, 4 ; add W, t3, s2 } + + { srli N, N, 1 ; sw X, (STACK_X)*4 (sp)} + // exception if N < 16. Don't bother using this with really short FFTs. + { xm.assert a3 ; sw N, (STACK_N)*4 (sp)} + + sh2add X, N, X + { srli N, N, 1 ; sw W, (STACK_W)*4 (sp)} + + call vect_complex_s32_tail_reverse + lw X, (STACK_X)*4(sp)/* Multiple XAT warnings: 'LDWSP outside of known frame - offset may need correction', "Falling back on assumption: the int < 64 for the integer value of the item at position 1 in the instruction's operands in ldwsp X, STACK_X\nMessage: The offset can be encoded in sru6 immediate" */ + lw N, (STACK_N)*4(sp)/* Multiple XAT warnings: 'LDWSP outside of known frame - offset may need correction', "Falling back on assumption: the int < 64 for the integer value of the item at position 1 in the instruction's operands in ldwsp N, STACK_N\nMessage: The offset can be encoded in sru6 immediate" */ + lw W, (STACK_W)*4(sp)/* Multiple XAT warnings: 'LDWSP outside of known frame - offset may need correction', "Falling back on assumption: the int < 64 for the integer value of the item at position 1 in the instruction's operands in ldwsp W, STACK_W\nMessage: The offset can be encoded in sru6 immediate" */ + + +.p2align 4 +.L_body: + // the elements at indexes 0 and N/4 will come out of the loop wrong, but we can just store + // X[0] and X[N/2] and fix them after the loop. 
+ { srli i, N, 1 ; nop } + xm.lddi s5,s6, 0(X) + xm.ldd s7,s8, i(X) + xm.stdsp s5,s6,(STACK_X0)*8 + xm.stdsp s7,s8,(STACK_XQ)*8 +lui t3, %hi(vpu_vec_complex_pos_j) + addi t3,t3, %lo(vpu_vec_complex_pos_j) + { mv pos_j_vect, t3 ; nop } +lui t3, %hi(vpu_vec_complex_ones) + addi t3,t3, %lo(vpu_vec_complex_ones) + { mv ones_vect, t3 ; nop } +lui t3, %hi(vpu_vec_complex_conj_op) + addi t3,t3, %lo(vpu_vec_complex_conj_op) + { mv conj_vect, t3 ; li _32, 32 } + + li t3, 0x0080 + { slli t3, N, 2 ; xm.vsetc t3} + { add X_hi, X, t3 ; mv X_lo, X } + { srli i, N, 3 ; lw t3, (STACK_INV)*4 (sp)} + { nop ; xm.brff t3, .L_main_loop }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { mv X_hi, X_lo ; mv X_lo, X_hi } + +.L_main_loop://I want this loop to have 1 mod 4 alignment to eliminate all FNOPs + { addi i, i, -1 ; xm.vldd pos_j_vect} + { sub W, W, _32 ; xm.vldc W} + { nop ; xm.vcmr0 } + + { nop ; xm.vcmi0 } + { addi t3,sp, (STACK_VEC_TMP_A)*4 ; xm.vladsb ones_vect} + { addi t3,sp, (STACK_VEC_TMP_B)*4 ; xm.vstd t3} + { nop ; xm.vstr t3} + + { addi t3,sp, (STACK_VEC_TMP_B_CONJ)*4 ; xm.vlmul0 conj_vect} + { nop ; xm.vstr t3} + { nop ; xm.vldc X_lo} + { nop ; xm.vcmr0 } + + { addi t3,sp, (STACK_VEC_TMP_B)*4 ; xm.vcmi0 } + { addi t3,sp, (STACK_VEC_TMP)*4 ; xm.vldd t3} + { nop ; xm.vstr t3} + { nop ; xm.vldc X_hi} + + { nop ; xm.vcmcr0 } + { nop ; xm.vcmci0 } + { nop ; xm.vladd t3} + { addi t3,sp, (STACK_VEC_TMP_B_CONJ)*4 ; xm.vldc X_lo} + + { nop ; xm.vldd t3} + { add X_lo, X_lo, _32 ; xm.vstr X_lo} + { nop ; xm.vcmcr0 } + { addi t3,sp, (STACK_VEC_TMP)*4 ; xm.vcmci0 } + + { addi t3,sp, (STACK_VEC_TMP_A)*4 ; xm.vstr t3} + { nop ; xm.vldc t3} + { nop ; xm.vldd X_hi} + { nop ; xm.vcmcr0 } + + { addi t3,sp, (STACK_VEC_TMP)*4 ; xm.vcmci0 } + { nop ; xm.vladd t3} + { add X_hi, X_hi, _32 ; xm.vstr X_hi} + { nop ; xm.bt i, .L_main_loop } + + // If we had a LUT which already holds A[k], B[k] and the complex conjugate of B[k], we can do + // it 
in 23 instructions instead of 31 + + // If it seems worthwhile, could create an alternate version of this function that does it faster, + // plus a function to initialize the needed table at start-up? It can be initialized based on the + // existing FFT table. + + // { ; vldd table_A[0] } + // { sub i, i, 1 ; vldc X_lo[0] } + // { ; vcmr } + // { ; vcmi } + // { ; vstr vec_tmp[0] } + // { ; vldd table_B[0] } + // { ; vldc X_hi[0] } + // { ; vcmcr } + // { ; vcmci } + // { ; vladd vec_tmp[0] } + // { ; vldd table_B_conj[0] } + // { ; vldc X_lo[0] } + // { add X_lo, X_lo, _32 ; vstr X_lo[0] } + // { ; vcmcr } + // { ; vcmci } + // { ; vstr vec_tmp[0] } + // { add table_A, table_A, _32 ; vldc table_A[0] } + // { add table_B, table_B, _32 ; vldd X_hi[0] } + // { add table_B_conj, table_B_conj, _32 ; vcmcr } + // { ; vcmci } + // { ; vladd vec_tmp[0] } + // { add X_hi, X_hi, _32 ; vstr X_hi[0] } + // { ; bt i, .L_something } + + xm.lddsp s5,s6,(STACK_X0)*8 + xm.lddsp s7,s8,(STACK_XQ)*8 + { nop ; lw t3, (STACK_INV)*4 (sp)} + sra s5, s5, t3 + sra s6, s6, t3 + + { add s5, s5, s6 ; sub s6, s5, s6 } + xm.stdi s5,s6, 0(X) + { xm.neg s8, s8 ; srli i, N, 1 } + xm.std s7,s8, i(X) + + +//Finally, reverse the elements again... + sh2add X, N, X + { srli N, N, 1 ; nop } + + call vect_complex_s32_tail_reverse + +.L_finish: + { nop ; lw s8, 4 (sp)} + + xm.lddsp x18,x19,8 + xm.lddsp x20,x21,16 + xm.lddsp x22,x23,24 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + +//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. */ +.set FUNCTION_NAME.nstackwords,((NSTACKWORDS) + vect_complex_s32_tail_reverse.nstackwords); /* Translation error on this line: unexpected token at position 90. 
*/ +.global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 33. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. */ + +.L_function_end: + .size FUNCTION_NAME, .L_function_end - FUNCTION_NAME + + + + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/fft/fft_spectra_merge.S b/lib_xcore_math/src/arch/vx4b/fft/fft_spectra_merge.S new file mode 100644 index 00000000..124656a3 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/fft/fft_spectra_merge.S @@ -0,0 +1,158 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ +#if defined(__VX4B__) + + +/* +headroom_t fft_spectra_merge( + complex_s32_t* X, + const unsigned N); +*/ + + + +#define FUNCTION_NAME fft_spectra_merge +#define NSTACKWORDS (16) + +#define XS3_CONFIG_MIN_FFT_LEN (4) // smallest FFT length handled here; gates the N == 4 special case below + +#define X x10 +#define N x11 + + +.text +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.p2align 4 + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + { li t3, 0 ; sw s8, 4 (sp)} + { srli t3, N, 3 ; xm.vsetc t3} // t3 <- N/8, which is zero only when N < 8 +#if (XS3_CONFIG_MIN_FFT_LEN <= 4) + { nop ; xm.brff t3, .L_fft_length_4 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.bu .L_pre_boggle } +#endif + + +#if (XS3_CONFIG_MIN_FFT_LEN <= 4) // same macro as the branch guard above, so the N == 4 path and the branch to it are compiled in or out together +.L_fft_length_4: + + // If the FFT length is 4, just do the work here. This keeps the code below simpler.
+ { nop ; lw s2,4 ( X)} + { nop ; lw s3,16 ( X)} + { nop ; sw s2,16 /* X[2].re <- X[0].im */ ( X)} + { nop ; sw s3,4 /* X[0].im <- X[2].re */ ( X)} + xm.lddi s2,s3, 8(X) + xm.lddi s4,s5, 24(X) + { sub s8, s2, s5 ; add t3, s3, s4 } + xm.stdi s8,t3, 8(X) + { add s8, s2, s5 ; sub t3, s4, s3 } + xm.stdi s8,t3, 24(X) + { nop ; xm.vldd X} + { nop ; xm.vstd X} + { nop ; xm.vgetc t3} + { mv s2, t3 ; xm.bu .L_finish2 } +.L_finish2: + + + { li a0, 31 ; nop } + { xm.zexti s2, 5 ; nop } + { sub a0, a0, s2 ; lw s8, 4 (sp)} + + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + +#endif + +.L_pre_boggle: + +#define DC_re x12 +#define DC_im x13 +#define Ny_re x18 +#define Ny_im x19 + + // Pre-boggle the DC and Nyquist bins so we can do everything on the VPU + // Wait, is it faster to just compute the results and hold onto them...? + + { srli s6, N, 1 ; nop } + xm.lddi DC_re,DC_im, 0(X) + xm.ldd Ny_re,Ny_im, s6(X) + srai DC_re, DC_re, 1 + srai DC_im, DC_im, 1 + srai Ny_re, Ny_re, 1 + srai Ny_im, Ny_im, 1 + { xm.add s7, DC_re, DC_im ; xm.sub t3, Ny_re, Ny_im } + xm.stdi s7,t3, 0(X) + { xm.add s7, Ny_re, Ny_im ; xm.sub t3, DC_im, DC_re } + xm.std s7,t3, s6(X) + + + +#define X_lo x12 +#define X_hi x13 +#define i x18 +#define _32 x19 + + // Now go through and compute the outputs + + sh2add X_hi, N, X + + li x28, 0 /* Translation error on this line: unexpected token at position 92. 
*/ + { srli i, N, 3 ; xm.vsetc t3} +lui t3, %hi(vpu_vec_complex_neg_j) + addi t3,t3, %lo(vpu_vec_complex_neg_j) + { mv X_lo, X ; xm.vldc t3} +lui t3, %hi(vpu_vec_complex_conj_op) + addi t3,t3, %lo(vpu_vec_complex_conj_op) + { li _32, 32 ; xm.bu .L_syzygy } + +.p2align 4 +.L_syzygy: + { addi i, i, -1 ; xm.vldd X_hi} + { nop ; xm.vcmr0 } + { nop ; xm.vcmi0 } + { nop ; xm.vladsb X_lo} + { add X_lo, X_lo, _32 ; xm.vstd X_lo} + { nop ; xm.vlmul0 t3} + { add X_hi, X_hi, _32 ; xm.vstr X_hi} + { nop ; xm.bt i, .L_syzygy } + + + sh2add X, N, X + { srli N, N, 1 ; xm.vgetc t3} + { mv s2, t3 ; nop } + call vect_complex_s32_tail_reverse + + +.L_finish: + + + { li a0, 31 ; nop } + { xm.zexti s2, 5 ; nop } + { sub a0, a0, s2 ; lw s8, 4 (sp)} + + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + +//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. */ +.set FUNCTION_NAME.nstackwords,NSTACKWORDS + vect_complex_s32_tail_reverse.nstackwords; /* Translation error on this line: unexpected token at position 86. */ + .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 81. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. 
*/ +.L_function_end: + .size FUNCTION_NAME, .L_function_end - FUNCTION_NAME + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/fft/fft_spectra_split.S b/lib_xcore_math/src/arch/vx4b/fft/fft_spectra_split.S new file mode 100644 index 00000000..d6300b4e --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/fft/fft_spectra_split.S @@ -0,0 +1,150 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +/* +headroom_t fft_spectra_split( + complex_s32_t* X, + const unsigned N); +*/ + + +#define FUNCTION_NAME fft_spectra_split +#define NSTACKWORDS (32) + +#define X x10 +#define N x11 + +.text +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.p2align 4 + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + { li t3, 0 ; sw s8, 4 (sp)} + { srli s8, N, 3 ; xm.vsetc t3} // s8 <- N/8, which is zero only when N < 8 + + #if (XS3_CONFIG_MIN_FFT_LEN <= 4) + { nop ; xm.bt s8, .L_split_the_spectrum } + #endif + +#if (XS3_CONFIG_MIN_FFT_LEN <= 4) // same macro as the branch guard above so both are compiled in or out together; if the macro is not defined it evaluates as 0 and the N == 4 path stays enabled +.L_fft_length_4: + + // If the FFT length is 4, just do the work here. This keeps the code below simpler.
+ { xm.mkmski s8, 8 ; nop } + { slli t3, s8, 16 ; lw s2,4 ( X)} + { add s8, s8, t3 ; lw s3,16 ( X)} + { li t3, 1 ; sw s2,16 /* X[2].re <- X[0].im */ ( X)} + { xm.not s8, s8 ; sw s3,4 /* X[0].im <- X[2].re */ ( X)} + xm.vlashr X, t3 + xm.vstrpv X, s8 + xm.lddi s2,s3, 8(X) + xm.lddi s4,s5, 24(X) + { add s8, s2, s4 ; sub t3, s3, s5 } + xm.stdi s8,t3, 8(X) + { add s8, s3, s5 ; sub t3, s4, s2 } + xm.stdi s8,t3, 24(X) + { nop ; xm.vldd X} + { nop ; xm.vstd X} + { nop ; xm.bu .L_finish } + +#endif + + + +.L_split_the_spectrum: + + // First, reverse the tail + { mv s2, X ; mv s3, N } + sh2add X, N, X + { srli N, N, 1 ; nop } + call vect_complex_s32_tail_reverse + { mv X, s2 ; mv N, s3 } + +#define X_lo X +#define i x12 +#define _32 x13 +#define X_hi x18 +#define DC_im x19 +#define DC_re x20 +#define Ny_im x21 +#define Ny_re x22 + + // x = [DC.re - Ny.im, Ny.re + DC.im, DC.re + Ny.im, -Ny.re + DC.im] + + // If I set [X[0].re, X[0].im, X[K].re, X[k].im] to the vector above, then I can just compute + // the results for bins 0 and K along with everything else. Then I'm guaranteed that the number + // of elements is a multiple of 4, which means this loop will have no tail, AND it will have + // captured the headroom of the vector (although it will be the lesser of the lower and upper + // halves) + { li _32, 32 ; srli i, N, 1 } + xm.lddi DC_re,DC_im, 0(X) + xm.ldd Ny_re,Ny_im, i(X) + { sub s7, DC_re, Ny_im ; add t3, DC_im, Ny_re } + xm.stdi s7,t3, 0(X) + { add s7, DC_re, Ny_im ; sub t3, DC_im, Ny_re } + xm.std s7,t3, i (X) + +#undef DC_re +#undef DC_im +#undef Ny_re +#undef Ny_im + +#define conj_vec x19 + + + sh2add X_hi, N, X_lo + li x28, 0x0080 /* Translation error on this line: unexpected token at position 92. 
*/ + { srli i, i, 2 ; xm.vsetc t3} +lui t3, %hi(vpu_vec_complex_neg_j) + addi t3,t3, %lo(vpu_vec_complex_neg_j) + { nop ; xm.vldc t3} +lui t3, %hi(vpu_vec_complex_conj_op) + addi t3,t3, %lo(vpu_vec_complex_conj_op) + { nop ; xm.bu .L_syzygy } + +.p2align 4 +.L_syzygy: + { addi i, i, -1 ; xm.vldr t3} + { nop ; xm.vlmul0 X_hi} + { nop ; xm.vladsb X_lo} + { add X_lo, X_lo, _32 ; xm.vstr X_lo} + { nop ; xm.vcmr0 } + { nop ; xm.vcmi0 } + { add X_hi, X_hi, _32 ; xm.vstr X_hi} + { nop ; xm.bt i, .L_syzygy } + + +.L_finish: + { li a0, 31 ; xm.vgetc t3} + { xm.zexti t3, 5 ; nop } + { sub a0, a0, t3 ; lw s8, 4 (sp)} + + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + +//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. */ +.set FUNCTION_NAME.nstackwords,NSTACKWORDS + vect_complex_s32_tail_reverse.nstackwords; /* Translation error on this line: unexpected token at position 86. */ + .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 81. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. 
*/ +.L_function_end: + .size FUNCTION_NAME, .L_function_end - FUNCTION_NAME + + + + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/fft/tail_reverse_complex_s32.S b/lib_xcore_math/src/arch/vx4b/fft/tail_reverse_complex_s32.S new file mode 100644 index 00000000..e3215d12 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/fft/tail_reverse_complex_s32.S @@ -0,0 +1,106 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +/* + void vect_complex_s32_tail_reverse( + complex_s32_t* X, + const unsigned N); +*/ + +#include "../asm_helper.h" + +#define NSTACKWORDS (32 + 0) + +#define FUNCTION_NAME vect_complex_s32_tail_reverse + +#define X x10 +#define N x11 +#define X_A x13 +#define X_C x18 +#define mask_A x19 +#define mask_C x20 +#define i x21 +#define zero x22 +#define _16 x23 +#define X_lo x28 + +.text +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.p2align 4 + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + { li t3, 0 ; srli s7, N, 2 } + { srli t3, N, 2 ; xm.vsetc t3} + { srli t3, N, 3 ; xm.brff t3, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.bt t3, .L_big_enough } + + // N = 4, just reverse elements 1 and 3 + xm.lddi a3,s2, 8(X) + xm.lddi s7,t3, 24(X) + xm.stdi a3,s2, 24(X) + xm.stdi s7,t3, 8(X) + tail .L_finish + +.L_big_enough: + +#define X_hi X + + la t3, vpu_vec_zero + { srli i, N, 3 ; mv zero, t3 } + + { xm.mkmski mask_A, 8 ; xm.vclrdr } + { addi X_lo, X, 8 ; slli mask_A, mask_A, 8 } + { li X_A, 32 ; slli mask_C, mask_A, 16 } + sh2add X_hi, N, X + sh2add X_hi, N, X_hi + { li _16, 16 ; sub 
X_hi, X_hi, X_A } + +.L_rev_loop: + { add X_A, X_hi, _16 ; xm.vldc X_hi} + { xm.sub X_C, X_hi, _16 ; xm.vldr X_lo } + { addi i, i, -1 ; xm.vlmaccr0 zero} + { nop ; xm.vlmaccr0 zero} + { sub X_hi, X_C, _16 ; xm.vstr X_hi} + xm.vstrpv X_A, mask_A + xm.vstrpv X_C, mask_C + + + { xm.add X_A, X_lo, _16 ; xm.vstc X_lo } + { xm.sub X_C, X_lo, _16 ; xm.vldr X_lo } + { nop ; xm.vlmaccr0 zero} + { nop ; xm.vlmaccr0 zero} + { add X_lo, X_A, _16 ; xm.vstr X_lo} + xm.vstrpv X_A, mask_A + xm.vstrpv X_C, mask_C + { nop ; xm.bt i, .L_rev_loop } + +.L_finish: + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. */ +.L_func_end: + .size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + + + + + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_s32.S b/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_s32.S new file mode 100644 index 00000000..fa638c1a --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_s32.S @@ -0,0 +1,155 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ + + +#if defined(__VX4B__) + +#include "../asm_helper.h" + +/* + +typedef struct { + unsigned biquad_count; + int32_t state[2][9]; // state[j][k] is the value x_k[j], i.e. x[n-j] of the kth biquad. x[j][8] are outputs of 8th biquad + int32_t coef[5][8]; // coefficients. coef[j][k] is for the kth biquad. j maps to b0,b1,b2,-a1,-a2. +} filter_biquad_s32_t; + +int32_t filter_biquad_s32( + filter_biquad_s32_t* filter, + const int32_t new_sample); +*/ + +#define FUNCTION_NAME filter_biquad_s32 + +#define NSTACKVECS (0) +#define NSTACKWORDS (32+8*NSTACKVECS) + +#define FILT_N 0 +#define FILT_STATE 1 +#define FILT_COEF 19 + +#define COEF_START 32 +#define STATE_START 10 + + +#define state x10 // ![0x%08X] +#define sample x11 // ![%d] +#define coef x12 // ![0x%08X] +#define tmp x13 // ![%d] +#define _32 x18 // ![%d] +#define _36 x19 // ![%d] +#define filter x24 // ![0x%08X] + +.text +.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. */ +.type FUNCTION_NAME,@function +.p2align 4 + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + { li t3, 0 ; sw s8, 4 (sp)} + { mv filter, a0 ; xm.vsetc t3} + { xm.ldcu tmp, FILT_STATE + STATE_START ; nop } + sh2add state, tmp, filter // state <-- &(filter->state[1][1]) + { xm.ldcu tmp, FILT_COEF + COEF_START ; xm.vclrdr } + sh2add coef, tmp , filter // coef <-- &(filter->coef[4][0]) + + { li _36, 36 ; li _32, 32 } + +// Deal with the b2 and -a2 coefficients before b1 and -a1, so we can overwrite them easily. 
+ + { sub state, state, _36 ; xm.vldc state} + { sub coef, coef, _32 ; xm.vlmacc0 coef} + { add state, state, _32 ; xm.vldc state} + { sub coef, coef, _32 ; xm.vlmacc0 coef} + { sub state, state, _36 ; xm.vldc state} + { sub coef, coef, _32 ; xm.vlmacc0 coef} + { nop ; xm.vldc state} + { sub coef, coef, _32 ; xm.vlmacc0 coef} + + // Now acc[k] = b1[k] * x[n-1][k] + b2[k] * x[n-2][k] - a1[k] * y[n-1][k] - a2[k] * y[n-2][k] + // state = &(filter->state[0][0]) + // coef = &(filter->coef[0][0]) + +#undef _36 +#define N x19 // ![%d] + + // Move filter->state[0][:] to filter->state[1][:] + + { add t3, state, s3 ; xm.vldc state} + { add tmp, state, _32 ; lw N,(FILT_N)*4 ( filter)} + { add t3, t3, _32 ; xm.vstc t3} + { slli N, N, 1 ; lw tmp,0 ( tmp)} + { li tmp, 6 ; sw tmp,0 ( t3)} + + // Place the newest input sample in state[0][0] + { sub N, tmp, N ; sw sample,0 ( state)} + + // Overwrite state[0][1:9] with 0's +lui t3, %hi(vpu_vec_zero) + addi t3,t3, %lo(vpu_vec_zero) + { addi t3, state, 4 ; xm.vldc t3} + { nop ; xm.vstc t3} + + // vC[:] <-- coef[b0][:] + { nop ; xm.vldc coef} + + // Every element in x28[0:8] except for x28[0] is zero, so a VLMACC shouldn't affect them. + // Subsequent VLMACCs will corrupt the accumulators, but The Mask will stop that from being a + // problem. Smokin'! + + // Let's make this more clear. We still haven't MACCed in the terms corresponding to b0, + // but we can't do all of those simultaneously as we did with the others because the x[n-0] + // for one section IS the output of the previous section, which we haven't finished calculating + // yet. So we need to go up the chain of filter sections, computing the output of each to get + // the input to the next. Because we've set the state[0][1:9] to zeros, when we're working on + // the k'th filter section, MACCing against that will not affect accumulators > k. Then we write + // out the output of section k. 
We do the MACC again, **which will corrupt the accumulators + // which are LESS THAN k.... but that's FINE because we're not going to write them out again. + { xm.mkmski tmp, 4 ; xm.vlmacc0 state} + xm.vstrpv t3, tmp + li N, 0 + { nop ; xm.bru N /* Intended: skip ahead so only N-1 more biquads run. NOTE(review): N is loaded with 0 just above, so the skip count computed earlier is unused - confirm this is deliberate */ } + + { slli tmp, tmp, 4 ; xm.vlmacc0 state} + xm.vstrpv t3, tmp + { slli tmp, tmp, 4 ; xm.vlmacc0 state} + xm.vstrpv t3, tmp + { slli tmp, tmp, 4 ; xm.vlmacc0 state} + xm.vstrpv t3, tmp + { slli tmp, tmp, 4 ; xm.vlmacc0 state} + xm.vstrpv t3, tmp + { slli tmp, tmp, 4 ; xm.vlmacc0 state} + xm.vstrpv t3, tmp + { slli tmp, tmp, 4 ; xm.vlmacc0 state} + xm.vstrpv t3, tmp + { slli tmp, tmp, 4 ; xm.vlmacc0 state} + xm.vstrpv t3, tmp + + // Final vstrpv should have written the output to filt->state[0][N]. filt->state should + // still be pointing at filt->state[0][0] + + lw N,(FILT_N)*4 ( filter) + xm.ldw a0,N ( state) + + +.L_done: + { nop ; lw s8, 4 (sp)} + xm.lddsp s3,s2,8 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + +//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. */ +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. 
*/ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. 
*/ +.L_size_end: + .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_sat_s32.S b/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_sat_s32.S new file mode 100644 index 00000000..4f175d01 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_sat_s32.S @@ -0,0 +1,220 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + + + +#if defined(__VX4B__) + +#include "../asm_helper.h" + +/* + +typedef struct { + unsigned biquad_count; + int32_t state[2][9]; // state[j][k] is the value x_k[j], i.e. x[n-j] of the kth biquad. x[j][8] are outputs of 8th biquad + int32_t coef[5][8]; // coefficients. coef[j][k] is for the kth biquad. j maps to b0,b1,b2,-a1,-a2. +} filter_biquad_s32_t; + +int32_t filter_biquad_sat_s32( + filter_biquad_s32_t* filter, + const int32_t new_sample); +*/ + +#define FUNCTION_NAME filter_biquad_sat_s32 + +#define NSTACKVECS (2) +#define NSTACKWORDS (10+2+8*NSTACKVECS) + +#define STACK_TMP_VR (NSTACKWORDS - 16-2) +#define STACK_TMP_VD (NSTACKWORDS - 8-2) + +#define FILT_N 0 +#define FILT_STATE 1 +#define FILT_COEF 19 + +#define COEF_START 32 +#define STATE_START 10 + + +#define state x10 // ![0x%08X] +#define sample x11 // ![%d] +#define coef x12 // ![0x%08X] +#define tmp x13 // ![%d] +#define _32 x18 // ![%d] +#define _36 x19 // ![%d] +#define filter x24 // ![0x%08X] + +.text +.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. 
*/ +.type FUNCTION_NAME,@function +.p2align 4 + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + { li t3, 0 ; sw s8, 4 (sp)} + { mv filter, a0 ; xm.vsetc t3} + { xm.ldcu tmp, FILT_STATE + STATE_START ; nop } + sh2add state, tmp, filter // state <-- &(filter->state[1][1]) + { xm.ldcu tmp, FILT_COEF + COEF_START ; xm.vclrdr } + sh2add coef, tmp, filter // coef <-- &(filter->coef[4][0]) + + { li _36, 36 ; li _32, 32 } + +// Deal with the b2 and -a2 coefficients before b1 and -a1, so we can overwrite them easily. + + { sub state, state, _36 ; xm.vldc state} + { sub coef, coef, _32 ; xm.vlmacc0 coef} + { add state, state, _32 ; xm.vldc state} + { sub coef, coef, _32 ; xm.vlmacc0 coef} + { sub state, state, _36 ; xm.vldc state} + { sub coef, coef, _32 ; xm.vlmacc0 coef} + { nop ; xm.vldc state} + { sub coef, coef, _32 ; xm.vlmacc0 coef} + + // Now acc[k] = b1[k] * x[n-1][k] + b2[k] * x[n-2][k] - a1[k] * y[n-1][k] - a2[k] * y[n-2][k] + // state = &(filter->state[0][0]) + // coef = &(filter->coef[0][0]) + +#undef _36 +#define N x19 // ![%d] + + // Move filter->state[0][:] to filter->state[1][:] + + { add t3, state, s3 ; xm.vldc state} + { add t3, t3, _32 ; xm.vstc t3} + { add tmp, state, _32 ; lw s2,(FILT_N)*4 ( filter)} + { li s3, 6 ; lw tmp,0 ( tmp)} + { li tmp, 6*8 ; sw tmp,0 ( t3)} + mul N, s2, s3 + + // Place the newest input sample in state[0][0] + { sub N, tmp, N ; sw sample,0 ( state)} + +#undef sample +#define zeros x11 + + // Overwrite state[0][1:9] with 0's + lui t3, %hi(vpu_vec_zero) + addi t3,t3, %lo(vpu_vec_zero) + { addi zeros, t3, 0 ; li _32, 32} + { addi t3, state, 4 ; xm.vldc t3} + { nop ; xm.vstc t3} + + // vC[:] <-- coef[b0][:] + { nop ; xm.vldc coef} + +#undef coef +#define state_p1 x12 + + { addi state_p1, t3, 0 ; addi 
t3,sp, (STACK_TMP_VR)*4 } + + // Every element in x28[0:8] except for x28[0] is zero, so a VLMACC shouldn't affect them. + // Subsequent VLMACCs will corrupt the accumulators, but The Mask will stop that from being a + // problem. Smokin'! + + // Let's make this more clear. We still haven't MACCed in the terms corresponding to b0, + // but we can't do all of those simultaneously as we did with the others because the x[n-0] + // for one section IS the output of the previous section, which we haven't finished calculating + // yet. So we need to go up the chain of filter sections, computing the output of each to get + // the input to the next. Because we've set the state[0][1:9] to zeros, when we're working on + // the k'th filter section, MACCing against that will not affect accumulators > k. Then we write + // out the output of section k. We do the MACC again, **which will corrupt the accumulators + // which are LESS THAN k.... but that's FINE because we're not going to write them out again. 
+ + // instead of using _32 could just use that register as a second pointer for the vD temp stack, + // wouldn't need to recalculate the stack pointer every time, doesn't _ _ + // matter here as we're not using most of the resource line instructions anyway \(`~`)/ + + { xm.mkmski tmp, 4 ; xm.vlmacc0 state} + { add t3, t3, _32 ; xm.vstr t3} + { nop ; xm.vstd t3} + xm.vlsat zeros + xm.vstrpv state_p1, tmp +li N, (0) + { nop ; xm.bru N /* Intended: skip ahead so only N-1 more biquads run. NOTE(review): N is loaded with 0 just above, so the skip count computed earlier is unused - confirm this is deliberate */ } + + { sub t3, t3, _32 ; xm.vldd t3} + { nop ; xm.vldr t3} + { slli tmp, tmp, 4 ; xm.vlmacc0 state} + { add t3, t3, _32 ; xm.vstr t3} + { nop ; xm.vstd t3} + xm.vlsat zeros + xm.vstrpv state_p1, tmp + + + { sub t3, t3, _32 ; xm.vldd t3} + { nop ; xm.vldr t3} + { slli tmp, tmp, 4 ; xm.vlmacc0 state} + { add t3, t3, _32 ; xm.vstr t3} + { nop ; xm.vstd t3} + xm.vlsat zeros + xm.vstrpv state_p1, tmp + + + { sub t3, t3, _32 ; xm.vldd t3} + { nop ; xm.vldr t3} + { slli tmp, tmp, 4 ; xm.vlmacc0 state} + { add t3, t3, _32 ; xm.vstr t3} + { nop ; xm.vstd t3} + xm.vlsat zeros + xm.vstrpv state_p1, tmp + + + { sub t3, t3, _32 ; xm.vldd t3} + { nop ; xm.vldr t3} + { slli tmp, tmp, 4 ; xm.vlmacc0 state} + { add t3, t3, _32 ; xm.vstr t3} + { nop ; xm.vstd t3} + xm.vlsat zeros + xm.vstrpv state_p1, tmp + + + { sub t3, t3, _32 ; xm.vldd t3} + { nop ; xm.vldr t3} + { slli tmp, tmp, 4 ; xm.vlmacc0 state} + { add t3, t3, _32 ; xm.vstr t3} + { nop ; xm.vstd t3} + xm.vlsat zeros + xm.vstrpv state_p1, tmp + + + { sub t3, t3, _32 ; xm.vldd t3} + { nop ; xm.vldr t3} + { slli tmp, tmp, 4 ; xm.vlmacc0 state} + { add t3, t3, _32 ; xm.vstr t3} + { nop ; xm.vstd t3} + xm.vlsat zeros + xm.vstrpv state_p1, tmp + + + { sub t3, t3, _32 ; xm.vldd t3} + { nop ; xm.vldr t3} + { slli tmp, tmp, 4 ; xm.vlmacc0 state} + { add t3, t3, _32 ; xm.vstr t3} + { nop ; xm.vstd t3} + xm.vlsat zeros + xm.vstrpv 
state_p1, tmp + + // Final vstrpv should have written the output to filt->state[0][N]. 
filt->state should + // still be pointing at filt->state[0][0] + + { nop ; lw N,(FILT_N)*4 ( filter)} + xm.ldw a0,N ( state) + { nop ; lw s8, 4 (sp)} + xm.lddsp s3,s2,8 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + +//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. */ +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. */ +.L_size_end: + .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s16.S b/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s16.S new file mode 100644 index 00000000..cf0ae599 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s16.S @@ -0,0 +1,118 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ + + +#if defined(__VX4B__) + +#include "../asm_helper.h" + +/* + +typedef struct { + unsigned num_taps; + right_shift_t shift; + int16_t* coef; + int16_t* state; +} filter_fir_s16_t; + +int16_t filter_fir_s16( + filter_fir_s16_t* filter, + const int16_t new_sample); +*/ + +#define FUNCTION_NAME filter_fir_s16 + +#define NSTACKVECS (2) +#define NSTACKWORDS (12+8*NSTACKVECS) + +#define FILT_N 0 +#define FILT_SHIFT 1 +#define FILT_COEF 2 +#define FILT_STATE 3 + + +#define STACK_VEC_TMP (NSTACKWORDS-8) +#define STACK_VEC_VR (NSTACKWORDS-16) + +#define STACK_FILTER (8) + +#define buff x10 +#define length x11 +#define sample x12 +#define tmpA x13 +#define _32 x18 +#define coef x19 +#define filter x24 + +.text +.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. */ +.type FUNCTION_NAME,@function +.p2align 4 + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + { li _32, 32 ; sw s8, 4 (sp)} + { mv filter, a0 ; mv sample, a1 } + { nop ; lw length,(FILT_N)*4 ( filter)} + { nop ; lw buff,(FILT_STATE)*4 ( filter)} + call filter_fir_s16_push_sample_up + { nop ; lw coef,(FILT_COEF)*4 ( filter)} + { nop ; lw buff,(FILT_STATE)*4 ( filter)} + { nop ; lw length,(FILT_N)*4 ( filter)} + { slli t3, _32, 3 ; xm.vclrdr } + { addi t3,sp, (STACK_VEC_TMP)*4 ; xm.vsetc t3} + { mv tmpA, length ; xm.vstd t3} + { xm.zexti tmpA, 4 ; srli length, length, 4 } + { slli tmpA, tmpA, 1 ; xm.brff length, .L_loop_end }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_loop_top: + { add buff, buff, _32 ; xm.vldc buff} + { addi length, length, -1 ; xm.vlmaccr0 coef} + xm.vlmaccr1 coef + { add coef, coef, _32 ; xm.bt length, .L_loop_top } + 
.L_loop_end: + + { addi a2,sp, (STACK_VEC_VR)*4 ; xm.mkmsk tmpA, tmpA } + { mv t3, buff ; xm.vstr a2} + { addi t3,sp, (STACK_VEC_TMP)*4 ; xm.vldr t3} + xm.vstrpv t3, tmpA + { li tmpA, 0 ; lw a2,(FILT_SHIFT)*4 ( filter)} + { addi t3,sp, (STACK_VEC_VR)*4 ; xm.vldc t3} + { nop ; xm.vldr t3} + { nop ; xm.vlmaccr0 coef} + xm.vlmaccr1 coef + //{ nop ; xm.vadddr } + xm.st16 x12, tmpA(x28) + + xm.vlsat t3 + { nop ; xm.vstr t3} + xm.ld16s x10, tmpA(x28) +.L_done: + { nop ; lw s8, 4 (sp)} + xm.lddsp s7,s6,24 + xm.lddsp s5,s4,16 + xm.lddsp s3,s2,8 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + +//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. */ +.set FUNCTION_NAME.nstackwords,NSTACKWORDS + filter_fir_s16_push_sample_up.nstackwords; /* Translation error on this line: unexpected token at position 86. */ +.global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 33. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. 
*/ +.L_size_end: + .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s32.S b/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s32.S new file mode 100644 index 00000000..c06f7123 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s32.S @@ -0,0 +1,206 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + + + +#if defined(__VX4B__) + +#include "../asm_helper.h" + +/* + +typedef struct { + unsigned num_taps; + unsigned head; + right_shift_t shift; + int32_t* coef; + int32_t* state; +} filter_fir_s32_t; + +int32_t filter_fir_s32( + filter_fir_s32_t* filter, + const int32_t new_sample); +*/ + +#define FUNCTION_NAME filter_fir_s32 + +#define NSTACKVECS (1) +#define NSTACKWORDS (12+8*NSTACKVECS) + +#define FILT_N 0 +#define FILT_HEAD 1 +#define FILT_SHIFT 2 +#define FILT_COEF 3 +#define FILT_STATE 4 + + +#define STACK_VEC_TMP (NSTACKWORDS-12) + + +#define filter x23 +#define sample x11 +#define tmp1 x18 +#define tmp2 x22 + +.text +.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. */ +.type FUNCTION_NAME,@function +.p2align 4 + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + { li t3, 0 ; sw s8, 4 (sp)} + + // Set VPU mode to 32-bit + { mv filter, a0 ; xm.vsetc t3} + + +// The field filter->head points to where the newest sample will go, which is probably somewhere in the middle of the +// state vector. This effectively splits the work to be done into two pieces -- the stuff after filter->head, and the +// stuff before it. 
The stuff after filter->head I'm calling part A (corresponds to lowest coef[] indices). The stuff +// before it I'm calling part B. + +// I'm just going to create two sets of registers, corresponding to each of the two parts. That's what this is. + +#define state_A x10 +#define state_B x19 + +#define N_A x12 +#define N_B x21 + +#define coef_A x11 +#define coef_B x20 + + // Get the current head position, which is also the number of taps in part B + { nop ; lw N_B,(FILT_HEAD)*4 ( filter)} + + // If N_B is currently zero, then the next head is the final index. Otherwise it's just + // the head decremented by 1. + { addi t3, N_B, -1 ; lw N_A,(FILT_N)*4 ( filter)} + { nop ; xm.bt N_B, .L_no_reset } + { addi t3, N_A, -1 ; nop } + + .L_no_reset: + { nop ; sw t3,(FILT_HEAD)*4 ( filter)} + + // Store the newest sample in the state. And grab the rest of the state/coef/N values + { nop ; lw state_B,(FILT_STATE)*4 ( filter)} + sh2add state_A, N_B, state_B + { sub N_A, N_A, N_B ; sw sample,0 ( state_A)} + { slli tmp1, N_A, 2 ; lw coef_A,(FILT_COEF)*4 ( filter)} + sh2add coef_B, N_A, coef_A +#undef sample + + // Each part has its own tail. We'll handle both of those first (by masking the state with zeros), then we'll do the + // bulk of the work after + + { addi s8,sp, (STACK_VEC_TMP)*4 ; xm.vclrdr } + { mv t3, state_A ; xm.vstd s8} + { xm.zexti tmp1, 5 ; xm.vldr t3} + { xm.mkmsk t3, tmp1 ; srli N_A, N_A, 3 } + xm.vstrpv s8, t3 + { mv t3, state_B ; xm.vldc s8} + { slli tmp2, N_B, 2 ; xm.vldr t3} + { xm.zexti tmp2, 5 ; xm.vstd s8} + { xm.mkmsk t3, tmp2 ; srli N_B, N_B, 3 } + xm.vstrpv s8, t3 + { add state_A, state_A, tmp1 ; xm.vclrdr } + { add coef_A, coef_A, tmp1 ; xm.vlmaccr0 coef_A} + { add state_B, state_B, tmp2 ; xm.vldc s8} + { add coef_B, coef_B, tmp2 ; xm.vlmaccr0 coef_B} + +// Now, go back through and do full vectors. 
+ +#undef tmp2 +#define _32 x22 + + tail .L_part_A_start + .p2align 4 + .L_part_A_start: + { li _32, 32 ; xm.brff N_A, .L_part_A_end }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + .L_part_A_loop_top: + { add state_A, state_A, _32 ; xm.vldc state_A} + { addi N_A, N_A, -1 ; xm.vlmaccr0 coef_A} + { add coef_A, coef_A, _32 ; xm.bt N_A, .L_part_A_loop_top } + .L_part_A_end: +#undef state_A +#undef N_A +#undef coef_A + + .L_part_B_start: + { li _32, 32 ; xm.brff N_B, .L_part_B_end }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + .L_part_B_loop_top: + { add state_B, state_B, _32 ; xm.vldc state_B} + { addi N_B, N_B, -1 ; xm.vlmaccr0 coef_B} + { add coef_B, coef_B, _32 ; xm.bt N_B, .L_part_B_loop_top } + .L_part_B_end: + +#undef state_B +#undef N_B +#undef coef_B + +// Now combine the 40-bit accumulators, assumes that x24 points to the stack. +// (the logic for this is too complicated to explain here) +lui t3, %hi(vpu_vec_0x40000000) + addi t3,t3, %lo(vpu_vec_0x40000000) + { nop ; lw a2,(FILT_SHIFT)*4 ( filter)} + { addi s2, a2, -1 ; xm.vldc t3} + { li s3, 1 ; xm.vstr s8} +lui t3, %hi(vpu_vec_0x80000000) + addi t3,t3, %lo(vpu_vec_0x80000000) + { xm.shl s2, s3, s2 ; xm.vlmacc0 t3} +lui t3, %hi(vpu_vec_zero) + addi t3,t3, %lo(vpu_vec_zero) + { li t3, 0 ; xm.vldr t3} + { xm.slt a3, t3, a2 ; xm.vlmaccr0 s8} + + { nop ; xm.vstd x24} + { xm.neg x20, x12 ; nop} +//{ neg s4, a2 ; vstd s8}" + { addi s4, s4, 1 ; xm.vlmaccr0 s8} + { nop ; xm.vstr s8} + +// x11 and x10 will contain a 64-bit result. Left or right-shift that as appropriate. 
+ + xm.lddi a1,a0, 0(s8) + { addi a1, a1, 8 ; xm.brff a3, .L_left_shift }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_right_shift: + // (from the block above): x19 = 1, x18 = 1<<(x12 - 1) + // adding x18*x19 (=x18) to x11:x10 effectively rounds it when we extract it. 
+ xm.maccs a1, a0, s2, s3 + xm.lsats a1, a0, a2 + xm.lextract a0, a1, a0, a2, 32 + { nop ; xm.bu .L_done } + + .L_left_shift: + // (from the block above): x19 = 1, x20 = -x12 + 1, x28 = 0 + // If we're left-shifting (or zero-shifting), we still need to saturate to q31. + // lsats has a bug which doesn't allow to use it with 0, so we'll have to + // add 1 to our shift, left-shift, saturate and extract with 1, no need to round here. + { xm.shl a1, a1, s4 ; nop } + xm.linsert a1, t3, a0, s4, 32 + xm.lsats a1, t3, s3 + xm.lextract a0, a1, t3, s3, 32 + +.L_done: + xm.lddsp s7,s6,24 + xm.lddsp s5,s4,16 + xm.lddsp s3,s2,8 + { nop ; lw s8, 4 (sp)} + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + +//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. */ +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. 
*/ +.L_size_end: + .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME + +#undef FUNCTION_NAME + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/filter/push_sample_down_s16.S b/lib_xcore_math/src/arch/vx4b/filter/push_sample_down_s16.S new file mode 100644 index 00000000..96235de8 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/filter/push_sample_down_s16.S @@ -0,0 +1,127 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + + + +#if defined(__VX4B__) + +#include "../asm_helper.h" + +/* + +Push a sample into the buffer, moving everything 1 index down (new_value is written to the last element). + +void filter_fir_s16_push_sample_down( + int16_t* buffer, + const unsigned length, + const int16_t new_value); +*/ + +#define FUNCTION_NAME filter_fir_s16_push_sample_down + +#define NSTACKVECS (1) +#define NSTACKWORDS (12+8*NSTACKVECS) + + + +#define STACK_VEC_TMP (NSTACKWORDS-8) + + +#define buff x10 +#define length x11 +#define value x12 +#define _60 x13 +#define mask x18 +#define tail_start x19 +#define buff_end x20 +#define buffD x21 +#define tmp x24 + +.text +.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. */ +.type FUNCTION_NAME,@function +.p2align 4 + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + { li a3, 32 ; sw s8, 4 (sp)} + + { slli t3, a3, 3 ; li mask, 28 /*28 samples at a time*/ } + { xm.mkmsk mask, mask ; xm.vsetc t3} + + // We're going to be moving 28 samples per loop iteration. The last address at which we + // can move 28 samples is 56 bytes before the end of the buffer. The end of the buffer is + // at buff + 2*length. 
+ + { slli tail_start, length, 1 ; li t3, 56 } + { add buff_end, buff, tail_start ; slli mask, mask, 4 } + { sub tail_start, buff_end, t3 ; addi _60, t3, 4 } + + { mv t3, buff ; xm.sltu tmp, tail_start, buff } + { li tmp, 28 ; xm.bt tmp, .L_loop_end } + { add buffD, buff, tmp ; xm.bu .L_loop_top } + + .p2align 4 // Does this loop have an FNOP after the first iteration? It all fits in the instruction buffer.. + .L_loop_top: + { addi buff, t3, -4 ; xm.vldr t3} + { add t3, buff, _60 ; xm.vldd buffD} + { addi buffD, buffD, -4 ; xm.vlmaccr0 buff} + xm.vlmaccr1 buff + { xm.sltu tmp, tail_start, t3 ; xm.vstd buffD} + xm.vstrpv buff , mask + { add buffD, buffD, _60 ; xm.bt tmp, .L_loop_end } + {nop ; xm.bu .L_loop_top } /* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + .L_loop_end: + +#undef _60 + + // x28 holds the address of the next sample to be moved. + { sub length, buff_end, t3 ; li tmp, 29 } + { xm.sltu tmp, length, tmp ; li a3, 28 } + { nop ; xm.bt tmp, .L_skippp } + { addi buff, t3, -4 ; xm.vldr t3} + { nop ; xm.vlmaccr0 t3} + xm.vlmaccr1 t3 + { add t3, t3, a3 ; nop} + xm.vstrpv buff, mask + + + .L_skippp: + { sub length, buff_end, t3 ; nop } + { li a3, 0 ; xm.vldr t3} + { xm.mkmsk tmp, length ; addi buff, t3, -4 } + { nop ; xm.vlmaccr0 t3} + xm.vlmaccr1 t3 + { add t3, t3, length ; nop} + { addi t3, t3, -2 ; slli tmp, tmp, 4 } + xm.vstrpv buff, tmp + + xm.st16 value, a3(t3) + //xm.st16 value, t3(a3) + +.L_done: + xm.lddsp s7,s6,24 + xm.lddsp s5,s4,16 + xm.lddsp s3,s2,8 + { nop ; lw s8, 4 (sp)} + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + +//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. 
*/ +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. */ +.L_size_end: + .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/filter/push_sample_up_s16.S b/lib_xcore_math/src/arch/vx4b/filter/push_sample_up_s16.S new file mode 100644 index 00000000..c7073b05 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/filter/push_sample_up_s16.S @@ -0,0 +1,152 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + + + +#if defined(__VX4B__) + +#include "../asm_helper.h" + +/* + +Push a sample into the buffer, moving everything 1 index up. + +void filter_fir_s16_push_sample_up( + int16_t* buffer, + const unsigned length, + const int16_t new_value); +*/ + +#define FUNCTION_NAME filter_fir_s16_push_sample_up + +#define NSTACKVECS (1) +#define NSTACKWORDS (12+8*NSTACKVECS) + + + +#define STACK_VEC_TMP (NSTACKWORDS-8) + + +#define buff_start x10 +#define length x11 +#define value x12 +#define tmpB x13 +#define mask x18 +#define buffR x19 +#define tmpC x20 +#define buffD x21 +#define tmp x24 + +.text +.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. 
*/ +.type FUNCTION_NAME,@function +.p2align 4 + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + { li tmpB, 32 ; sw s8, 4 (sp)} + + { slli t3, tmpB, 3 ; xm.mkmski mask, 32 } + { mv tmp, length ; xm.vsetc t3} + +// If the number of samples is odd, pretend it was one larger. If it's even, move the +// final sample without the VPU. + + xm.zexti tmp, 1 + xm.eq buffR, length, 1 + { add length, length, tmp ; xm.bt buffR, .L_write_new_sample } + { addi tmp, length, -2 ; xm.bt tmp, .L_odd_samps } +.L_even_samps: + // xm.ld16s buffD, buff_start(tmp) + xm.ld16s buffD, tmp(buff_start) + addi tmp, tmp, 1 + //xm.st16 buffD, buff_start(tmp) + xm.st16 buffD, tmp(buff_start) +.L_odd_samps: + + { slli mask, mask, 4 ; slli length, length, 1 } + +// buffR <-- first byte after buff[] +// mask <-- 0xFFFFFFF0 + { add buffR, buff_start, length ; nop } + +// Move buffD and buffR to point to: + { sub buffR, buffR, tmpB ; li tmpB, 28 } + { sub buffD, buffR, tmpB ; srli mask, mask, 2 } + +// If (buffD < buff_start) then skip the loop. + { mv t3, buffR ; xm.sltu tmp, buffD, buff_start } + { li tmpB, 56 ; xm.bt tmp, .L_loop_end } + { nop ; xm.bu .L_loop_top } + +// Do the loop. Align to 16 bytes so that we hopefully don't have FNOPs after the first +// iteration. 
+ .p2align 4 + .L_loop_top: + { mv buffR, buffD ; xm.vldr t3} + { sub buffD, buffD, tmpB ; xm.vldd buffD} + { xm.sltu tmp, buffD, buff_start ; xm.vlmaccr0 t3} + xm.vlmaccr1 t3 + xm.vstrpv t3, mask + { nop ; xm.vstd buffR} + // { sub t3, t3, tmpB ; xm.brff tmp, .L_loop_top }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { sub t3, t3, tmpB ; xm.bt tmp, .L_loop_end } + {xm.bu .L_loop_top ;nop} + .L_loop_end: + + + // If (x28 < buff_start ) we CANNOT do another vector (just vR[]) using the same + // mask. Otherwise, we can. + + { xm.sltu tmp, t3, buff_start ; nop } + { mv buffR, t3 ; xm.bt tmp, .L_skippp } + { li tmpB, 28 ; xm.vldr t3} + { sub t3, t3, tmpB ; xm.vlmaccr0 buffR} + xm.vlmaccr1 buffR + xm.vstrpv buffR, mask + +.L_skippp: + // Now we have less than 1 vector (14 samples) to shift. They'll be at the end of + // the vector when we load x28. Everything after buff_start. + + { sub length, buff_start, t3 ; xm.mkmski tmpC, 2 } + { xm.mkmski mask, 32 ; xm.bitrev tmpC, tmpC } + + { xm.shl mask, mask, length; xm.vldr t3} + + xm.andnot mask, tmpC + {nop; xm.vlmaccr0 x28 } + xm.vlmaccr1 x28 + + xm.vstrpv t3, mask + +.L_write_new_sample: + { li tmpC, 0 ; nop } + // xm.st16 value, buff_start(tmpC) + xm.st16 value, tmpC(buff_start) +.L_done: + xm.lddsp s7,s6,24 + xm.lddsp s5,s4,16 + xm.lddsp s3,s2,8 + { nop ; lw s8, 4 (sp)} + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + +//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. */ +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 42. 
*/ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. */ +.L_size_end: + .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/filter/vect_s32_convolve_valid.S b/lib_xcore_math/src/arch/vx4b/filter/vect_s32_convolve_valid.S new file mode 100644 index 00000000..49ee937e --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/filter/vect_s32_convolve_valid.S @@ -0,0 +1,130 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +/* + +headroom_t vect_s32_convolve_valid( + int32_t signal_out[], + const int32_t signal_in[], + const int32_t filter_q30[], + const unsigned signal_in_length, + const unsigned filter_taps); + +*/ + +// #include "../asm_helper.h" + +#define NSTACKVECTS (2) +#define NSTACKWORDS (16 + 8*NSTACKVECTS+4) + +#define FUNCTION_NAME vect_s32_convolve_valid + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) + + +#define sig_out x10 +#define sig_in x11 +#define filter x12 +#define len x13 + +#define tmpA x18 +#define _32 x19 +#define vec_tmp x20 +#define tmpB x21 + + +#define P filter // P = (filter_taps >> 1) + + + +.text; .issue_mode dual /* Translation error on this line: unexpected token at position 5. 
*/ +.p2align 2 + + +FUNCTION_NAME: + + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + + ////// Set mode to 32-bit + { li t3, 0 ; sw s8, 4 (sp)} + { addi vec_tmp,sp, (STACK_VEC_TMP)*4 ; xm.vsetc t3} + + ////// Move the filter coefficients into vC[] + mv tmpB, a4 + { mv t3, filter ; nop} + { slli tmpA, tmpB, 2 ; xm.vclrdr } + { xm.mkmsk tmpA, tmpA ; xm.vstd vec_tmp} + { srli P, tmpB, 1 ; xm.vldr t3} + xm.vstrpv vec_tmp, tmpA + { sub len, len, P ; xm.vldc vec_tmp} + { sub len, len, P ; li _32, 32 } + + // Number of output elements is sig_in_length - (2 * (filter_taps >> 1)) = sig_in_length - 2*P + + { srli t3, len, 3 ; add sig_in, sig_in, _32 } + { addi sig_in, sig_in, -4 ; xm.brff t3, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_loop_top: + { addi len, len, -8 ; xm.vclrdr } + { addi t3, sig_in, -4 ; xm.vlmaccr0 sig_in} + { addi t3, t3, -4 ; xm.vlmaccr0 t3} + { addi t3, t3, -4 ; xm.vlmaccr0 t3} + { addi t3, t3, -4 ; xm.vlmaccr0 t3} + { addi t3, t3, -4 ; xm.vlmaccr0 t3} + { addi t3, t3, -4 ; xm.vlmaccr0 t3} + { addi t3, t3, -4 ; xm.vlmaccr0 t3} + { srli t3, len, 3 ; xm.vlmaccr0 t3} + { add sig_in, sig_in, _32 ; xm.vstr sig_out} + { add sig_out, sig_out, _32 ; xm.bt t3, .L_loop_top } + .L_loop_bot: + +// If there is a tail, then len will be non-zero. +// In that case, there are len elements left to VLMACCR, but sig_in[] currently points to the last +// element of the group, assuming a full 8 elements are to be output. But of course the tail must, +// by definition, be fewer than 8 elements. 
So sig_in[] needs to be offset: +// sig_in <-- sig_in - 4*(8 - len) = sig_in - 32 + 4*len + + { slli len, len, 2 ; xm.brff len, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + { sub sig_in, sig_in, _32 ; xm.vclrdr } + { xm.mkmsk tmpA, len ; add sig_in, sig_in, len } + + .L_tail_loop: + { addi len, len, -4 ; xm.vlmaccr0 sig_in} + { addi sig_in, sig_in, -4 ; xm.bt len, .L_tail_loop } + .L_tail_loop_bot: + + xm.vstrpv sig_out, tmpA + { nop ; xm.vstr vec_tmp} + +.L_finish: + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 + { li a0, 31 ; xm.vgetc t3} + { xm.zexti t3, 5 ; lw s8, 4 (sp)} + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/matrix/mat_mul_s8_x_s8_yield_s32.S b/lib_xcore_math/src/arch/vx4b/matrix/mat_mul_s8_x_s8_yield_s32.S new file mode 100644 index 00000000..01a4cc37 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/matrix/mat_mul_s8_x_s8_yield_s32.S @@ -0,0 +1,128 @@ +// Copyright 2021-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ +#if defined(__VX4B__) + + +/* + + M_rows MUST be a multiple of 16, and N_cols MUST be a multiple of 32 + +void mat_mul_s8_x_s8_yield_s32 ( + split_acc_s32_t accumulators[], + const int8_t matrix[], + const int8_t input_vect[], + const unsigned M_rows, + const unsigned N_cols); +*/ + + +#include "../asm_helper.h" + +#define NSTACKWORDS (12+8+4) + +#define FUNCTION_NAME mat_mul_s8_x_s8_yield_s32 + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) + +#define STACK_M_ROWS (9) +#define STACK_INPUT_VECT (9) + +#define accs x10 +#define matrix x11 +#define vector x12 +#define rows_left x13 +#define cols_left x18 +#define _32 x19 +#define N_cols x20 +#define mat_stride_B x21 +#define mat_stride_C x22 + +#define K x24 + +.text +.p2align 2 + + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + li t3, 0x200 + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + { li _32, 32 ; sw s8, 4 (sp)} + { li s8, 15 ; xm.vsetc t3} + add rows_left, rows_left, s8 + mv N_cols, a4 + { srli rows_left, rows_left, 4 ; nop} + { slli mat_stride_B, N_cols, 4 ; mv K, N_cols } + { sub mat_stride_C, mat_stride_B, N_cols ; li s7, 31 } + { add mat_stride_B, mat_stride_C, _32 ; sw vector, (STACK_INPUT_VECT)*4 (sp)} + + { xm.zexti K, 5 ; add s7, N_cols, s7 } + { sub K, _32, K ; srli s7, s7, 5 } + { xm.zexti K, 5 ; nop } + { add matrix, matrix, K ; sub mat_stride_C, mat_stride_C, K } + mv a4, N_cols + { slli K, s7, 5 ; nop} + + { add matrix, matrix, mat_stride_C ; xm.bu .L_output_group_top } + + .p2align 4 + .L_output_group_top: + { add t3, accs, _32 ; xm.vldd accs} + { mv cols_left, K ; xm.vldr t3} + { addi rows_left, rows_left, -1 ; lw vector, (STACK_INPUT_VECT)*4 (sp)} + + .L_input_group_top: + { add vector, vector, _32 ; xm.vldc vector} + { sub matrix, matrix, N_cols ; xm.vlmaccr0 matrix} 
+ { sub matrix, matrix, N_cols ; xm.vlmaccr0 matrix} + { sub matrix, matrix, N_cols ; xm.vlmaccr0 matrix} + { sub matrix, matrix, N_cols ; xm.vlmaccr0 matrix} + { sub matrix, matrix, N_cols ; xm.vlmaccr0 matrix} + { sub matrix, matrix, N_cols ; xm.vlmaccr0 matrix} + { sub matrix, matrix, N_cols ; xm.vlmaccr0 matrix} + { sub matrix, matrix, N_cols ; xm.vlmaccr0 matrix} + { sub matrix, matrix, N_cols ; xm.vlmaccr0 matrix} + { sub matrix, matrix, N_cols ; xm.vlmaccr0 matrix} + { sub matrix, matrix, N_cols ; xm.vlmaccr0 matrix} + { sub matrix, matrix, N_cols ; xm.vlmaccr0 matrix} + { sub matrix, matrix, N_cols ; xm.vlmaccr0 matrix} + { sub matrix, matrix, N_cols ; xm.vlmaccr0 matrix} + { sub matrix, matrix, N_cols ; xm.vlmaccr0 matrix} + { sub cols_left, cols_left, _32 ; xm.vlmaccr0 matrix} + { add matrix, matrix, mat_stride_B ; xm.bt cols_left, .L_input_group_top } + .L_input_group_bottom: + + { add accs, accs, _32 ; xm.vstd accs} + { add accs, accs, _32 ; xm.vstr accs} + { add matrix, matrix, mat_stride_C ; xm.bt rows_left, .L_output_group_top } + .L_output_group_bottom: + +.L_finish: + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 + { nop ; lw s8, 4 (sp)} + { nop ; xm.retsp (NSTACKWORDS)*4 } + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. 
*/ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/misc/chunk_float_s32_log.S b/lib_xcore_math/src/arch/vx4b/misc/chunk_float_s32_log.S new file mode 100644 index 00000000..2e4a617c --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/misc/chunk_float_s32_log.S @@ -0,0 +1,184 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* + +Condition: 0 < ldexp(b[k], -30) < 2 + + +void chunk_float_s32_log( + q8_24 a[], + const float_s32_t b[]); +*/ + + +#define NSTACKWORDS (8+48+4) + +#define FUNCTION_NAME chunk_float_s32_log + +#define SP_VEC_X1 ((NSTACKWORDS) - 8 -4) +#define SP_VEC_X2 ((NSTACKWORDS) - 16-4) +#define SP_VEC_X3 ((NSTACKWORDS) - 24-4) +#define SP_VEC_X4 ((NSTACKWORDS) - 32-4) +#define SP_VEC_X5 ((NSTACKWORDS) - 40-4) +#define SP_VEC_X6 ((NSTACKWORDS) - 48-4) + + +.text +.p2align 2 + +.L_ps_coef1: .word -0x800000, -0x800000, -0x800000, -0x800000, -0x800000, -0x800000, -0x800000, -0x800000 /* Translation error on this line: unexpected token at position 13. */ +.L_ps_coef2: .word 0x555555, 0x555555, 0x555555, 0x555555, 0x555555, 0x555555, 0x555555, 0x555555 /* Translation error on this line: unexpected token at position 13. */ +.L_ps_coef3: .word -0x400000, -0x400000, -0x400000, -0x400000, -0x400000, -0x400000, -0x400000, -0x400000 /* Translation error on this line: unexpected token at position 13. */ +.L_ps_coef4: .word 0x333333, 0x333333, 0x333333, 0x333333, 0x333333, 0x333333, 0x333333, 0x333333 /* Translation error on this line: unexpected token at position 13. */ +.L_ps_coef5: .word -0x2aaaab, -0x2aaaab, -0x2aaaab, -0x2aaaab, -0x2aaaab, -0x2aaaab, -0x2aaaab, -0x2aaaab /* Translation error on this line: unexpected token at position 13. 
*/ + +.L_ln_2: .word 0x2c5c85fe,0x2c5c85fe,0x2c5c85fe,0x2c5c85fe,0x2c5c85fe,0x2c5c85fe,0x2c5c85fe,0x2c5c85fe /* Translation error on this line: unexpected token at position 9. */ + + +#define a x10 +#define b x11 +#define mantA x28 +#define expA x12 +#define mantB x13 +#define expB x18 +#define tmpA x19 +#define tmpB x20 +#define vec_x x21 + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + +{ addi vec_x,sp, (SP_VEC_X1)*4 ; nop } + + xm.lddi mantA,expA, 0(b) + xm.lddi mantB,expB, 8(b) +{ xm.cls tmpA, mantA ; nop } +{ nop ; xm.cls tmpB, mantB } +{ sub expA, expA, tmpA ; sub expB, expB, tmpB } +{ xm.shl mantA, mantA, tmpA ; xm.shl mantB, mantB, tmpB }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantA, mantA, tmpA \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantB, mantB, tmpB \nMessage: The shift amount is not 32" */ + xm.stdi expA,expB, 0(a) + xm.stdi mantA,mantB, 0(vec_x) + + xm.lddi mantA,expA, 16(b) + xm.lddi mantB,expB, 24(b) + +{ xm.cls tmpA, mantA ; nop } +{ nop ; xm.cls tmpB, mantB } +{ sub expA, expA, tmpA ; sub expB, expB, tmpB } +{ xm.shl mantA, mantA, tmpA ; xm.shl mantB, mantB, tmpB }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantA, mantA, tmpA \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantB, mantB, tmpB \nMessage: The shift amount is not 32" 
*/ + xm.stdi expA,expB, 8(a) + xm.stdi mantA,mantB, 8(vec_x) + + xm.lddi mantA,expA, 32(b) + xm.lddi mantB,expB, 40(b) +{ xm.cls tmpA, mantA ; nop } +{ nop ; xm.cls tmpB, mantB } +{ sub expA, expA, tmpA ; sub expB, expB, tmpB } +{ xm.shl mantA, mantA, tmpA ; xm.shl mantB, mantB, tmpB }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantA, mantA, tmpA \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantB, mantB, tmpB \nMessage: The shift amount is not 32" */ + xm.stdi expA,expB, 16(a) + xm.stdi mantA,mantB, 16(vec_x) + + xm.lddi mantA,expA, 48(b) + xm.lddi mantB,expB, 56(b) +{ xm.cls tmpA, mantA ; nop } +{ nop ; xm.cls tmpB, mantB } +{ sub expA, expA, tmpA ; sub expB, expB, tmpB } +{ xm.shl mantA, mantA, tmpA ; xm.shl mantB, mantB, tmpB }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantA, mantA, tmpA \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantB, mantB, tmpB \nMessage: The shift amount is not 32" */ + xm.stdi expA,expB, 24(a) + xm.stdi mantA,mantB, 24(vec_x) + +{ li t3, 0 ; nop } +{ li tmpA, 24 ; xm.vsetc t3} + + la t3, vpu_vec_0x20000000 +{ nop ; xm.vclrdr } +{ xm.neg tmpA, tmpA ; nop } + xm.vlashr a, tmpA +{ xm.ldap t3, .L_ln_2 ; xm.vladd t3} +{ nop ; xm.vlmul0 t3} + +{ li t3, 0 ; xm.vstr a} + xm.vlashr vec_x, t3 + la t3, vpu_vec_0x00000002 +xm.vlsat t3 + la t3, vpu_vec_neg_0x40000000 +{ addi tmpB,sp, (SP_VEC_X1)*4 ; xm.vladd t3} + +#undef mantA +#undef expA +#undef mantB +#undef expB + +{ addi vec_x,sp, (SP_VEC_X2)*4 ; xm.vstr vec_x} +{ nop ; xm.vlmul0 tmpB} // (x-1.0)^2 +{ addi vec_x,sp, (SP_VEC_X3)*4 ; 
xm.vstr vec_x} +{ nop ; xm.vlmul0 tmpB} // (x-1.0)^3 +{ addi vec_x,sp, (SP_VEC_X4)*4 ; xm.vstr vec_x} +{ nop ; xm.vlmul0 tmpB} // (x-1.0)^4 +{ addi vec_x,sp, (SP_VEC_X5)*4 ; xm.vstr vec_x} +{ li tmpA, 6 ; xm.vlmul0 tmpB} // (x-1.0)^5 +{ addi vec_x,sp, (SP_VEC_X6)*4 ; xm.vstr vec_x} +{ xm.ldap t3, .L_ps_coef5 ; xm.vlmul0 tmpB} // (x-1.0)^6 +{ addi tmpB,sp, (SP_VEC_X1)*4 ; xm.vstr vec_x} + + xm.vlashr tmpB, tmpA // vR[] = coef[0] * x +{ xm.ldap t3, .L_ps_coef4 ; xm.vldc t3} // vC[] = coef[5] +{ addi vec_x,sp, (SP_VEC_X5)*4 ; xm.vlmacc0 vec_x} // vR[] += coef[5] * x^6 +{ xm.ldap t3, .L_ps_coef3 ; xm.vldc t3} // vC[] = coef[4] +{ addi vec_x,sp, (SP_VEC_X4)*4 ; xm.vlmacc0 vec_x} // vR[] += coef[4] * x^5 +{ xm.ldap t3, .L_ps_coef2 ; xm.vldc t3} // vC[] = coef[3] +{ addi vec_x,sp, (SP_VEC_X3)*4 ; xm.vlmacc0 vec_x} // vR[] += coef[3] * x^4 +{ xm.ldap t3, .L_ps_coef1 ; xm.vldc t3} // vC[] = coef[2] +{ addi vec_x,sp, (SP_VEC_X2)*4 ; xm.vlmacc0 vec_x} // vR[] += coef[2] * x^3 +{ nop ; xm.vldc t3} // vC[] = coef[1] +{ addi vec_x,sp, (SP_VEC_X1)*4 ; xm.vlmacc0 vec_x} // vR[] += coef[1] * x^2 + +{ nop ; xm.vladd a} +{ li tmpA, 1 ; xm.vstr a} + +// Any inputs that were 0 should become INT32_MIN +la t3, vpu_vec_0x7FFFFFFF + xm.vlashr t3, tmpA +{ nop ; xm.vladd vec_x} +{ nop ; xm.vdepth1 } +{ nop ; xm.vstr vec_x} +{ nop ; lw tmpA,0 ( vec_x)} +{ mv tmpB, tmpA ; nop } + xm.zip tmpB, tmpA, 0 + mv tmpB, tmpA + xm.zip tmpB, tmpA, 0 + + la t3, vpu_vec_0x80000000 +{ nop ; xm.vldr t3} + xm.vstrpv a, tmpA + +.L_finish: + xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords 
/* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/misc/util.S b/lib_xcore_math/src/arch/vx4b/misc/util.S new file mode 100644 index 00000000..a4dbe488 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/misc/util.S @@ -0,0 +1,103 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +.text + +.align 4; /* Translation error on this line: unexpected token at position 8. 
*/ +/* +void f32_unpack( + int32_t* mantissa, + exponent_t* exp, + float input); +*/ +#define FUNCTION_NAME f32_unpack +#define NSTACKWORDS (0) + +#define mant_out x10 +#define exp_out x11 +#define input x12 +#define sign x13 +#define exp x28 + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.fsexp sign, exp, input + xm.fmant input, input + +#undef input +#define mant x12 + + // interesting way of subtracting 23 without using registers + addi exp, exp, -24 +{ addi exp, exp, 1 ; xm.brff sign, .L_xuf_lblA }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ +{ xm.neg mant, mant ; nop } + .L_xuf_lblA: +{ nop ; sw mant,0 ( mant_out)} +{ nop ; sw exp,0 ( exp_out)} + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ +.L_func_end_unpack: + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. 
*/ +.size FUNCTION_NAME,.L_func_end_unpack - FUNCTION_NAME + +#undef NSTACKWORDS +#undef FUNCTION_NAME +#undef mant_out +#undef exp_out +#undef mant +#undef exp +#undef sign + + + +/* +float s32_to_f32( + const int32_t mantissa, + const exponent_t exp); +*/ +#define FUNCTION_NAME s32_to_f32 +#define NSTACKWORDS (0) + +#define mant x10 +#define exp x11 +#define sign x12 +#define zero x13 +#define tmp x28 + +.align 4; /* Translation error on this line: unexpected token at position 8. */ +FUNCTION_NAME: + + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + srai sign, mant, 31 + { li tmp, 23 ; li zero, 0 } + { add exp, exp, tmp ; xm.brff sign, .L_pack_not_neg }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + xm.neg mant, mant + .L_pack_not_neg: + xm.fmake mant, sign, exp, zero, mant + { nop ; xm.retsp (NSTACKWORDS)*4 } + +.L_func_end_pack: + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. 
*/ +.size FUNCTION_NAME,.L_func_end_pack - FUNCTION_NAME + +#undef NSTACKWORDS +#undef FUNCTION_NAME + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/misc/vect_copy.S b/lib_xcore_math/src/arch/vx4b/misc/vect_copy.S new file mode 100644 index 00000000..ecb29559 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/misc/vect_copy.S @@ -0,0 +1,60 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +/* +headroom_t vect_s32_copy( + int32_t a[], + const int32_t b[], + unsigned length); +*/ + +#define NSTACKWORDS (0) +#define FUNCTION_NAME vect_s32_copy + +#define a x10 +#define b x11 +#define len x12 +#define tmp x13 + +.text +.p2align 2 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + { li t3, 0 ; srli tmp, len, 3 } + { xm.zexti tmp, 1 ; srli len, tmp, 1 } + { add len, len, tmp ; xm.vsetc t3} + { li t3, 32 ; xm.bt tmp, .L_loop_mid } + { nop ; xm.bu .L_loop_top } + + .p2align 4 + .L_loop_top: + { add b, b, t3 ; xm.vldd b} + { add a, a, t3 ; xm.vstd a} + .L_loop_mid: + { add b, b, t3 ; xm.vldd b} + { addi len, len, -1 ; xm.vstd a} + { add a, a, t3 ; xm.bt len, .L_loop_top } + .L_loop_bot: + +.L_finish: + { li a0, 31 ; xm.vgetc t3} + { xm.zexti t3, 5 ; nop } + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. 
*/ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/misc/vect_float_s32_ln_prepare.S b/lib_xcore_math/src/arch/vx4b/misc/vect_float_s32_ln_prepare.S new file mode 100644 index 00000000..20d881c8 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/misc/vect_float_s32_ln_prepare.S @@ -0,0 +1,122 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* + +void vect_float_s32_ln_prepare( + q2_30 a[8], + q8_24 exp_mod[8], + const float_s32_t b[]); +*/ + + +#define NSTACKWORDS (8) + +#define FUNCTION_NAME vect_float_s32_ln_prepare + + +#define a x10 +#define exp_mod x11 +#define b x12 +#define mantA x13 +#define expA x18 +#define mantB x19 +#define expB x20 +#define tmpA x21 +#define tmpB x22 +#define tmpC x23 + +.text +.p2align 2 + + + +.L_32_Q24: +.word 0x20000000,0x20000000,0x20000000,0x20000000,0x20000000,0x20000000,0x20000000,0x20000000 + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + xm.stdsp s7,s6,16 + +{ li t3, 0 ; li tmpC, 24 } +{ li t3, 1 ; xm.vsetc t3} + + xm.lddi mantA,expA, 0(b) + xm.lddi mantB,expB, 8(b) +{ xm.cls tmpA, mantA ; nop } +{ nop ; xm.cls tmpB, mantB } +{ sub expA, expA, tmpA ; sub expB, expB, tmpB } +{ addi tmpA, tmpA, -1 ; addi tmpB, tmpB, -1 } +{ xm.shl mantA, mantA, tmpA ; xm.shl mantB, mantB, tmpB }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for 
the integer value of the item at position 2 in the instruction's operands in shli mantA, mantA, tmpA \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantB, mantB, tmpB \nMessage: The shift amount is not 32" */ + xm.stdi expA,expB, 0(exp_mod) + xm.stdi mantA,mantB, 0(a) + + xm.lddi mantA,expA, 16(b) + xm.lddi mantB,expB, 24(b) +{ xm.cls tmpA, mantA ; nop } +{ nop ; xm.cls tmpB, mantB } +{ sub expA, expA, tmpA ; sub expB, expB, tmpB } +{ addi tmpA, tmpA, -1 ; addi tmpB, tmpB, -1 } +{ xm.shl mantA, mantA, tmpA ; xm.shl mantB, mantB, tmpB }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantA, mantA, tmpA \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantB, mantB, tmpB \nMessage: The shift amount is not 32" */ + xm.stdi expA,expB, 8(exp_mod) + xm.stdi mantA,mantB, 8(a) + + xm.lddi mantA,expA, 32(b) + xm.lddi mantB,expB, 40(b) +{ xm.cls tmpA, mantA ; nop } +{ nop ; xm.cls tmpB, mantB } +{ sub expA, expA, tmpA ; sub expB, expB, tmpB } +{ addi tmpA, tmpA, -1 ; addi tmpB, tmpB, -1 } +{ xm.shl mantA, mantA, tmpA ; xm.shl mantB, mantB, tmpB }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantA, mantA, tmpA \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantB, mantB, tmpB \nMessage: The shift amount is not 32" */ + xm.stdi expA,expB, 16(exp_mod) + xm.stdi mantA,mantB, 16(a) + + xm.lddi mantA,expA, 48(b) + xm.lddi mantB,expB, 56(b) +{ xm.cls tmpA, mantA ; nop } +{ nop ; xm.cls tmpB, mantB } +{ sub expA, 
expA, tmpA ; sub expB, expB, tmpB } +{ addi tmpA, tmpA, -1 ; addi tmpB, tmpB, -1 } +{ xm.shl mantA, mantA, tmpA ; xm.shl mantB, mantB, tmpB }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantA, mantA, tmpA \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mantB, mantB, tmpB \nMessage: The shift amount is not 32" */ + xm.stdi expA,expB, 24(exp_mod) + xm.stdi mantA,mantB, 24(a) + + xm.vlashr a, t3 +{ xm.neg tmpC, tmpC ; nop} +{nop ; xm.vstr a} + +lui t3, %hi(.L_32_Q24) + addi t3,t3, %lo(.L_32_Q24) + xm.vlashr exp_mod, tmpC +{ nop ; xm.vladd t3} +{ nop ; xm.vstr exp_mod} + + +.L_finish: + xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 + xm.lddsp s7,s6,16 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. 
*/ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/misc/xs3_memcpy.S b/lib_xcore_math/src/arch/vx4b/misc/xs3_memcpy.S new file mode 100644 index 00000000..7530adb5 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/misc/xs3_memcpy.S @@ -0,0 +1,53 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +/* +void xs3_memcpy( + void* dst, + const void* src, + unsigned bytes); +*/ + +#define NSTACKWORDS (0) +#define FUNCTION_NAME xs3_memcpy + +#define a x10 +#define b x11 +#define len x12 +#define tmp x13 + +.text +.p2align 2 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + { srli tmp, len, 5 ; xm.zexti len, 5 } + { li t3, 32 ; xm.brff tmp, .L_loop_bot } /* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + .L_loop_top: + { add b, b, t3 ; xm.vldd b} + { addi tmp, tmp, -1 ; xm.vstd a} + { add a, a, t3 ; xm.bt tmp, .L_loop_top } + .L_loop_bot: + { xm.mkmsk len, len ; xm.brff len, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { mv t3, b ; nop } + { nop ; xm.vldr t3} + xm.vstrpv a, len +.L_finish: + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token 
at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/scalar/f32_log2.S b/lib_xcore_math/src/arch/vx4b/scalar/f32_log2.S new file mode 100644 index 00000000..e325fc35 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/scalar/f32_log2.S @@ -0,0 +1,69 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +.text +.align 4; /* Translation error on this line: unexpected token at position 8. */ + +/* +float f32_log2( + const float x); +*/ +#define FUNCTION_NAME f32_log2 +#define NSTACKWORDS (4) + +#define x x10 +#define tmp x11 +#define exp x12 +#define _0 x13 + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4 +{ mv a1, a0 ; addi a0,sp, 4 } + la t3, f32_normA + jalr t3 + +{ li tmp, 1 ; li t3, 23 } +{ li _0, 0 ; lw exp, 4 (sp)} + xm.fmake tmp, _0, t3, _0, tmp + xm.fsub x, x, tmp + xm.fmake exp, _0, t3, _0, exp +la t3, log2_ps +{ nop ; sw exp, 4 (sp)} +{ mv a1, t3 ; li a2, 11 } +la t3, f32_power_series + jalr t3 +{ nop ; lw exp, 4 (sp)} + xm.fadd x, x, exp + xm.retsp (NSTACKWORDS)*4 +.L_func_end_unpack: + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function + + +.weak FUNCTION_NAME.callees +.add_to_set FUNCTION_NAME.callees,f32_normA.nstackwords +.add_to_set FUNCTION_NAME.callees,f32_power_series.nstackwords +.max_reduce FUNCTION_NAME.callee_maxstackwords,FUNCTION_NAME.callees,0 +.set FUNCTION_NAME.nstackwords,NSTACKWORDS+FUNCTION_NAME.callee_maxstackwords +.global FUNCTION_NAME.nstackwords + +.set 
FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME,.L_func_end_unpack - FUNCTION_NAME + +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/scalar/f32_norm.S b/lib_xcore_math/src/arch/vx4b/scalar/f32_norm.S new file mode 100644 index 00000000..affeeb9d --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/scalar/f32_norm.S @@ -0,0 +1,48 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +.text +.align 4; /* Translation error on this line: unexpected token at position 8. 
*/ + +/* +float f32_normA( + exponent_t* exp, + const float x); +*/ +#define FUNCTION_NAME f32_normA +#define NSTACKWORDS (0) + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.fsexp a2, a3, a1 /* presumably a2 <-- sign and a3 <-- exponent of x (a1); confirm operand order against the ISA */ + xm.fmant a1, a1 /* presumably a1 <-- mantissa of x; confirm against the ISA */ +{ addi a3, a3, 1 ; li t3, 0 } /* a3 <-- a3 + 1; t3 <-- 0 */ +{ addi a0, t3, -1 ; sw a3,0 ( a0)} /* stores a3 through the exp pointer in a0 while a0 is overwritten with -1. NOTE(review): relies on sw reading a0 before addi writes it within the same bundle -- confirm */ + xm.fmake a0, a2, a0, t3, a1 /* result built from a2, a0 (-1), t3 (0) and a1; presumably sign, exponent, mantissa high, mantissa low -- confirm */ + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + +.L_func_end_unpack: + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME,.L_func_end_unpack - FUNCTION_NAME + +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/scalar/f32_power_series.S b/lib_xcore_math/src/arch/vx4b/scalar/f32_power_series.S new file mode 100644 index 00000000..8f44e955 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/scalar/f32_power_series.S @@ -0,0 +1,133 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* + +float f32_power_series( + const float x, + const float coef[], + const unsigned terms_count); +*/ + + +#define NSTACKWORDS (4) + +#define FUNCTION_NAME f32_power_series + +#define x x10 +#define coef x11 +#define count x12 +#define acc x13 +#define tmpA x18 +#define pow x19 + +// these unroll settings seem to offer the best +// tradeoff between code size and speed (3/8 should also work) +#define UNROLL_LOG2 2 +#define UNROLL 4 + +#define CAT_(A, B) A##B +#define CAT(A, B) CAT_(A,B) + +#define FULL_LOOP_LBL CAT(.L_loop_, UNROLL) + +.text +.p2align 2 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + +{ li acc, 0 ; mv pow, x } +{ srli t3, count, UNROLL_LOG2 ; nop }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri t3, count, UNROLL_LOG2 \nMessage: The shift amount is not 32" */ + + .L_loop_top: + { xm.subi t3, count, UNROLL ; xm.bt t3, .L_loop_full } + slli t3, t3, 2 + { add coef, coef, t3 ; xm.bru count } + xm.assert count +#if (UNROLL_LOG2 >= 1) + tail .L_loop_1 +#endif +#if (UNROLL_LOG2 >= 2) + tail .L_loop_2 + tail .L_loop_3 +#endif +#if (UNROLL_LOG2 >= 3) + tail .L_loop_4 + tail .L_loop_5 + tail .L_loop_6 + tail .L_loop_7 +#endif + + .L_loop_full: +#if (UNROLL_LOG2 >= 3) + .L_loop_8: + { addi count, count, -1 ; lw tmpA,(UNROLL-8)*4 ( coef)} + xm.fmacc acc, acc, pow, tmpA + xm.fmul pow, pow, x + .L_loop_7: + { addi count, count, -1 ; lw tmpA,(UNROLL-7)*4 ( coef)} + xm.fmacc acc, acc, pow, tmpA + xm.fmul pow, pow, x + .L_loop_6: + { addi count, count, -1 ; lw tmpA,(UNROLL-6)*4 ( coef)} + xm.fmacc acc, acc, pow, tmpA + xm.fmul pow, pow, x + .L_loop_5: + { addi count, count, 
-1 ; lw tmpA,(UNROLL-5)*4 ( coef)} + xm.fmacc acc, acc, pow, tmpA + xm.fmul pow, pow, x +#endif +#if (UNROLL_LOG2 >= 2) + .L_loop_4: + { addi count, count, -1 ; lw tmpA,(UNROLL-4)*4 ( coef)} + xm.fmacc acc, acc, pow, tmpA + xm.fmul pow, pow, x + .L_loop_3: + { addi count, count, -1 ; lw tmpA,(UNROLL-3)*4 ( coef)} + xm.fmacc acc, acc, pow, tmpA + xm.fmul pow, pow, x +#endif +#if (UNROLL_LOG2 >= 1) + .L_loop_2: + { addi count, count, -1 ; lw tmpA,(UNROLL-2)*4 ( coef)} + xm.fmacc acc, acc, pow, tmpA + xm.fmul pow, pow, x +#endif + .L_loop_1: + { addi count, count, -1 ; lw tmpA,(UNROLL-1)*4 ( coef)} + xm.fmacc acc, acc, pow, tmpA + xm.fmul pow, pow, x + + li t3, UNROLL*4 + add coef, coef, t3 + { srli t3, count, UNROLL_LOG2 ; xm.bt count, .L_loop_top }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri t3, count, UNROLL_LOG2 \nMessage: The shift amount is not 32" */ + + +.L_finish: + mv a0, acc + xm.lddsp s3,s2,0 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. 
*/ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/scalar/f32_sin.S b/lib_xcore_math/src/arch/vx4b/scalar/f32_sin.S new file mode 100644 index 00000000..6c5421ba --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/scalar/f32_sin.S @@ -0,0 +1,147 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +.text +.p2align 2 + +/* +float f32_sin( + const float theta); +*/ + +#define FUNCTION_NAME f32_sin +#define NSTACKWORDS (8) + + + +#define r x10 +#define phi x11 +#define out_mul x12 +#define tmp x13 + +#define _0 x23 +#define _1 x24 + + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,0 +{ li _0, 0 ; sw s8, 24 (sp)} + xm.flt t3, r, _0 +{ nop ; li _1, 1 } +{ li out_mul, 23 ; xm.bt t3, .L_neg } + +// sin(-x) = -sin(x) -- sin() has odd symmetry, so let's only deal with positive angles +.L_pos: + xm.fmake out_mul, _0, out_mul, _0, _1 // +1.0f + tail .L_qwer +.L_neg: + xm.fmake out_mul, _1, out_mul, _0, _1 // -1.0f + xm.fmul r, r, out_mul + +.L_qwer: + +// Normalize our angle to be 0 <= r < 4.0f +// because sin(x) = sin(x + k*2*pi) for int k + lw t3, two_over_pi + xm.fmul r, r, t3 + +{ li t3, 21 ; nop } + xm.fmake tmp, _0, t3, _0, _1 // +0.25f + xm.fmul tmp, r, tmp + xm.fsexp s6, t3, tmp + xm.fmant tmp, tmp +{ xm.neg s6, t3 ; li s5, 23 } +{ add s6, s6, s5 ; addi t3, t3, 2 } + xm.shr tmp, tmp, s6 + xm.shl tmp, tmp, s6 + xm.fmake tmp, _0, t3, _0, tmp // tmp <-- 4.0*floor(r/4.0) + + xm.fsub r, r, tmp // r <-- r - 4.0*floor(r/4.0) + +// sin(pi + x) = -sin(x) if pi < x <= 2*pi +{ li t3, 24 ; nop } + xm.fmake 
tmp, _0, t3, _0, _1 // +2.0f + xm.flt t3, r, tmp +{ nop ; xm.bt t3, .L_wert } + xm.fsub r, r, tmp // r <-- r - 2 + xm.fsub out_mul, _0, out_mul // out_mul <-- -out_mul +.L_wert: + +// sin(pi/2 + x) = sin(pi - x) + xm.fmul t3, out_mul, out_mul // x28 <-- out_mul^2 = 1.0 + xm.flt t3, r, t3 // x28 <-- r < 1.0 +{ nop ; xm.bt t3, .L_erty } + xm.fsub r, tmp, r // r <-- 2.0 - r +.L_erty: + + +// Now, we have an angle r in the first quadrant +// r is a normalized angle where 0.0 <= r < 1.0 + +// Now apply power series for sin() + +#define total x18 +#define coefs x19 + +lui t3, %hi(sin_coef) + addi t3,t3, %lo(sin_coef) +{ mv coefs, t3 ; lw tmp,0 ( t3)} + xm.fmul phi, r, r + xm.fmul total, r, tmp + + +#define PS_TERM(N) \ + xm.fmul r, r, phi; \ + xm.ldw tmp, (N*4)(coefs); \ + xm.fmacc total, total, r, tmp; + +PS_TERM(1) +PS_TERM(2) +PS_TERM(3) +PS_TERM(4) +PS_TERM(5) +PS_TERM(6) +PS_TERM(7) + +// Apply final output multiplier + xm.fmul a0, total, out_mul + + xm.lddsp s7,s6,0 + xm.lddsp s5,s4,16 + xm.lddsp s3,s2,8 +{ nop ; lw s8, 24 (sp)} +{ nop ; xm.retsp (NSTACKWORDS)*4 } + +.L_func_end: + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. 
*/ +.size FUNCTION_NAME,.L_func_end - FUNCTION_NAME + + + + + + + + + + + + + +#endif //defined(__VX4B__) + diff --git a/lib_xcore_math/src/arch/vx4b/scalar/float_s32.c b/lib_xcore_math/src/arch/vx4b/scalar/float_s32.c new file mode 100644 index 00000000..2490f7a2 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/scalar/float_s32.c @@ -0,0 +1,87 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +#if defined(__VX4B__) +#include +#include +#include + +#include "xmath/xmath.h" +#include "vpu_helper.h" +#include "xmath/xs3/vpu_scalar_ops.h" +#include "vpu_const_vects.h" + + +static inline +int64_t maccs(int64_t acc, int32_t x, int32_t y) +{ + return acc + (((int64_t)x) * y); +} + +static inline +int32_t lextract(int64_t acc, unsigned pos) +{ + return (acc >> pos) & 0xFFFFFFFF; +} + +float_s32_t float_s32_exp( + const float_s32_t b) +{ + float_s32_t res = {0,0}; + + const int32_t one = 0x40000000; + const int32_t sqrt_2 = 0x5a82799a; + const int32_t log2_e = 0x5c551d95; + const int32_t ln_2 = 0x2c5c85fe; + + headroom_t hr = HR_S32(b.mant); + + int32_t tmp1 = vlashr32(b.mant, -(int)hr); + tmp1 = vlashr32(tmp1, 1); + + res.exp = (b.exp - hr) + 1; + + // compute y = x * log2(e) + int32_t y = lextract(maccs(0, log2_e, tmp1), 30); + + if( res.exp >= 0 ){ + res.mant = one; + res.exp = res.exp - 30; + return res; + } + + int frac_bits = -res.exp; + + right_shift_t shr = -30 + frac_bits; + + int32_t alpha = 0; + int32_t rho = 0; + int32_t beta = 0; + unsigned mask = 0; + + if(frac_bits == 31){ + alpha = (y < 0)? -1 : 0; + rho = y & 0x40000000; + beta = y & 0x3FFFFFFF; + beta = vlashr32(beta, shr); + } else if(frac_bits >= 32){ + alpha = (y < 0)? -1 : 0; + rho = (y < 0)? 1 : 0; + beta = vlashr32(y, shr) + ((y < 0)? 
0x20000000 : 0); + } else { + mask = (1 << (frac_bits-1)) - 1; + beta = y & mask; + tmp1 = y >> (frac_bits-1); + rho = tmp1 & 1; + alpha = tmp1 >> 1; + beta = vlashr32(beta, shr); + } + + res.exp = alpha - 30; + int32_t two_to_rho = rho? sqrt_2 : one; + int32_t z = lextract(maccs(0, beta, ln_2), 30); + z = q30_exp_small(z); + res.mant = lextract(maccs(0, two_to_rho, z), 30); + return res; +} + +#endif // defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/scalar/float_s32_exp.almost b/lib_xcore_math/src/arch/vx4b/scalar/float_s32_exp.almost new file mode 100644 index 00000000..2897d8fb --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/scalar/float_s32_exp.almost @@ -0,0 +1,153 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +.text +.align 4; /* Translation error on this line: unexpected token at position 8. */ + +/* +float_s32_t float_s32_exp( + const float_s32_t b); +*/ +#define FUNCTION_NAME float_s32_exp +#define NSTACKWORDS (8) + + +#define STACK_RHO (0) +#define STACK_A (1) + + + +#define b_0 x10 +#define b_1 x11 + +#define tmp1 x12 +#define tmp2 x13 +#define tmp3 x18 +#define consts x19 + +.L_consts: +.L_none: .word 0x00000000 +.L_one: .word 0x40000000 +.L_sqrt_2: .word 0x5a82799a +.L_log2_e: .word 0x5c551d95 +.L_ln_2: .word 0x2c5c85fe + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4 + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 +la t3, .L_consts +// Load input, reformat to have 1 bit of headroom and store in +// output. + +{ mv consts, t3; nop} +{ xm.cls t3, b_0 ; nop} + +{ addi t3, t3, -1 ; nop } +{ xm.shl tmp1, tmp1, t3 ; addi t3, t3, -1 } + srai tmp1, tmp1, 1 +{ sub tmp2, b_1, t3 ; mv s4, tmp1}//a[0] +{ nop ; mv s5, tmp2} //a[1] + +#undef b +#define y x11 + +//// Compute y = x * log2(e) +{ mv y, tmp1 ; nop } +{ li tmp1, 0 ; lw t3,12 ( consts)} + xm.maccs tmp1, y, t3, y /// astew: Is this correct... 
isn't this doing acc = y + log2(e) * y ?? +{ li t3, 30 ; nop } + xm.lextract y, tmp1, y, t3, 32 + +//// Deal with fractional bit count +{ xm.clz tmp3, tmp2 ; nop} +{nop; xm.neg t3, tmp2 } +{ nop ; xm.brff tmp3, .L_neg_exp } + +//// If the exponent is non-negative, then the best estimate we can give is 2^(y< [1, R, R^2, R^3, ...], which converges to +// (1/(1-R)). Given the bounds for R, 1 <= (1/(1-R)) <= (4/3). + +// Specifically, the final term is (1/(1-R))*beta*(alpha^15), but we don't want to do a division, +// so by just picking a value of R and always using that, we should significantly improve our +// absolute error (compared to not including the final convergent sum term at all). We should prefer +// larger values of R because the absolute error is greater there, but it looks like we get the +// best results when we haven't gone quite all the way to (4/3). + +// I've experimentally found that the following seems to give the lowest absolute error in the test. +.word 0x6b6cb9bd // Q30( beta * (4/3)^( 0.9605835543766578 ) ) +// .word 0x6ca65798 // Q30( beta * (4/3)^(1) ) +.L_vec_s_hat: + .word 1,3,5,7,9,11,13,15 +.L_weights: + .word 0x40000000, 0x40000000, 0x40000000, 0x40000000 + .word 0x40000000, 0x40000000, 0x40000000, 0x40000000 + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4 + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,0 +{ li t3, 0 ; addi vec_r,sp, (VEC_R)*4 } +{ xm.slt out_mul, a, t3 ; xm.vsetc t3 } // Result gets multiplied by -1 if +{ li _30, 30 ; xm.brff out_mul, .L_hgfd } +{ xm.neg a, a ; nop } +.L_hgfd: + + xm.lmul tmpA, tmpB, a, a, t3, t3 + xm.lextract r, tmpA, tmpB, _30, 32 // extract theta^2 + xm.lmul tmpA, tmpB, a, r, t3, t3 // theta * theta^2 + xm.lextract tmpA, tmpA, tmpB, _30, 32 + xm.stdi a,tmpA, 0 (vec_r)// theta, theta^3 + +#undef a // no longer needed +#define tmpC x10 + + xm.lmul tmpA, tmpB, tmpA, r, t3, t3 + xm.lextract tmpB, tmpA, tmpB, _30, 32 // theta^5 + xm.lmul tmpA, tmpC, tmpB, r, t3, t3 + xm.lextract tmpA, tmpA, tmpC, _30, 32 // theta^7 + 
xm.stdi tmpB,tmpA, 8 (vec_r)// theta^5, theta^7 + + xm.lmul tmpA, tmpB, tmpA, r, t3, t3 + xm.lextract tmpB, tmpA, tmpB, _30, 32 // theta^9 + // stw tmpB, vec_r[4] // if we only wanted 5 terms + xm.lmul tmpA, tmpC, tmpB, r, t3, t3 + xm.lextract tmpA, tmpA, tmpC, _30, 32 // theta^11 + xm.stdi tmpB,tmpA, 16 (vec_r)// theta^9, theta^11 + + xm.lmul tmpA, tmpB, tmpA, r, t3, t3 + xm.lextract tmpB, tmpA, tmpB, _30, 32 // theta^13 + xm.lmul tmpA, tmpC, tmpB, r, t3, t3 + xm.lextract tmpA, tmpA, tmpC, _30, 32 // theta^15 + xm.stdi tmpB,tmpA, 24 (vec_r)// theta^13, theta^15 + +// Now that we've filled in vec_R[], we just need to do the VPU stuff. +// Note: All coefficients are positive and so are all elements or vec_r[], +// and we know they can't add to more than 1.0 + +{ xm.ldap t3, .L_vec_b ; xm.vclrdr } +{ xm.ldap t3, .L_vec_s_hat ; xm.vldc t3} // vC[] <-- P.S. coefficients +{ nop ; xm.vlmacc0 vec_r} // inner product with power vect +xm.vlsat t3 +{ mv t3, vec_r ; nop } // ensure they're all in the same q-format +{ nop ; xm.vstr t3} +{ xm.ldap t3, .L_weights ; xm.vclrdr } +{ nop ; xm.vldc t3} +{ mv t3, vec_r ; xm.vlmaccr0 vec_r} // add them together +{ nop ; xm.vstr t3} +{ nop ; lw a0,0 ( vec_r)} + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,0 +{ nop ; xm.bt out_mul, .L_gpgp } +{ nop ; xm.retsp (NSTACKWORDS)*4 } +.L_gpgp: +{ xm.neg a0, a0 ; nop} +{ xm.retsp (NSTACKWORDS*4) ; nop} + +.L_func_end: + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. 
*/ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME,.L_func_end - FUNCTION_NAME + + + + + + + + + + + + + +#endif //defined(__VX4B__) + diff --git a/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s16.S b/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s16.S new file mode 100644 index 00000000..03683e88 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s16.S @@ -0,0 +1,511 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +.text +.p2align 2 + + +#define CAT_(A, B) A##B +#define CAT(A, B) CAT_(A,B) + +#define FUNC_START \ + .text ; \ + .globl FUNCTION_NAME ; \ + .type FUNCTION_NAME,@function ; \ + .p2align 4 + + +#define FUNC_END \ + .set FUNCTION_NAME.nstackwords,NSTACKWORDS; \ + .global FUNCTION_NAME.nstackwords; \ + .set FUNCTION_NAME.maxcores,1; \ + .global FUNCTION_NAME.maxcores; \ + .set FUNCTION_NAME.maxtimers,0; \ + .global FUNCTION_NAME.maxtimers; \ + .set FUNCTION_NAME.maxchanends,0; \ + .global FUNCTION_NAME.maxchanends; \ + CAT(.L_size_end_, FUNCTION_NAME): \ + .size FUNCTION_NAME, CAT(.L_size_end_, FUNCTION_NAME) - FUNCTION_NAME + + + + + + + + + + +/* ***************************************************** + +int16_t vladd16( + const int16_t x, + const int16_t y); + +********************************************************/ + +#define FUNCTION_NAME vladd16 +#define NSTACKWORDS (4) + +FUNC_START +FUNCTION_NAME: +{ nop ; xm.entsp (NSTACKWORDS)*4 } + li t3, 0x100 +{ nop ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a0, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldr t3} +{ nop ; sw a1, 0 (sp)} +{ xm.mkmski a1, 4 ; xm.vladd t3} + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ xm.sext a0, 16 ; 
xm.retsp (NSTACKWORDS) *4 } +FUNC_END + +// //.cc_bottom FUNCTION_NAME.function; +// .set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; +// .set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; +// .set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; +// .set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; + +// CAT(.L_size_end_, FUNCTION_NAME): +// .size FUNCTION_NAME, CAT(.L_size_end_, FUNCTION_NAME) - FUNCTION_NAME + +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + +/* ***************************************************** + +int16_t vlsub16( + const int16_t x, + const int16_t y); + +********************************************************/ + +#define FUNCTION_NAME vlsub16 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ nop ; xm.entsp (NSTACKWORDS)*4 } + li t3, 0x100 +{ nop ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a1, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldr t3} +{ nop ; sw a0, 0 (sp)} +{ xm.mkmski a1, 4 ; xm.vlsub t3} + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ xm.sexti a0, 16 ; nop} +{nop; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + +/* ***************************************************** + +int16_t vlashr16( + const int16_t x, + const right_shift_t shr); + +********************************************************/ + +#define FUNCTION_NAME vlashr16 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ nop ; xm.entsp (NSTACKWORDS)*4 } + li t3, 0x100 +{ xm.mkmski a2, 4 ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a0, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ + xm.vlashr t3, a1 + xm.vstrpv t3, a2 +{ nop ; lw a0, 0 (sp)} +{ xm.sext a0, 16 ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS 
+#undef FUNCTION_NAME + + + + + + + + +/* ***************************************************** + +int16_t vpos16( + const int16_t x); + +********************************************************/ + +#define FUNCTION_NAME vpos16 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ nop ; xm.entsp (NSTACKWORDS)*4 } + li t3, 0x100 +{ nop ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a0, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldr t3} +{ xm.mkmski a1, 4 ; xm.vpos } + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ xm.sext a0, 16 ; nop} +{nop; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + +/* ***************************************************** + +int16_t vsign16( + const int16_t x); + +********************************************************/ + +#define FUNCTION_NAME vsign16 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ nop ; xm.entsp (NSTACKWORDS)*4 } + li t3, 0x100 +{ nop ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a0, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldr t3} +{ xm.mkmski a1, 4 ; xm.vsign } + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ xm.sext a0, 16 ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + +/* ***************************************************** + +unsigned vdepth1_16( + const int16_t x); + +********************************************************/ + +#define FUNCTION_NAME vdepth1_16 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ nop ; xm.entsp (NSTACKWORDS)*4 } + li t3, 0x100 +{ nop ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a0, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldr 
t3} +{ xm.mkmski a1, 4 ; xm.vdepth1 } + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ xm.zexti a0, 1 ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + + +/* ***************************************************** + +int8_t vdepth8_16( + const int16_t x); + +********************************************************/ + +#define FUNCTION_NAME vdepth8_16 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ nop ; xm.entsp (NSTACKWORDS)*4 } + li t3, 0x100 +{ nop ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a0, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldr t3} +{ xm.mkmski a1, 4 ; xm.vdepth8 } + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ xm.sext a0, 8 ; nop} +{nop; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + + +/* ***************************************************** + +int16_t vlmul16( + const int16_t x, + const int16_t y); + +********************************************************/ + +#define FUNCTION_NAME vlmul16 /* name of the label emitted below; must match the vlmul16 prototype above */ +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ nop ; xm.entsp (NSTACKWORDS)*4 } + li t3, 0x100 +{ nop ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a0, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldr t3} /* x was spilled to 0(sp); load it through t3 */ +{ nop ; sw a1, 0 (sp)} /* overwrite the same stack slot with y for the multiply */ +{ xm.mkmski a1, 4 ; xm.vlmul0 t3} +xm.vlmul1 t3 + xm.vstrpv t3, a1 /* a1 = 4-bit mask: only the first 4 bytes are written back */ +{ nop ; lw a0, 0 (sp)} +{ xm.sext a0, 16 ; xm.retsp (NSTACKWORDS)*4 } /* return the low 16 bits of that word, sign-extended */ +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + + +/* ***************************************************** + +vpu_int16_acc_t vlmacc16( + const vpu_int16_acc_t acc, + const int16_t x, + const int16_t y); + +********************************************************/ + +#define FUNCTION_NAME vlmacc16 +#define NSTACKWORDS (8) +FUNC_START
+FUNCTION_NAME: +{ nop ; xm.entsp (NSTACKWORDS)*4 } + li t3, 0x100 +{ nop ; xm.vsetc t3} +{ srli a3, a0, 16 ; xm.zexti a0, 16 } +{ addi t3,sp, 0 ; sw a3, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldd t3} +{ nop ; sw a0, 0 (sp)} +{ nop ; xm.vldr t3} +{ nop ; sw a1, 0 (sp)} +{ nop ; xm.vldc t3} +{ nop ; sw a2, 0 (sp)} +{ nop ; xm.vlmacc0 t3} +xm.vlmacc1 t3 +{ nop ; xm.vstd t3} +{ nop ; lw a1, 0 (sp)} +{ slli a1, a1, 16 ; xm.vstr t3} +{ nop ; lw a0, 0 (sp)} +{ xm.zexti a0, 16 ; nop } +{ or a0, a0, a1 ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + + +/* ***************************************************** + +vpu_int16_acc_t vlmaccr16( + const vpu_int16_acc_t acc, + const int16_t x[VPU_INT16_EPV], + const int16_t y[VPU_INT16_EPV]); + +********************************************************/ + +#define FUNCTION_NAME vlmaccr16 +#define NSTACKWORDS (12) +FUNC_START +FUNCTION_NAME: +{ nop ; xm.entsp (NSTACKWORDS)*4 } +{ xm.mkmski s8, 16 ; sw s8, 32 (sp)} + li t3, 0x100 +{ addi t3,sp, 0 ; xm.vsetc t3} +{ nop ; xm.vclrdr } +{ nop ; xm.vstd t3} + +// The *last* accumulator is the one that will be added to. 
+ +{ slli a3, a0, 16 ; xm.andnot a0, s8 } +{ addi t3,sp, 0 ; sw a0, 28 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldd t3} +{ nop ; sw a3, 28 (sp)} +{ li a3, 15 ; xm.vldr t3} + +.L_vlmaccr16_loop_top1: +xm.ld16s s8, a3(a1) + xm.st16 s8, a3(t3) + { addi a3, a3, -1 ; xm.bt a3, .L_vlmaccr16_loop_top1 } + +{ li a3, 15 ; xm.vldc t3} + +.L_vlmaccr16_loop_top2: +xm.ld16s s8, a3(a2) + xm.st16 s8, a3(t3) + { addi a3, a3, -1 ; xm.bt a3, .L_vlmaccr16_loop_top2 } + +{ nop ; xm.vlmaccr0 t3} +xm.vlmaccr1 t3 +{ nop ; xm.vstd t3} +{ nop ; lw a1, 0 (sp)} +{ nop ; xm.vstr t3} +{ slli a1, a1, 16 ; lw a0, 0 (sp)} +{ xm.zexti a0, 16 ; lw s8, 32 (sp)} +{ or a0, a0, a1 ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + + +/* ***************************************************** + +int16_t vlsat16( + const vpu_int16_acc_t acc, + const unsigned sat); + +********************************************************/ + +#define FUNCTION_NAME vlsat16 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ nop ; xm.entsp (NSTACKWORDS)*4 } + li t3, 0x100 +{ nop ; xm.vsetc t3} +{ srli a3, a0, 16 ; xm.zexti a0, 16 } +{ addi t3,sp, 0 ; sw a3, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldd t3} +{ nop ; sw a0, 0 (sp)} +{ nop ; xm.vldr t3} +{ nop ; sw a1, 0 (sp)} +{ xm.mkmski a1, 4 ; nop} +xm.vlsat t3 + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ xm.sext a0, 16 ; nop} +{nop; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + + +/* ***************************************************** + +int16_t vadddr16( + const vpu_int16_acc_t acc[VPU_INT16_ACC_PERIOD]); + +********************************************************/ + +#define FUNCTION_NAME vadddr16 +#define NSTACKWORDS 
(12 + 8*2) +FUNC_START +FUNCTION_NAME: +{ li a1, 8 ; xm.entsp (NSTACKWORDS)*4 } + xm.stdsp s3,s2,8 + li t3, 0x100 +{ addi a2,sp, 16 ; xm.vsetc t3} +{ addi a3,sp, 48 ; li t3, 0 } + +.L_split_loop_top: /* runs 8 times (a1 counts down from 8): read two words from a0, xm.zip them, store one result word to each of the two stack buffers at a3 and a2 */ + { nop ; lw s2,0 ( a0)} + { addi a0, a0, 8 ; lw s3,4 ( a0)} +xm.zip s2, s3, 4 + { addi a1, a1, -1 ; sw s2,0 ( a3)} + { addi a3, a3, 4 ; sw s3,0 ( a2)} + { addi a2, a2, 4 ; xm.bt a1, .L_split_loop_top } + +{ xm.ldawsp a3, 12 ; nop} /* NOTE(review): the buffers were addressed above with byte offsets 48 and 16 (addi); 12 and 4 here match them only if xm.ldawsp scales its offset by words - confirm */ +{nop; xm.ldawsp t3, 4 } +{ nop ; xm.vldd a3} +{ nop ; xm.vldr t3} /* NOTE(review): the xm.vadddr step on the next line is commented out, so no reduction happens between these loads and the stores below; the returned value is presumably not the sum of the accumulators - confirm the VX4B replacement */ +//{ nop ; xm.vadddr } +{ nop ; xm.vstd a2} +{ nop ; lw s2,0 ( a2)} +{ slli s2, s2, 16 ; xm.vstr a2} +{ nop ; lw a0,0 ( a2)} +{ or a0, a0, s2 ; nop } /* result = (word stored by vstd << 16) | word stored by vstr */ + + xm.lddsp s3,s2,8 +{ nop ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s32.S b/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s32.S new file mode 100644 index 00000000..8d0f646b --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s32.S @@ -0,0 +1,563 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+ +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +.text +.p2align 2 + +#define CAT_(A, B) A##B +#define CAT(A, B) CAT_(A,B) + +#define FUNC_START \ + .text ; \ + .globl FUNCTION_NAME ; \ + .type FUNCTION_NAME,@function ; \ + .p2align 4 + + +#define FUNC_END \ + .set FUNCTION_NAME.nstackwords,NSTACKWORDS; \ + .global FUNCTION_NAME.nstackwords; \ + .set FUNCTION_NAME.maxcores,1; \ + .global FUNCTION_NAME.maxcores; \ + .set FUNCTION_NAME.maxtimers,0; \ + .global FUNCTION_NAME.maxtimers; \ + .set FUNCTION_NAME.maxchanends,0; \ + .global FUNCTION_NAME.maxchanends; \ + CAT(.L_size_end_, FUNCTION_NAME): \ + .size FUNCTION_NAME, CAT(.L_size_end_, FUNCTION_NAME) - FUNCTION_NAME + + + + +/* ***************************************************** + +int32_t vladd32( + const int32_t x, + const int32_t y); + +********************************************************/ + +#define FUNCTION_NAME vladd32 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ li t3, 0 ; xm.entsp (NSTACKWORDS)*4 } +{ nop ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a0, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldr t3} +{ nop ; sw a1, 0 (sp)} +{ xm.mkmski a1, 4 ; xm.vladd t3} + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ nop ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + +/* ***************************************************** + +int32_t vlsub32( + const int32_t x, + const int32_t y); + +********************************************************/ + +#define FUNCTION_NAME vlsub32 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ li t3, 0 ; xm.entsp (NSTACKWORDS)*4 } +{ nop ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a1, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldr t3} +{ nop ; sw a0, 0 (sp)} +{ 
xm.mkmski a1, 4 ; xm.vlsub t3} + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ nop ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + +/* ***************************************************** + +int32_t vlashr32( + const int32_t x, + const right_shift_t shr); + +********************************************************/ + +#define FUNCTION_NAME vlashr32 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ li t3, 0 ; xm.entsp (NSTACKWORDS)*4 } +{ xm.mkmski a2, 4 ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a0, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ + xm.vlashr t3, a1 + xm.vstrpv t3, a2 +{ nop ; lw a0, 0 (sp)} +{ nop ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + +/* ***************************************************** + +int32_t vpos32( + const int32_t x); + +********************************************************/ + +#define FUNCTION_NAME vpos32 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ li t3, 0 ; xm.entsp (NSTACKWORDS)*4 } +{ nop ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a0, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldr t3} +{ xm.mkmski a1, 4 ; xm.vpos } + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ nop ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + +/* ***************************************************** + +int32_t vsign32( + const int32_t x); + +********************************************************/ + +#define FUNCTION_NAME vsign32 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ li t3, 0 ; xm.entsp (NSTACKWORDS)*4 } +{ nop ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a0, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 
'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldr t3} +{ xm.mkmski a1, 4 ; xm.vsign } + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ nop ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + +/* ***************************************************** + +unsigned vdepth1_32( + const int32_t x); + +********************************************************/ + +#define FUNCTION_NAME vdepth1_32 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ li t3, 0 ; xm.entsp (NSTACKWORDS)*4 } +{ nop ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a0, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldr t3} +{ xm.mkmski a1, 4 ; xm.vdepth1 } + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ xm.zexti a0, 1 ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + + +/* ***************************************************** + +int8_t vdepth8_32( + const int32_t x); + +********************************************************/ + +#define FUNCTION_NAME vdepth8_32 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ li t3, 0 ; xm.entsp (NSTACKWORDS)*4 } +{ nop ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a0, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldr t3} +{ xm.mkmski a1, 4 ; xm.vdepth8 } + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ xm.sext a0, 8 ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + + +/* ***************************************************** + +int16_t vdepth16_32( + const int32_t x); + +********************************************************/ + +#define FUNCTION_NAME vdepth16_32 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ li t3, 0 ; xm.entsp (NSTACKWORDS)*4 } +{ nop ; xm.vsetc 
t3} +{ addi t3,sp, 0 ; sw a0, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldr t3} +{ xm.mkmski a1, 4 ; xm.vdepth16 } + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ xm.sext a0, 16 ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + + +/* ***************************************************** + +int32_t vlmul32( + const int32_t x, + const int32_t y); + +********************************************************/ + +#define FUNCTION_NAME vlmul32 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ li t3, 0 ; xm.entsp (NSTACKWORDS)*4 } +{ nop ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a0, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldr t3} +{ nop ; sw a1, 0 (sp)} +{ xm.mkmski a1, 4 ; xm.vlmul0 t3} + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ nop ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + + +/* ***************************************************** + +vpu_int32_acc_t vlmacc32( + const vpu_int32_acc_t acc, + const int32_t x, + const int32_t y); + +********************************************************/ + +#define FUNCTION_NAME vlmacc32 +#define NSTACKWORDS (12) +FUNC_START +FUNCTION_NAME: +{ li t3, 0 ; xm.entsp (NSTACKWORDS)*4 } +{ addi t3,sp, 0 ; xm.vsetc t3} +{ nop ; sw a1, 0 (sp)} +{ nop ; xm.vldd t3} +{ nop ; sw a0, 0 (sp)} +{ nop ; xm.vldr t3} +{ nop ; sw a2, 0 (sp)} +{ nop ; xm.vldc t3} +{ nop ; sw a3, 0 (sp)} +{ nop ; xm.vlmacc0 t3} +{ nop ; xm.vstd t3} +{ nop ; lw a1, 0 (sp)} +{ nop ; xm.vstr t3} +{ nop ; lw a0, 0 (sp)} +{ nop ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + + +/* ***************************************************** + +vpu_int32_acc_t vlmaccr32( + const 
vpu_int32_acc_t acc, + const int32_t x[VPU_INT32_EPV], + const int32_t y[VPU_INT32_EPV]); + +********************************************************/ + +#define FUNCTION_NAME vlmaccr32 +#define NSTACKWORDS (8) +FUNC_START +FUNCTION_NAME: +{ li t3, 0 ; xm.entsp (NSTACKWORDS)*4 } +{ addi t3,sp, 0 ; xm.vsetc t3} + +// The *last* accumulator is the one that will be added to. +{ nop ; sw a1, 28 (sp)} +{ nop ; xm.vldd t3} +{ nop ; sw a0, 28 (sp)} +{ nop ; xm.vldr t3} +{ nop ; xm.vldc a2} +{ nop ; xm.vlmaccr0 a3} +{ nop ; xm.vstd t3} +{ nop ; lw a1, 0 (sp)} +{ nop ; xm.vstr t3} +{ nop ; lw a0, 0 (sp)} +{ nop ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + + +/* ***************************************************** + +int32_t vlsat32( + const vpu_int32_acc_t acc, + const unsigned sat); + +********************************************************/ + +#define FUNCTION_NAME vlsat32 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ li t3, 0 ; xm.entsp (NSTACKWORDS)*4 } +{ addi t3,sp, 0 ; xm.vsetc t3} +{ nop ; sw a1, 0 (sp)} +{ nop ; xm.vldd t3} +{ nop ; sw a0, 0 (sp)} +{ nop ; xm.vldr t3} +{ nop ; sw a2, 0 (sp)} +{ xm.mkmski a1, 4 ; nop} +xm.vlsat t3 + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ nop ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + + +/* ***************************************************** + +int32_t vcmr32( + const complex_s32_t x, + const complex_s32_t y); + +********************************************************/ + +#define FUNCTION_NAME vcmr32 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ li t3, 0 ; xm.entsp (NSTACKWORDS)*4 } +{ addi t3,sp, 0 ; xm.vsetc t3} +{ nop ; xm.vldd a0} +{ nop ; xm.vldc a1} +{ xm.mkmski a1, 8 ; xm.vcmr0 } + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ nop ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + + +/* ***************************************************** + +int32_t 
vcmi32( + const complex_s32_t x, + const complex_s32_t y); + +********************************************************/ + +#define FUNCTION_NAME vcmi32 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ li t3, 0 ; xm.entsp (NSTACKWORDS)*4 } +{ addi t3,sp, 0 ; xm.vsetc t3} +{ nop ; xm.vldd a0} +{ nop ; xm.vldc a1} +{ xm.mkmski a1, 8 ; xm.vcmi0 } + xm.vstrpv t3, a1 +{ nop ; lw a0, 4 (sp)} +{ nop ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + + +/* ***************************************************** + +int32_t vcmcr32( + const complex_s32_t x, + const complex_s32_t y); + +********************************************************/ + +#define FUNCTION_NAME vcmcr32 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ li t3, 0 ; xm.entsp (NSTACKWORDS)*4 } +{ addi t3,sp, 0 ; xm.vsetc t3} +{ nop ; xm.vldd a0} +{ nop ; xm.vldc a1} +{ xm.mkmski a1, 8 ; xm.vcmcr0 } + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ nop ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + + +/* ***************************************************** + +int32_t vcmci32( + const complex_s32_t x, + const complex_s32_t y); + +********************************************************/ + +#define FUNCTION_NAME vcmci32 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ li t3, 0 ; xm.entsp (NSTACKWORDS)*4 } +{ addi t3,sp, 0 ; xm.vsetc t3} +{ nop ; xm.vldd a0} +{ nop ; xm.vldc a1} +{ xm.mkmski a1, 8 ; xm.vcmci0 } + xm.vstrpv t3, a1 +{ nop ; lw a0, 4 (sp)} +{ nop ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s8.S b/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s8.S new file mode 100644 index 00000000..8f5f7fd1 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s8.S @@ -0,0 +1,423 @@ +// Copyright 2020-2026 XMOS LIMITED. 
+// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +.text +.p2align 2 + +#define CAT_(A, B) A##B +#define CAT(A, B) CAT_(A,B) + +#define FUNC_START \ + .text ; \ + .globl FUNCTION_NAME ; \ + .type FUNCTION_NAME,@function ; \ + .p2align 4 + + +#define FUNC_END \ + .set FUNCTION_NAME.nstackwords,NSTACKWORDS; \ + .global FUNCTION_NAME.nstackwords; \ + .set FUNCTION_NAME.maxcores,1; \ + .global FUNCTION_NAME.maxcores; \ + .set FUNCTION_NAME.maxtimers,0; \ + .global FUNCTION_NAME.maxtimers; \ + .set FUNCTION_NAME.maxchanends,0; \ + .global FUNCTION_NAME.maxchanends; \ + CAT(.L_size_end_, FUNCTION_NAME): \ + .size FUNCTION_NAME, CAT(.L_size_end_, FUNCTION_NAME) - FUNCTION_NAME + + + + + + + + +/* ***************************************************** + +int8_t vladd8( + const int8_t x, + const int8_t y); + +********************************************************/ + +#define FUNCTION_NAME vladd8 +#define NSTACKWORDS (4) + +// .global FUNCTION_NAME; +// .type FUNCTION_NAME,@function; +// .cc_top FUNCTION_NAME.function, FUNCTION_NAME + +FUNC_START +FUNCTION_NAME: +{ nop ; xm.entsp (NSTACKWORDS)*4 } + li t3, 0x200 +{ nop ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a0, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldr t3} +{ nop ; sw a1, 0 (sp)} +{ xm.mkmski a1, 4 ; xm.vladd t3} + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ xm.sext a0, 8 ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END + +// //.cc_bottom FUNCTION_NAME.function; +// .set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; +// .set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; +// .set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; +// .set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; + +// CAT(.L_size_end_, FUNCTION_NAME): +// .size FUNCTION_NAME, 
CAT(.L_size_end_, FUNCTION_NAME) - FUNCTION_NAME + +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + +/* ***************************************************** + +int8_t vlsub8( + const int8_t x, + const int8_t y); + +********************************************************/ + +#define FUNCTION_NAME vlsub8 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ nop ; xm.entsp (NSTACKWORDS)*4 } + li t3, 0x200 +{ nop ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a1, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldr t3} /* y is loaded first ... */ +{ nop ; sw a0, 0 (sp)} /* ... then x replaces it in the stack slot used as the vlsub operand */ +{ xm.mkmski a1, 4 ; xm.vlsub t3} + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ xm.sext a0, 8 ; xm.retsp (NSTACKWORDS)*4 } /* int8_t result: sign-extend the low byte only, as vladd8 does; a 16-bit extend would include byte 1 of the stored word */ +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + +/* ***************************************************** + +int8_t vlashr8( + const int8_t x, + const right_shift_t shr); + +********************************************************/ + +#define FUNCTION_NAME vlashr8 /* 8-bit counterpart of vlashr32; the emitted symbol must carry this name */ +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ nop ; xm.entsp (NSTACKWORDS)*4 } + li t3, 0x200 +{ xm.mkmski a2, 4 ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a0, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ + xm.vlashr t3, a1 + xm.vstrpv t3, a2 +{ nop ; lw a0, 0 (sp)} +{ xm.sext a0, 8 ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + +/* ***************************************************** + +int8_t vpos8( + const int8_t x); + +********************************************************/ + +#define FUNCTION_NAME vpos8 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ nop ; xm.entsp (NSTACKWORDS)*4 } + li t3, 0x200 +{ nop ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a0, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need
correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldr t3} +{ xm.mkmski a1, 4 ; xm.vpos } + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ xm.sext a0, 8 ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + +/* ***************************************************** + +int8_t vsign8( + const int8_t x); + +********************************************************/ + +#define FUNCTION_NAME vsign8 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ nop ; xm.entsp (NSTACKWORDS)*4 } + li t3, 0x200 +{ nop ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a0, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldr t3} +{ xm.mkmski a1, 4 ; xm.vsign } + xm.vstrpv t3, a1 /* a1 = 4-bit mask: only the first 4 bytes are written back */ +{ nop ; lw a0, 0 (sp)} +{ xm.sext a0, 8 ; xm.retsp (NSTACKWORDS)*4 } /* int8_t result: sign-extend the low byte only, as vpos8 does; a 16-bit extend would include byte 1 of the stored word */ +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + +/* ***************************************************** + +unsigned vdepth1_8( + const int8_t x); + +********************************************************/ + +#define FUNCTION_NAME vdepth1_8 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ nop ; xm.entsp (NSTACKWORDS)*4 } + li t3, 0x200 +{ nop ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a0, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldr t3} +{ xm.mkmski a1, 4 ; xm.vdepth1 } + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ xm.zexti a0, 1 ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + + +/* ***************************************************** + +int8_t vlmul8( + const int8_t x, + const int8_t y); + +********************************************************/ + +#define FUNCTION_NAME vlmul8 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ nop ; xm.entsp
(NSTACKWORDS)*4 } + li t3, 0x200 +{ nop ; xm.vsetc t3} +{ addi t3,sp, 0 ; sw a0, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldr t3} +{ nop ; sw a1, 0 (sp)} +{ xm.mkmski a1, 4 ; xm.vlmul0 t3} + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ xm.sext a0, 8 ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + + +/* ***************************************************** + +vpu_int8_acc_t vlmacc8( + const vpu_int8_acc_t acc, + const int8_t x, + const int8_t y); + +********************************************************/ + +#define FUNCTION_NAME vlmacc8 +#define NSTACKWORDS (8) +FUNC_START +FUNCTION_NAME: +{ nop ; xm.entsp (NSTACKWORDS)*4 } + li t3, 0x200 +{ nop ; xm.vsetc t3} +{ srli a3, a0, 16 ; xm.zexti a0, 16 } +{ addi t3,sp, 0 ; sw a3, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldd t3} +{ nop ; sw a0, 0 (sp)} +{ nop ; xm.vldr t3} +{ nop ; sw a1, 0 (sp)} +{ nop ; xm.vldc t3} +{ nop ; sw a2, 0 (sp)} +{ nop ; xm.vlmacc0 t3} +{ nop ; xm.vstd t3} +{ nop ; lw a1, 0 (sp)} +{ slli a1, a1, 16 ; xm.vstr t3} +{ nop ; lw a0, 0 (sp)} +{ xm.zexti a0, 16 ; nop } +{ or a0, a0, a1 ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + + +/* ***************************************************** + +vpu_int8_acc_t vlmaccr8( + const vpu_int8_acc_t acc, + const int8_t x[VPU_INT8_EPV], + const int8_t y[VPU_INT8_EPV]); + +********************************************************/ + +#define FUNCTION_NAME vlmaccr8 /* 8-bit counterpart of vlmaccr16 / vlmaccr32; the emitted symbol must carry this name */ +#define NSTACKWORDS (12) +FUNC_START +FUNCTION_NAME: +{ nop ; xm.entsp (NSTACKWORDS)*4 } +{ xm.mkmski s8, 16 ; sw s8, 32 (sp)} + li t3, 0x200 +{ addi t3,sp, 0 ; xm.vsetc t3} +{ nop ; xm.vclrdr } +{ nop ; xm.vstd t3} + +// The *last* accumulator is
the one that will be added to. + +{ slli a3, a0, 16 ; xm.andnot a0, s8 } +{ addi t3,sp, 0 ; sw a0, 28 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldd t3} +{ nop ; sw a3, 28 (sp)} +{ li a3, 31 ; xm.vldr t3} + +.L_vlmaccr8_loop_top1: /* copy x[31..0] byte by byte into the stack buffer at t3 */ + { nop ; xm.ld8u s8, a3 (a1)} + xm.st8 s8, a3(t3) + { addi a3, a3, -1 ; xm.bt a3, .L_vlmaccr8_loop_top1 } + +{ li a3, 31 ; xm.vldc t3} + +.L_vlmaccr8_loop_top2: /* same copy for y[31..0], reusing the buffer after the vldc above */ + { nop ; xm.ld8u s8, a3 (a2)} + xm.st8 s8, a3(t3) + { addi a3, a3, -1 ; xm.bt a3, .L_vlmaccr8_loop_top2 } + +{ nop ; xm.vlmaccr0 t3} +{ nop ; xm.vstd t3} +{ nop ; lw a1, 0 (sp)} +{ nop ; xm.vstr t3} +{ slli a1, a1, 16 ; lw a0, 0 (sp)} +{ xm.zexti a0, 16 ; lw s8, 32 (sp)} /* restore the callee-saved s8 spilled at entry */ +{ or a0, a0, a1 ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + + +/* ***************************************************** + +int8_t vlsat8( + const vpu_int8_acc_t acc, + const unsigned sat); + +********************************************************/ + +#define FUNCTION_NAME vlsat8 +#define NSTACKWORDS (4) +FUNC_START +FUNCTION_NAME: +{ nop ; xm.entsp (NSTACKWORDS)*4 } + li t3, 0x200 +{ nop ; xm.vsetc t3} +{ srli a3, a0, 16 ; xm.zexti a0, 16 } +{ addi t3,sp, 0 ; sw a3, 0 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ +{ nop ; xm.vldd t3} +{ nop ; sw a0, 0 (sp)} +{ nop ; xm.vldr t3} +{ nop ; sw a1, 0 (sp)} +{ xm.mkmski a1, 4 ; nop} +xm.vlsat t3 + xm.vstrpv t3, a1 +{ nop ; lw a0, 0 (sp)} +{ xm.sext a0, 8 ; xm.retsp (NSTACKWORDS)*4 } +FUNC_END +#undef NSTACKWORDS +#undef FUNCTION_NAME + + + + + + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/scalar/sqrt_s32.S b/lib_xcore_math/src/arch/vx4b/scalar/sqrt_s32.S new file mode 100644 index 00000000..4a39eb83 --- /dev/null +++
b/lib_xcore_math/src/arch/vx4b/scalar/sqrt_s32.S @@ -0,0 +1,129 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +.text +.p2align 2 + +/* +int32_t s32_sqrt( + exponent_t* y_exp, + const int32_t X, + const exponent_t x_exp, + const unsigned depth); + + @todo This can probably be sped up ~25% by using the VPU to compute 3 bits at a time. + (The speedup would be more significant if there was a quick way to create an element mask (vdepth1 creates a + byte mask) and a way to load each vR[k] from a single register. +*/ + +#define FUNCTION_NAME s32_sqrt +#define NSTACKWORDS (8) + + +#define y_exp x10 +#define X x11 +#define x_exp x12 +#define depth x13 +#define tmp x19 + + + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,0 + + +{ xm.cls tmp, X ; nop} + xm.stwsp s8, 24 +{ addi tmp, tmp, -1 ; li t3, 31 } +{ xm.shl X, X, tmp ; sub tmp, x_exp, tmp }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli X, X, tmp \nMessage: The shift amount is not 32" */ +{ sub tmp, tmp, t3 ; sub x_exp, tmp, t3 } +{ xm.zexti tmp, 1 ; nop } +lui t3, %hi(vpu_vec_0x80000000) +addi t3,t3, %lo(vpu_vec_0x80000000) +{ nop ; lw t3,0 ( t3)} +{ nop ; xm.brff tmp, .L_is_even }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + srai t3, t3, 1 + { addi x_exp, x_exp, 1 ; nop } + +.L_is_even: + srai x_exp, x_exp, 1 +{ nop ; sw x_exp,0 ( y_exp)} + +#undef x_exp +#undef y_exp + +#define targ_hi x10 +#define targ_lo x11 +#define result x12 +#define guess x18 +#define base x20 
+#define acc_hi x21 +#define acc_lo x22 +#define a_exp x23 + + +{ mv tmp, t3 ; mv t3, X } +{ li targ_hi, 0 ; li targ_lo, 0 } + xm.maccs targ_hi, targ_lo, tmp, t3 + +#undef X + +// Subtract just one more from targ_hi:targ_lo, so that we're doing <= instead of just < +{ li tmp, 1 ; xm.mkmski t3, 32 } + xm.maccs targ_hi, targ_lo, tmp, t3 + + li base, 0x40 +{ li result, 0 ; slli base, base, 24 } + +// @todo can potentially save a little bit of time by doing a clz on targ_hi. Might be able to skip the first iteration + +.L_loop_top: + { mv acc_hi, targ_hi ; mv acc_lo, targ_lo } + { add tmp, result, base ; addi depth, depth, -1 } + xm.maccs acc_hi, acc_lo, tmp, tmp + { xm.clz acc_hi, acc_hi ; nop } + { srli base, base, 1 ; xm.bt acc_hi, .L_too_large } + { mv result, tmp ; nop } + .L_too_large: + { nop ; xm.bt depth, .L_loop_top } +.L_loop_end: + + xm.lddsp s7,s6,0 + xm.lddsp s5,s4,16 + xm.lddsp s3,s2,8 +{ nop ; lw s8, 24 (sp)} +{ mv a0, result ; xm.retsp (NSTACKWORDS)*4 } + +.L_func_end: + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. 
*/ +.size FUNCTION_NAME,.L_func_end - FUNCTION_NAME + + + + + + + + + + + + + +#endif //defined(__VX4B__) + diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_complex_scale.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_complex_scale.S new file mode 100644 index 00000000..3adc356f --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_complex_scale.S @@ -0,0 +1,205 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* + +headroom_t vect_complex_s16_scale( + int16_t* a_real, + int16_t* a_imag, + const int16_t* b_real, + const int16_t* b_imag, + const int16_t c_real, + const int16_t c_imag, + const unsigned length, + const right_shift_t sat); + +*/ + +#define NSTACKVECS (4) +#define NSTACKWORDS (8+8*(NSTACKVECS)+4) /* evaluates to 44 words with NSTACKVECS == 4 */ + + +#define STACK_VEC_SAT (NSTACKWORDS-8-4) +#define STACK_VEC_C_REAL (NSTACKWORDS-16-4) +#define STACK_VEC_C_IMAG (NSTACKWORDS-24-4) +#define STACK_VEC_C_IMAG_N (NSTACKWORDS-32-4) /* the four STACK_VEC_* values are word offsets from sp of the 8-word vectors filled in the prologue */ + +#define FUNCTION_NAME vect_complex_s16_scale + +#define a_real x10 +#define a_imag x11 +#define b_real x12 +#define b_imag x13 +#define length x18 +#define _32 x19 +#define bytemask x20 + + +.text +.p2align 2 + +/* + We're doing this: + + vR <- -1 + vR <- -1 * b.imag + vC <- -b.imag + acc <- 0 + acc <- vC * c.imag + vC <- b.real + acc <- acc + vC * c.real + vR <- acc >> sat + a.real <- vR + + (vC still has b.real) + acc <- 0 + acc <- vC * c.imag + vC <- b.imag + acc <- acc + vC * c.real + vR <- acc >> sat + a.imag <- vR +*/ + + +FUNCTION_NAME: /* Prologue: save s2..s5, then build four constant vectors on the stack (negated c_imag, c_imag, c_real, sat). Note: the code keeps the negated operand as a precomputed vector at STACK_VEC_C_IMAG_N rather than negating b.imag as the summary above words it. */ + + + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in entsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + + addi t3,sp, (STACK_VEC_C_IMAG_N)*4/* Multiple 
XAT warnings: 'LDAWSP outside of known frame - offset may need correction', "Falling back on assumption: the int < 64 for the integer value of the item at position 1 in the instruction's operands in ldawsp t3, STACK_VEC_C_IMAG_N\nMessage: the word-scale offset fits in a 6b unsigned immediate" */ + mv s4, a5 /* a5 = c_imag (6th argument of the prototype) */ + xm.neg s2, s4 + xm.neg s3, s4 +xm.zip s3, s2, 4 /* both inputs hold the same value; presumably leaves it packed in both 16-bit halves of s2 - TODO confirm */ + + xm.stdi s2,s2, 0(t3) + xm.stdi s2,s2, 8(t3) + xm.stdi s2,s2, 16(t3) + xm.stdi s2,s2, 24(t3) + + addi t3,sp, (STACK_VEC_C_IMAG)*4/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', "Falling back on assumption: the int < 64 for the integer value of the item at position 1 in the instruction's operands in ldawsp t3, STACK_VEC_C_IMAG\nMessage: the word-scale offset fits in a 6b unsigned immediate" */ + mv s2, s4 + + xm.zip s4, s2, 4 + + xm.stdi s2,s2, 0(t3) + xm.stdi s2,s2, 8(t3) + xm.stdi s2,s2, 16(t3) + xm.stdi s2,s2, 24(t3) + mv s2, a4 /* a4 = c_real; stored at byte offsets 32..56 from t3, which is the STACK_VEC_C_REAL slot */ + mv s3, s2 +xm.zip s3, s2, 4 + + xm.stdi s2,s2, 32(t3) + xm.stdi s2,s2, 40(t3) + xm.stdi s2,s2, 48(t3) + xm.stdi s2,s2, 56(t3) + + mv s2, a7 /* a7 = sat; stored at byte offsets 64..88 from t3, which is the STACK_VEC_SAT slot */ + mv s3, s2 +xm.zip s3, s2, 4 + + xm.stdi s2,s2, 64(t3) + xm.stdi s2,s2, 72(t3) + xm.stdi s2,s2, 80(t3) + xm.stdi s2,s2, 88(t3) + + +.p2align 2 + mv length, a6 /* a6 = length */ + { li _32, 32 ; nop}/* XAT Warning: 'LDWSP has unknown offset - this may need correction' */ +#define vect_count length + { srli vect_count, length, 4 ; slli bytemask, length, SIZEOF_LOG2_S16 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli bytemask, length, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + { xm.zexti bytemask, 5 ; slli t3, _32, 3 } + { addi t3,sp, (STACK_VEC_C_IMAG_N)*4 ; xm.vsetc t3}/* XAT Warning: 'LDAWSP has unknown offset - this may need correction' */ + { nop ; xm.brff vect_count, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_loop_top: /* Full chunks of 32 bytes per array. Real half: vC = the negated c_imag vector, accumulate with b_imag; vC = b_real, accumulate with the c_real vector; xm.vlsat with the sat vector; store to a_real. Imag half: vC still holds b_real, accumulate with the c_imag vector; vC = b_imag, accumulate with the c_real vector; xm.vlsat; store to a_imag. */ + { nop ; 
xm.vldc t3} + { addi vect_count, vect_count, -1 ; xm.vclrdr } + { addi t3,sp, (STACK_VEC_C_REAL)*4 ; xm.vlmacc0 b_imag} + xm.vlmacc1 b_imag + { nop ; xm.vldc b_real} + { xm.vlmacc0 t3; nop} + xm.vlmacc1 t3 + { add b_real, b_real, _32 ; addi t3,sp, (STACK_VEC_SAT)*4} + xm.vlsat t3 + { add a_real, a_real, _32 ; xm.vstr a_real} + { addi t3,sp, (STACK_VEC_C_IMAG)*4 ; xm.vclrdr }/* XAT Warning: 'LDAWSP has unknown offset - this may need correction' */ + { nop ; xm.vlmacc0 t3} /* XAT Warning: 'LDAWSP has unknown offset - this may need correction' */ + xm.vlmacc1 t3 + { addi t3,sp, (STACK_VEC_C_REAL)*4 ; xm.vldc b_imag}/* XAT Warning: 'LDAWSP has unknown offset - this may need correction' */ + { nop ; xm.vlmacc0 t3} + xm.vlmacc1 t3 + { add b_imag, b_imag, _32 ; addi t3,sp, (STACK_VEC_SAT)*4} + xm.vlsat t3 + { add a_imag, a_imag, _32 ; xm.vstr a_imag} + { addi t3,sp, (STACK_VEC_C_IMAG_N)*4 ; xm.bt vect_count, .L_loop_top }/* XAT Warning: 'LDAWSP has unknown offset - this may need correction' */ + .L_loop_bot: + + { xm.mkmsk bytemask, bytemask ; xm.brff bytemask, .L_done }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ /* Tail: bytemask held the low 5 bits of (length << SIZEOF_LOG2_S16); when non-zero it is turned into a mask for xm.vstrpv. The STACK_VEC_C_IMAG_N slot is overwritten with the cleared vD right after it has been loaded into vC and is then reused as scratch. */ + { nop ; xm.vldc t3} + { nop ; xm.vclrdr } + { nop ; xm.vstd t3} + { addi t3,sp, (STACK_VEC_C_REAL)*4 ; xm.vlmacc0 b_imag} + xm.vlmacc1 b_imag +#define vec_tmp b_real /* the b_real register is re-pointed at the scratch slot in the same bundle that performs the last load through it */ + { addi vec_tmp,sp, (STACK_VEC_C_IMAG_N)*4 ; xm.vldc b_real}/* XAT Warning: 'LDAWSP has unknown offset - this may need correction' */ + { nop ; xm.vlmacc0 t3}/* XAT Warning: 'LDAWSP has unknown offset - this may need correction' */ + xm.vlmacc1 t3 + addi t3,sp, (STACK_VEC_SAT)*4 + xm.vlsat t3 + xm.vstrpv vec_tmp, bytemask /* masked result also goes to scratch and through xm.vldd / xm.vstd; presumably so the headroom read at .L_done reflects only valid elements - TODO confirm */ + xm.vstrpv a_real, bytemask + { nop ; xm.vldd vec_tmp} + { nop ; xm.vstd vec_tmp} + { addi t3,sp, (STACK_VEC_C_IMAG)*4 ; xm.vclrdr }/* XAT Warning: 'LDAWSP has unknown offset - this may need correction' */ + { nop ; xm.vlmacc0 t3} /* XAT Warning: 'LDAWSP has unknown offset - this may need correction' */ + xm.vlmacc1 t3 + { 
addi t3,sp, (STACK_VEC_C_REAL)*4 ; xm.vldc b_imag}/* XAT Warning: 'LDAWSP has unknown offset - this may need correction' */ + { nop ; xm.vlmacc0 t3} + xm.vlmacc1 t3 + { addi t3,sp, (STACK_VEC_SAT)*4 ; nop} + xm.vlsat t3 + xm.vstrpv vec_tmp, bytemask + xm.vstrpv a_imag, bytemask + { nop ; xm.vldd vec_tmp} + { nop ; xm.vstd vec_tmp} + + + +.L_done: /* Epilogue: restore s2..s5 and return 15 minus the low 5 bits of xm.vgetc in a0 (the headroom_t result of the prototype) */ + xm.lddsp s3,s2,8/* XAT Warning: "Not correcting LDDSP offset because it's not in the local frame range" */ + xm.lddsp s5,s4,16/* XAT Warning: "Not correcting LDDSP offset because it's not in the local frame range" */ + + { li a0, 15 ; xm.vgetc t3} + { xm.zexti t3, 5 ; nop } + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conj_macc.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conj_macc.S new file mode 100644 index 00000000..380ed7e5 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conj_macc.S @@ -0,0 +1,219 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+// XMOS Public License: Version 1 + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* + +headroom_t vect_complex_s16_conj_macc( + int16_t* acc_real, + int16_t* acc_imag, + const int16_t* b_real, + const int16_t* b_imag, + const int16_t* c_real, + const int16_t* c_imag, + const unsigned length, + const right_shift_t acc_shr, + const right_shift_t sat); + +*/ + +.text +.p2align 2 + +#define NSTACKVECS (4) +#define NSTACKWORDS (12+8*(NSTACKVECS)+4) + + +#define STACK_SAT (NSTACKWORDS) /* word offset, relative to sp after xm.entsp, from which the prologue loads sat; presumably the first stack-passed argument - TODO confirm against the ABI */ + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) +#define STACK_VEC_SAT (NSTACKWORDS-16-4) + +#define STACK_BYTEMASK 8 + +#define FUNCTION_NAME vect_complex_s16_conj_macc + +#define acc_re x10 +#define acc_im x11 +#define b_re x12 +#define b_im x13 +#define c_re x18 +#define c_im x19 +#define len x20 +#define vec_tmp x21 +#define vec_sat x22 +#define bytemask x23 +#define acc_shr x23 /* same register as bytemask: the prologue stores bytemask to the STACK_BYTEMASK slot before writing acc_shr */ + + +/* + We want: + + C.re <-- b.real * c.real + b.imag * c.imag + C.im <-- b.imag * c.real - b.real * c.imag + + We're doing this: + + vR <- -1 + vR <- -1 * b.real + vC <- -b.real + + tmp <- 0 + tmp <- vC * c.imag + vC <- b.imag + tmp <- tmp + vC * c.real + vR <- tmp >> sat + vR <- acc.imag + vR + acc.imag <- vR + + (vC still has b.imag) + tmp <- 0 + tmp <- vC * c.imag + vC <- b.real + tmp <- tmp + vC * c.real + vR <- tmp >> sat + vR <- acc.real + vR + acc.real <- vR +*/ + + +.L_neg_ones: + .short -0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000 /* NOTE(review): nothing in this file references .L_neg_ones; the code below loads vpu_vec_0x8000 instead */ + +FUNCTION_NAME: /* Prologue: save s2..s7 (s8 is saved a few lines down), fill the STACK_VEC_SAT vector with sat packed into both halves of each word, and split length into len = length >> 4 full chunks and bytemask = (length & 15) << SIZEOF_LOG2_S16 tail bytes */ + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + + { addi t3,sp, (STACK_VEC_SAT)*4 ; lw s2, (STACK_SAT)*4 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP 
outside of known frame - offset may need correction' */ + { slli s3, s2, 16 ; xm.zexti s2, 16 } + { or s2, s2, s3 ; sw s8, 4 (sp)} + xm.stdi s2,s2, 0(t3) + xm.stdi s2,s2, 8(t3) + xm.stdi s2,s2, 16(t3) + xm.stdi s2,s2, 24(t3) + mv bytemask, a6 + { li t3, 32 ; nop} + mv c_re, a4 + { srli len, bytemask, 4 ; nop} + mv c_im, a5 + { xm.zexti bytemask, 4 ; nop} + { slli bytemask, bytemask, SIZEOF_LOG2_S16 ; slli t3, t3, 3 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli bytemask, bytemask, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + { nop ; sw bytemask, (STACK_BYTEMASK)*4 (sp)} + mv acc_shr, a7 + { addi vec_tmp,sp, (STACK_VEC_TMP)*4 ; xm.vsetc t3} + + la t3, vpu_vec_0x8000 + { mv s8, t3 ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_loop_top: /* Full chunks, 32 bytes per array per pass. Step 1: acc_re and acc_im are shifted right by acc_shr in place. Step 2: vpu_vec_0x8000 times b_re goes via vec_tmp into vC (the in-file summary calls this -b.real); c_im and then, with vC = b_im, c_re are multiply-accumulated, xm.vlsat is applied with vec_sat and the result is added to acc_im. Step 3: the same with vC = b_im then b_re, added to acc_re. */ + { xm.mkmski t3, 32 ; nop } + xm.vlashr acc_re, acc_shr + xm.vstrpv acc_re, t3 + xm.vlashr acc_im, acc_shr + xm.vstrpv acc_im, t3 + { mv t3, s8 ; nop } + { addi len, len, -1 ; xm.vldr t3} + { xm.mkmski t3, 32 ; xm.vlmul0 b_re} + xm.vlmul1 b_re + xm.vstrpv vec_tmp, t3 + { addi vec_sat,sp, (STACK_VEC_SAT)*4 ; xm.vldc vec_tmp} + { nop ; xm.vclrdr } + { nop ; xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { nop ; xm.vldc b_im} + { nop ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + + xm.vlsat vec_sat + { nop ; xm.vladd acc_im} + { li t3, 32 ; xm.vstr acc_im} + { add b_im, b_im, t3 ; xm.vclrdr } + { add acc_im, acc_im, t3 ; xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { add c_im, c_im, t3 ; xm.vldc b_re} + { add b_re, b_re, t3 ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + { add c_re, c_re, t3 ; nop} + xm.vlsat vec_sat + { nop ; xm.vladd acc_re} + { add acc_re, acc_re, t3 ; xm.vstr acc_re} + { mv t3, s8 ; xm.bt len, .L_loop_top } + .L_loop_bot: + +#undef bytemask +#define bytemask len /* len is zero on every path into .L_loop_bot, so its register now receives the reloaded tail byte count */ + + { nop ; lw bytemask, (STACK_BYTEMASK)*4 (sp)} + { xm.mkmsk bytemask, bytemask ; xm.brff 
bytemask, .L_done } /* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ /* Tail: same arithmetic as the loop body, but stores go through xm.vstrpv with the bytemask mask; each masked result is also written to vec_tmp (previously overwritten with the cleared vD) and passed through xm.vldd / xm.vstd, presumably so the headroom read at .L_done reflects only valid elements - TODO confirm */ + + xm.vlashr acc_re, acc_shr + xm.vstrpv acc_re, bytemask + xm.vlashr acc_im, acc_shr + xm.vstrpv acc_im, bytemask + { nop ; xm.vldr t3} + { xm.mkmski t3, 32 ; xm.vlmul0 b_re} + xm.vlmul1 b_re + xm.vstrpv vec_tmp, t3 + { addi vec_sat,sp, (STACK_VEC_SAT)*4 ; xm.vldc vec_tmp} + { nop ; xm.vclrdr } + { nop ; xm.vstd vec_tmp} + { nop ; xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { nop ; xm.vldc b_im} + { addi t3,sp, (STACK_VEC_SAT)*4 ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + xm.vlsat vec_sat + { nop ; xm.vladd acc_im} + xm.vstrpv acc_im, bytemask + xm.vstrpv vec_tmp, bytemask + { nop ; xm.vldd vec_tmp} + { nop ; xm.vstd vec_tmp} + { nop ; xm.vclrdr } + { nop ; xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { nop ; xm.vldc b_re} + { nop ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + xm.vlsat vec_sat + { nop ; xm.vladd acc_re} + xm.vstrpv acc_re, bytemask + xm.vstrpv vec_tmp, bytemask + { nop ; xm.vldd vec_tmp} + { nop ; xm.vstd vec_tmp} + + +.L_done: /* Epilogue: restore s2..s8 and return 15 minus the low 5 bits of xm.vgetc in a0 (the headroom_t result of the prototype) */ + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 + + { li a0, 15 ; xm.vgetc t3} + { xm.zexti t3, 5 ; lw s8, 4 (sp)} + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. 
*/ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conj_nmacc.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conj_nmacc.S new file mode 100644 index 00000000..37fb850b --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conj_nmacc.S @@ -0,0 +1,211 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +// XMOS Public License: Version 1 + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* + +headroom_t vect_complex_s16_conj_nmacc( + int16_t* acc_real, + int16_t* acc_imag, + const int16_t* b_real, + const int16_t* b_imag, + const int16_t* c_real, + const int16_t* c_imag, + const unsigned length, + const right_shift_t acc_shr, + const right_shift_t sat); + +*/ + +.text +.p2align 2 + +#define NSTACKVECS (4) +#define NSTACKWORDS (12+8*(NSTACKVECS)+4) + +#define STACK_SAT (NSTACKWORDS) /* word offset, relative to sp after xm.entsp, from which the prologue loads sat; presumably the first stack-passed argument - TODO confirm against the ABI */ + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) +#define STACK_VEC_SAT (NSTACKWORDS-16-4) + +#define STACK_BYTEMASK 8 + +#define FUNCTION_NAME vect_complex_s16_conj_nmacc + +#define acc_re x10 +#define acc_im x11 +#define b_re x12 +#define b_im x13 +#define c_re x18 +#define c_im x19 +#define len x20 +#define vec_tmp x21 +#define vec_sat x22 +#define bytemask x23 +#define acc_shr x23 /* same register as bytemask: the prologue stores bytemask to the STACK_BYTEMASK slot before writing acc_shr */ + + +/* + We're doing this: + + vR <- -1 + vR <- -1 * b.real + vC <- -b.real + tmp <- 0 + tmp <- vC * c.imag + vC <- b.imag + tmp <- tmp + vC * c.real + vR <- tmp >> sat + vR <- acc.imag - vR + acc.imag <- vR + + (vC still has b.imag) + tmp <- 0 + tmp <- vC * c.imag + vC <- b.real + tmp <- tmp + vC * c.real + vR <- tmp >> sat + vR <- acc.real - vR + acc.real <- vR +*/ + + +.L_neg_ones: + .short -0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000 /* NOTE(review): nothing in this file references .L_neg_ones; the code below loads vpu_vec_0x8000 instead */ + +FUNCTION_NAME: /* Prologue: save s2..s7 (s8 is saved a few lines down), fill the STACK_VEC_SAT vector with sat packed into both halves of each word, and split length into len = length >> 4 full chunks and bytemask = (length & 15) << SIZEOF_LOG2_S16 tail bytes */ + xm.entsp 
(NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + + { addi t3,sp, (STACK_VEC_SAT)*4 ; lw s2, (STACK_SAT)*4 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */ + { slli s3, s2, 16 ; xm.zexti s2, 16 } + { or s2, s2, s3 ; sw s8, 4 (sp)} + xm.stdi s2,s2, 0(t3) + xm.stdi s2,s2, 8(t3) + xm.stdi s2,s2, 16(t3) + xm.stdi s2,s2, 24(t3) + mv bytemask, a6 + { li t3, 32 ; nop} + mv c_re, a4 + { srli len, bytemask, 4 ; nop} + mv c_im, a5 + { xm.zexti bytemask, 4 ; nop} + { slli bytemask, bytemask, SIZEOF_LOG2_S16 ; slli t3, t3, 3 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli bytemask, bytemask, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + { nop ; sw bytemask, (STACK_BYTEMASK)*4 (sp)} + + mv acc_shr, a7 + { addi vec_tmp,sp, (STACK_VEC_TMP)*4 ; xm.vsetc t3} + la t3, vpu_vec_0x8000 + { mv s8, t3 ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_loop_top: /* Full chunks, 32 bytes per array per pass. Step 1: acc_re and acc_im are shifted right by acc_shr in place. Step 2: vpu_vec_0x8000 times b_re goes via vec_tmp into vC (the in-file summary calls this -b.real); c_im and then, with vC = b_im, c_re are multiply-accumulated, xm.vlsat is applied with vec_sat and xm.vlsub combines the result with acc_im (acc.imag - vR per the in-file summary). Step 3: the same with vC = b_im then b_re, combined with acc_re. */ + { xm.mkmski t3, 32 ; nop } + xm.vlashr acc_re, acc_shr + xm.vstrpv acc_re, t3 + xm.vlashr acc_im, acc_shr + xm.vstrpv acc_im, t3 + { mv t3, s8 ; nop } + { addi len, len, -1 ; xm.vldr t3} + { xm.mkmski t3, 32 ; xm.vlmul0 b_re} + xm.vlmul1 b_re + xm.vstrpv vec_tmp, t3 + { addi vec_sat,sp, (STACK_VEC_SAT)*4 ; xm.vldc vec_tmp} + { nop ; xm.vclrdr } + { nop ; xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { nop ; xm.vldc b_im} + { nop ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + xm.vlsat vec_sat + { nop ; xm.vlsub acc_im} + { li t3, 32 ; xm.vstr acc_im} + { add b_im, b_im, t3 ; xm.vclrdr } + { add acc_im, acc_im, 
t3 ; xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { add c_im, c_im, t3 ; xm.vldc b_re} + { add b_re, b_re, t3 ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + { add c_re, c_re, t3 ; nop} + xm.vlsat vec_sat + { nop ; xm.vlsub acc_re} + { add acc_re, acc_re, t3 ; xm.vstr acc_re} + { mv t3, s8 ; xm.bt len, .L_loop_top } + .L_loop_bot: + +#undef bytemask +#define bytemask len /* len is zero on every path into .L_loop_bot, so its register now receives the reloaded tail byte count */ + + { nop ; lw bytemask, (STACK_BYTEMASK)*4 (sp)} + { xm.mkmsk bytemask, bytemask ; xm.brff bytemask, .L_done } /* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ /* Tail: same arithmetic as the loop body, but stores go through xm.vstrpv with the bytemask mask; each masked result is also written to vec_tmp (previously overwritten with the cleared vD) and passed through xm.vldd / xm.vstd, presumably so the headroom read at .L_done reflects only valid elements - TODO confirm */ + + xm.vlashr acc_re, acc_shr + xm.vstrpv acc_re, bytemask + xm.vlashr acc_im, acc_shr + xm.vstrpv acc_im, bytemask + { nop ; xm.vldr t3} + { xm.mkmski t3, 32 ; xm.vlmul0 b_re} + xm.vlmul1 b_re + xm.vstrpv vec_tmp, t3 + { addi vec_sat,sp, (STACK_VEC_SAT)*4 ; xm.vldc vec_tmp} + { nop ; xm.vclrdr } + { nop ; xm.vstd vec_tmp} + { nop ; xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { nop ; xm.vldc b_im} + { addi t3,sp, (STACK_VEC_SAT)*4 ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + xm.vlsat vec_sat + { nop ; xm.vlsub acc_im} + xm.vstrpv acc_im, bytemask + xm.vstrpv vec_tmp, bytemask + { nop ; xm.vldd vec_tmp} + { nop ; xm.vstd vec_tmp} + { nop ; xm.vclrdr } + { nop ; xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { nop ; xm.vldc b_re} + { nop ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + xm.vlsat vec_sat + { nop ; xm.vlsub acc_re} + xm.vstrpv acc_re, bytemask + xm.vstrpv vec_tmp, bytemask + { nop ; xm.vldd vec_tmp} + { nop ; xm.vstd vec_tmp} + + +.L_done: /* Epilogue: restore s2..s8 and return 15 minus the low 5 bits of xm.vgetc in a0 (the headroom_t result of the prototype) */ + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 + + { li a0, 15 ; xm.vgetc t3} + { xm.zexti t3, 5 ; lw s8, 4 (sp)} + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. 
*/ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conjugate_mul.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conjugate_mul.S new file mode 100644 index 00000000..fd84b54c --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_conjugate_mul.S @@ -0,0 +1,160 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* + +headroom_t vect_complex_s16_conj_mul( + int16_t* a_real, + int16_t* a_imag, + const int16_t* b_real, + const int16_t* b_imag, + const int16_t* c_real, + const int16_t* c_imag, + const unsigned length, + const right_shift_t sat); + +*/ + +#define FUNCTION_NAME vect_complex_s16_conj_mul + +#define NSTACKVECS (4) +#define NSTACKWORDS (12+8*(NSTACKVECS)+4) + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) +#define STACK_VEC_SAT (NSTACKWORDS-16-4) + +#define STACK_BYTEMASK 8 /* NOTE(review): not referenced by any instruction in this file; bytemask stays in x23 throughout */ + + +#define a_re x10 +#define a_im x11 +#define b_re x12 +#define b_im x13 +#define c_re x18 +#define c_im x19 +#define len x20 +#define vec_tmp x21 +#define vec_sat x22 +#define bytemask x23 + + +.text +.p2align 2 + +FUNCTION_NAME: /* Prologue: save s2..s7 (s8 is saved a few lines down), fill the STACK_VEC_SAT vector with sat (taken from a7) packed into both halves of each word, and split length into len = length >> 4 full chunks and bytemask = (length & 15) << SIZEOF_LOG2_S16 tail bytes */ + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + 
xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + + mv s2, a7 + { addi t3,sp, (STACK_VEC_SAT)*4 ; nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */ + { slli s3, s2, 16 ; xm.zexti s2, 16 } + { or s2, s2, s3 ; sw s8, 4 (sp)} + xm.stdi s2,s2, 0(t3) + xm.stdi s2,s2, 8(t3) + xm.stdi s2,s2, 16(t3) + xm.stdi s2,s2, 24(t3) + mv bytemask, a6 + { li t3, 32 ; nop} + mv c_re, a4 + { srli len, bytemask, 4 ; nop} + mv c_im, a5 + { xm.zexti bytemask, 4 ; nop} + { slli bytemask, bytemask, SIZEOF_LOG2_S16 ; slli t3, t3, 3 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli bytemask, bytemask, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + { addi vec_tmp,sp, (STACK_VEC_TMP)*4 ; xm.vsetc t3} + + la t3, vpu_vec_0x8000 + { mv s8, t3 ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_loop_top: /* Full chunks, 32 bytes per array per pass. Imag half: vpu_vec_0x8000 times b_re goes via vec_tmp into vC (presumably the negated b_re - TODO confirm); c_im and then, with vC = b_im, c_re are multiply-accumulated; xm.vlsat with vec_sat; store to a_im. Real half: vC still holds b_im, accumulate with c_im; vC = b_re, accumulate with c_re; xm.vlsat; store to a_re. */ + { addi len, len, -1 ; xm.vldr t3} + { xm.mkmski t3, 32 ; xm.vlmul0 b_re} + xm.vlmul1 b_re + xm.vstrpv vec_tmp, t3 + { addi vec_sat,sp, (STACK_VEC_SAT)*4 ; xm.vldc vec_tmp} + { nop ; xm.vclrdr } + { nop ; xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { nop ; xm.vldc b_im} + { addi t3,sp, (STACK_VEC_SAT)*4 ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + xm.vlsat vec_sat + { li t3, 32 ; xm.vstr a_im} + { add b_im, b_im, t3 ; xm.vclrdr } + { add a_im, a_im, t3 ; xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { add c_im, c_im, t3 ; xm.vldc b_re} + { add b_re, b_re, t3 ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + { add c_re, c_re, t3 ; nop} + xm.vlsat vec_sat + { add a_re, a_re, t3 ; xm.vstr a_re} + { mv t3, s8 ; xm.bt len, .L_loop_top } + .L_loop_bot: + + { xm.mkmsk bytemask, bytemask ; xm.brff bytemask, .L_done } /* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ /* Tail: same arithmetic as the loop body, but stores go through xm.vstrpv with the bytemask mask; each masked result is also written to vec_tmp (previously overwritten with the cleared vD) and passed through xm.vldd / xm.vstd, presumably so the headroom read at .L_done reflects only valid elements - TODO confirm */ + + { nop ; xm.vldr t3} + { 
xm.mkmski t3, 32 ; xm.vlmul0 b_re} + xm.vlmul1 b_re + xm.vstrpv vec_tmp, t3 + { addi vec_sat,sp, (STACK_VEC_SAT)*4 ; xm.vldc vec_tmp} + { nop ; xm.vclrdr } + { nop ; xm.vstd vec_tmp} + { nop ; xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { nop ; xm.vldc b_im} + { addi t3,sp, (STACK_VEC_SAT)*4 ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + xm.vlsat vec_sat + xm.vstrpv a_im, bytemask + xm.vstrpv vec_tmp, bytemask + { nop ; xm.vldd vec_tmp} + { nop ; xm.vstd vec_tmp} + { nop ; xm.vclrdr } + { nop ; xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { nop ; xm.vldc b_re} + { nop ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + xm.vlsat vec_sat + xm.vstrpv a_re, bytemask + xm.vstrpv vec_tmp, bytemask + { nop ; xm.vldd vec_tmp} + { nop ; xm.vstd vec_tmp} + + +.L_done: /* Epilogue: restore s2..s8 and return 15 minus the low 5 bits of xm.vgetc in a0 (the headroom_t result of the prototype) */ + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 + + { li a0, 15 ; xm.vgetc t3} + { xm.zexti t3, 5 ; lw s8, 4 (sp)} + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. 
*/ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_macc.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_macc.S new file mode 100644 index 00000000..ecd925c2 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_macc.S @@ -0,0 +1,212 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +// XMOS Public License: Version 1 + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* + +headroom_t vect_complex_s16_macc( + int16_t* acc_real, + int16_t* acc_imag, + const int16_t* b_real, + const int16_t* b_imag, + const int16_t* c_real, + const int16_t* c_imag, + const unsigned length, + const right_shift_t acc_shr, + const right_shift_t sat); + +*/ + +.text +.p2align 2 + +#define NSTACKVECS (4) +#define NSTACKWORDS (12+8*(NSTACKVECS)+4) + +#define STACK_SAT (NSTACKWORDS) /* word offset, relative to sp after xm.entsp, from which the prologue loads sat; presumably the first stack-passed argument - TODO confirm against the ABI */ + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) +#define STACK_VEC_SAT (NSTACKWORDS-16-4) + +#define STACK_BYTEMASK 8 + +#define FUNCTION_NAME vect_complex_s16_macc + +#define acc_re x10 +#define acc_im x11 +#define b_re x12 +#define b_im x13 +#define c_re x18 +#define c_im x19 +#define len x20 +#define vec_tmp x21 +#define vec_sat x22 +#define bytemask x23 +#define acc_shr x23 /* same register as bytemask: the prologue stores bytemask to the STACK_BYTEMASK slot before writing acc_shr */ + + +/* + We're doing this: + + vR <- -1 + vR <- -1 * b.imag + vC <- -b.imag + tmp <- 0 + tmp <- vC * c.imag + vC <- b.real + tmp <- tmp + vC * c.real + vR <- tmp >> sat + vR <- acc.real + vR + acc.real <- vR + + (vC still has b.real) + tmp <- 0 + tmp <- vC * c.imag + vC <- b.imag + tmp <- tmp + vC * c.real + vR <- tmp >> sat + vR <- acc.imag + vR + acc.imag <- vR +*/ + + +.L_neg_ones: + .short -0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000 /* NOTE(review): nothing in this file references .L_neg_ones; the code below loads vpu_vec_0x8000 instead */ + +FUNCTION_NAME: /* Prologue: save s2..s7 (s8 is saved a few lines down), fill the STACK_VEC_SAT vector with sat packed into both halves of each word, and split length into len = length >> 4 full chunks and bytemask = (length & 15) << SIZEOF_LOG2_S16 tail bytes */ + xm.entsp (NSTACKWORDS)*4/* XAT Warning: 
"Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + + { addi t3,sp, (STACK_VEC_SAT)*4 ; lw s2, (STACK_SAT)*4 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */ + { slli s3, s2, 16 ; xm.zexti s2, 16 } + { or s2, s2, s3 ; sw s8, 4 (sp)} + xm.stdi s2,s2, 0(t3) + xm.stdi s2,s2, 8(t3) + xm.stdi s2,s2, 16(t3) + xm.stdi s2,s2, 24(t3) + mv bytemask, a6 + { li t3, 32 ; nop} + mv c_re, a4 + { srli len, bytemask, 4 ; nop} + mv c_im, a5 + { xm.zexti bytemask, 4 ; nop} + { slli bytemask, bytemask, SIZEOF_LOG2_S16 ; slli t3, t3, 3 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli bytemask, bytemask, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + { nop ; sw bytemask, (STACK_BYTEMASK)*4 (sp)} + mv acc_shr, a7 + { nop ; nop} /* empty bundle: both slots are nop */ + { addi vec_tmp,sp, (STACK_VEC_TMP)*4 ; xm.vsetc t3} + + la t3, vpu_vec_0x8000 + { mv s8, t3 ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_loop_top: /* Full chunks, 32 bytes per array per pass. Step 1: acc_re and acc_im are shifted right by acc_shr in place. Step 2: vpu_vec_0x8000 times b_im goes via vec_tmp into vC (the in-file summary calls this -b.imag); c_im and then, with vC = b_re, c_re are multiply-accumulated, xm.vlsat is applied with vec_sat and the result is added to acc_re. Step 3: the same with vC = b_re then b_im, added to acc_im. */ + { xm.mkmski t3, 32 ; nop } + xm.vlashr acc_re, acc_shr + xm.vstrpv acc_re, t3 + xm.vlashr acc_im, acc_shr + xm.vstrpv acc_im, t3 + { mv t3, s8 ; nop } + { addi len, len, -1 ; xm.vldr t3} + { xm.mkmski t3, 32 ; xm.vlmul0 b_im} + xm.vlmul1 b_im + xm.vstrpv vec_tmp, t3 + { addi vec_sat,sp, (STACK_VEC_SAT)*4 ; xm.vldc vec_tmp} + { nop ; xm.vclrdr } + { nop ; xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { nop ; xm.vldc b_re} + { nop ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + xm.vlsat vec_sat + { nop ; xm.vladd acc_re} + { li t3, 32 ; xm.vstr acc_re} + { add b_re, b_re, t3 ; xm.vclrdr } + { add acc_re, acc_re, t3 ; xm.vlmacc0 
c_im} + xm.vlmacc1 c_im + { add c_im, c_im, t3 ; xm.vldc b_im} + { add b_im, b_im, t3 ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + { add c_re, c_re, t3 ; nop} + xm.vlsat vec_sat + { nop ; xm.vladd acc_im} + { add acc_im, acc_im, t3 ; xm.vstr acc_im} + { mv t3, s8 ; xm.bt len, .L_loop_top } + .L_loop_bot: + +#undef bytemask +#define bytemask len /* len is zero on every path into .L_loop_bot, so its register now receives the reloaded tail byte count */ + + { nop ; lw bytemask, (STACK_BYTEMASK)*4 (sp)} + { xm.mkmsk bytemask, bytemask ; xm.brff bytemask, .L_done } /* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ /* Tail: same arithmetic as the loop body, but stores go through xm.vstrpv with the bytemask mask; each masked result is also written to vec_tmp (previously overwritten with the cleared vD) and passed through xm.vldd / xm.vstd, presumably so the headroom read at .L_done reflects only valid elements - TODO confirm */ + + xm.vlashr acc_re, acc_shr + xm.vstrpv acc_re, bytemask + xm.vlashr acc_im, acc_shr + xm.vstrpv acc_im, bytemask + { nop ; xm.vldr t3} + { xm.mkmski t3, 32 ; xm.vlmul0 b_im} + xm.vlmul1 b_im + xm.vstrpv vec_tmp, t3 + { addi vec_sat,sp, (STACK_VEC_SAT)*4 ; xm.vldc vec_tmp} + { nop ; xm.vclrdr } + { nop ; xm.vstd vec_tmp} + { nop ; xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { nop ; xm.vldc b_re} + { addi t3,sp, (STACK_VEC_SAT)*4 ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + xm.vlsat vec_sat + { nop ; xm.vladd acc_re} + xm.vstrpv acc_re, bytemask + xm.vstrpv vec_tmp, bytemask + { nop ; xm.vldd vec_tmp} + { nop ; xm.vstd vec_tmp} + { nop ; xm.vclrdr } + { nop ; xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { nop ; xm.vldc b_im} + { nop ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + xm.vlsat vec_sat + { nop ; xm.vladd acc_im} + xm.vstrpv acc_im, bytemask + xm.vstrpv vec_tmp, bytemask + { nop ; xm.vldd vec_tmp} + { nop ; xm.vstd vec_tmp} + + +.L_done: /* Epilogue: restore s2..s8 and return 15 minus the low 5 bits of xm.vgetc in a0 (the headroom_t result of the prototype) */ + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 + + { li a0, 15 ; xm.vgetc t3} + { xm.zexti t3, 5 ; lw s8, 4 (sp)} + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. 
*/ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_mag.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_mag.S new file mode 100644 index 00000000..412a6134 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_mag.S @@ -0,0 +1,269 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + +#include "../asm_helper.h" + +/* + +headroom_t vect_complex_s16_mag( + int16_t a[], + const int16_t b_real[], + const int16_t b_imag[], + const unsigned length, + const right_shift_t b_shr, + const int16_t* rot_table, + const unsigned table_rows) + +*/ + +.text +.p2align 2 + +#define NSTACKVECS (8) +#define NSTACKWORDS (8+(8*NSTACKVECS)+4) + + +#define STACK_VEC_TMP_IMAG (NSTACKWORDS-8-4) +#define STACK_VEC_TMP_REAL (NSTACKWORDS-16-4) +#define STACK_VEC_TMP2 (NSTACKWORDS-24-4) +#define STACK_VEC_NEG_ONES (NSTACKWORDS-40-4) +#define STACK_VEC_SAT (NSTACKWORDS-32-4) + +#define FUNCTION_NAME vect_complex_s16_mag + +#define Q(R) R + +#define a x10 +#define b_real x11 +#define b_imag x12 +#define length x13 +#define b_shr x18 +#define _32 x19 +#define vec_neg_one x20 +#define mask32 x21 +#define tail_bytes x22 +#define iter x23 +#define table x24 + +FUNCTION_NAME: /* prologue: allocate the frame and spill s2-s8 (x18-x24 in the standard RISC-V register naming), which the #defines above use as working registers */ + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the
instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + + { li _32, 32 ; sw s8, 4 (sp)} + + { slli t3, _32, 3 ; mv tail_bytes, length } + { srli length, length, 4 ; xm.vsetc t3} /* length now counts full 16-element vectors; tail_bytes still holds the original element count */ + { nop ; li t3, 15 } + + { slli s8, t3, 16 ; xm.zexti tail_bytes, 4 } + { or t3, t3, s8 ; xm.mkmski mask32, 32 } + xm.stdsp t3,t3,(STACK_VEC_SAT/2 + 0)*8 + xm.stdsp t3,t3,(STACK_VEC_SAT/2 + 1)*8 + xm.stdsp t3,t3,(STACK_VEC_SAT/2 + 2)*8 + xm.stdsp t3,t3,(STACK_VEC_SAT/2 + 3)*8 + li s8, 0xC000 + { slli s7, s8, 16 ; slli tail_bytes, tail_bytes, 1 } + { or s8, s8, s7 ; nop} + addi vec_neg_one, sp, STACK_VEC_NEG_ONES*4 + + xm.stdi s8,s8, 0(vec_neg_one) + xm.stdi s8,s8, 8(vec_neg_one) + xm.stdi s8,s8, 16(vec_neg_one) + xm.stdi s8,s8, 24(vec_neg_one) + + { xm.mkmsk tail_bytes, tail_bytes ; nop} + mv b_shr, a4 /* b_shr: 5th argument in the prototype above */ + { addi t3,sp, (STACK_VEC_TMP_REAL)*4 ; nop }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + beqz length, .L_outer_loop_bot /* no full vectors: skip the main loop */ + + .L_outer_loop_top: + xm.vlashr b_real, b_shr + xm.vstrpv t3, mask32 + { add b_real, b_real, _32 ; xm.vsign } + addi Q(iter),sp, (STACK_VEC_TMP_IMAG)*4 + {nop ; xm.vlmul0 t3} + xm.vlmul1 t3 + xm.vstrpv t3, mask32 + + {nop; xm.vladd t3} /// + xm.vstrpv t3, mask32 /// + + + xm.vlashr b_imag, b_shr + xm.vstrpv Q(iter), mask32 + { add b_imag, b_imag, _32 ; xm.vsign } + { nop ; xm.vlmul0 Q(iter)} + xm.vlmul1 Q(iter) + xm.vstrpv Q(iter), mask32 + + {nop; xm.vladd Q(iter)} /// + xm.vstrpv Q(iter), mask32 /// + + + mv table, a5 /* rot_table: 6th argument */ + addi t3,sp, (STACK_VEC_TMP_IMAG)*4 + mv iter, a6 /* table_rows: 7th argument, decremented once per pass of the inner loop */ + + .L_inner_loop_top: + // {addi t3, sp, STACK_VEC_NEG_ONES*4; xm.vclrdr} + + + { nop ; xm.vldr t3} + { addi t3,sp, (STACK_VEC_TMP2)*4 ; xm.vlmul0 vec_neg_one} + xm.vlmul1 vec_neg_one + xm.vstrpv t3, mask32 + + {nop; xm.vladd t3} /// + xm.vstrpv t3, mask32 /// + + + {
nop ; xm.vclrdr } + { add table, table, _32 ; xm.vldc t3} + { addi t3,sp, (STACK_VEC_TMP_REAL)*4 ; xm.vlmacc0 table} + xm.vlmacc1 table + { sub table, table, _32 ; xm.vldc t3} + { addi t3,sp, (STACK_VEC_SAT)*4 ; xm.vlmacc0 table} + xm.vlmacc1 table + xm.vlsat t3 + addi t3,sp, (STACK_VEC_TMP_REAL)*4 + + xm.vstrpv t3, mask32 + { add table, table, _32 ; xm.vclrdr } + addi t3,sp, (STACK_VEC_TMP_IMAG)*4 + {nop ; xm.vlmacc0 table} + xm.vlmacc1 table + { sub table, table, _32 ; xm.vldc t3} + { addi t3,sp, (STACK_VEC_SAT)*4 ; xm.vlmacc0 table} + xm.vlmacc1 table + xm.vlsat t3 + addi t3,sp, (STACK_VEC_TMP_IMAG)*4 + + xm.vstrpv t3, mask32 + { add table, table, _32 ; xm.vsign } + { addi iter, iter, -1 ; xm.vlmul0 t3} // imag = |imag| + xm.vlmul1 t3 + xm.vstrpv t3, mask32 + + {nop; xm.vladd t3} /// + xm.vstrpv t3, mask32 /// + + { add table, table, _32 ; xm.bt iter, .L_inner_loop_top } + + { addi t3,sp, (STACK_VEC_TMP_REAL)*4 ; nop } + { addi length, length, -1 ; xm.vldr t3} + { add a, a, _32 ; xm.vstr a} /* the vector just loaded from the STACK_VEC_TMP_REAL buffer is stored to the output a[] */ + bnez length, .L_outer_loop_top + .L_outer_loop_bot: /* tail: repeats the main-loop sequence once for the leftover elements; the shifted inputs and the final output are stored with the tail_bytes mask */ + + { addi t3,sp, (STACK_VEC_TMP_REAL)*4 ; nop }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + beqz tail_bytes, .L_done /* nothing left over */ + xm.vlashr b_real, b_shr + xm.vstrpv t3, tail_bytes + { add b_real, b_real, _32 ; xm.vsign } + addi Q(iter),sp, (STACK_VEC_TMP_IMAG)*4 + {nop ; xm.vlmul0 t3} + xm.vlmul1 t3 + xm.vstrpv t3, mask32 + + {nop; xm.vladd t3} /// + xm.vstrpv t3, mask32 /// + + xm.vlashr b_imag, b_shr + xm.vstrpv Q(iter), tail_bytes + { add b_imag, b_imag, _32 ; xm.vsign } + { nop ; xm.vlmul0 Q(iter)} + xm.vlmul1 Q(iter) + xm.vstrpv Q(iter), mask32 + + {nop; xm.vladd Q(iter)} /// + xm.vstrpv Q(iter), mask32 /// + + + mv table, a5 + addi t3,sp, (STACK_VEC_TMP_IMAG)*4 + mv iter, a6 + + .L_inner_loop2_top: + // { ldaw x28, sp[STACK_VEC_NEG_ONES] ; vclrdr } + addi t3,sp, (STACK_VEC_TMP_IMAG)*4 + {nop; xm.vldr
t3} + { addi t3,sp, (STACK_VEC_TMP2)*4 ; xm.vlmul0 vec_neg_one} + xm.vlmul1 vec_neg_one + xm.vstrpv t3, mask32 + + {nop; xm.vladd t3} /// + xm.vstrpv t3, mask32 /// + + { nop ; xm.vclrdr } + { add table, table, _32 ; xm.vldc t3} + { addi t3,sp, (STACK_VEC_TMP_REAL)*4 ; xm.vlmacc0 table} + xm.vlmacc1 table + { sub table, table, _32 ; xm.vldc t3} + { addi t3,sp, (STACK_VEC_SAT)*4 ; xm.vlmacc0 table} + xm.vlmacc1 table + xm.vlsat t3 + addi t3,sp, (STACK_VEC_TMP_REAL)*4 + + xm.vstrpv t3, mask32 + { add table, table, _32 ; xm.vclrdr } + addi t3,sp, (STACK_VEC_TMP_IMAG)*4 + {nop ; xm.vlmacc0 table} + xm.vlmacc1 table + { sub table, table, _32 ; xm.vldc t3} + { addi t3,sp, (STACK_VEC_SAT)*4 ; xm.vlmacc0 table} + xm.vlmacc1 table + xm.vlsat t3 + addi t3,sp, (STACK_VEC_TMP_IMAG)*4 + + xm.vstrpv t3, mask32 + { add table, table, _32 ; xm.vsign } + { addi iter, iter, -1 ; xm.vlmul0 t3} // imag = |imag| + xm.vlmul1 t3 + xm.vstrpv t3, mask32 + + {nop; xm.vladd t3} /// + xm.vstrpv t3, mask32 /// + + { add table, table, _32 ; xm.bt iter, .L_inner_loop2_top } + + { addi t3,sp, (STACK_VEC_TMP_REAL)*4 ; xm.vclrdr } + { nop ; xm.vldr t3} + { nop ; xm.vstd t3} + xm.vstrpv t3, tail_bytes + xm.vstrpv a, tail_bytes + { nop ; xm.vldd t3} + { nop ; xm.vstd t3} + + +.L_done: /* epilogue: reload the spilled registers; a0 is returned as 15 minus the 5-bit field taken from the xm.vgetc result (presumably the VPU headroom - confirm) */ + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 + + { li a0, 15 ; xm.vgetc t3} + { xm.zexti t3, 5 ; lw s8, 4 (sp)} + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30.
*/ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_mul.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_mul.S new file mode 100644 index 00000000..d840c05d --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_mul.S @@ -0,0 +1,186 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* + +headroom_t vect_complex_s16_mul( + int16_t* a_real, + int16_t* a_imag, + const int16_t* b_real, + const int16_t* b_imag, + const int16_t* c_real, + const int16_t* c_imag, + const unsigned length, + const right_shift_t sat); + +*/ + +.text +.p2align 2 + +#define NSTACKVECS (4) +#define NSTACKWORDS (12+8*(NSTACKVECS)+4) + + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) +#define STACK_VEC_SAT (NSTACKWORDS-16-4) + +#define FUNCTION_NAME vect_complex_s16_mul + +#define a_re x10 +#define a_im x11 +#define b_re x12 +#define b_im x13 +#define c_re x18 +#define c_im x19 +#define len x20 +#define vec_tmp x21 +#define vec_sat x22 +#define bytemask x23 + + + +/* + We're doing this: + + vR <- -1 + vR <- -1 * b.imag + vC <- -b.imag + acc <- 0 + acc <- vC * c.imag + vC <- b.real + acc <- acc + vC * c.real + vR <- acc >> sat + a.real <- vR + + (vC still has b.real) + acc <- 0 + acc <- vC * c.imag + vC <- b.imag + acc <- acc + vC * c.real + vR <- acc >> sat + a.imag <- vR +*/ + + +//.L_neg_ones: + // .short -0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000 + +FUNCTION_NAME: /* prologue: allocate the frame, spill s2-s8, then write the sat argument (packed into both halves of s2) across the 32-byte STACK_VEC_SAT buffer */ + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer
value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + + mv s2, a7 /* sat: 8th argument in the prototype above */ + { addi t3,sp, (STACK_VEC_SAT)*4 ; nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */ + { slli s3, s2, 16 ; xm.zexti s2, 16 } + { or s2, s2, s3 ; sw s8, 4 (sp)} + xm.stdi s2,s2, 0(t3) + xm.stdi s2,s2, 8(t3) + xm.stdi s2,s2, 16(t3) + xm.stdi s2,s2, 24(t3) + mv bytemask, a6 /* length: 7th argument; split below into len (length >> 4) and a tail byte count */ + { li t3, 32 ; nop} + mv c_re, a4 + { srli len, bytemask, 4 ; nop} + mv c_im, a5 + { xm.zexti bytemask, 4 ; nop} + { slli bytemask, bytemask, SIZEOF_LOG2_S16 ; slli t3, t3, 3 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli bytemask, bytemask, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + { addi vec_tmp,sp, (STACK_VEC_TMP)*4 ; xm.vsetc t3} + la t3, vpu_vec_0x8000 + { mv s8, t3 ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_loop_top: /* one pass per full 16-element vector; t3 is re-pointed at vpu_vec_0x8000 (kept in s8) at the end of every pass */ + { addi len, len, -1 ; xm.vldr t3} + { xm.mkmski t3, 32 ; xm.vlmul0 b_im} + xm.vlmul1 b_im + xm.vstrpv vec_tmp, t3 + { addi vec_sat,sp, (STACK_VEC_SAT)*4 ; xm.vldc vec_tmp} + { nop ; xm.vclrdr } + { nop ; xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { nop ; xm.vldc b_re} + { nop ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + xm.vlsat vec_sat + { li t3, 32 ; xm.vstr a_re} + + + { add b_re, b_re, t3 ; xm.vclrdr } + { add a_re, a_re, t3 ; xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { add c_im, c_im, t3 ; xm.vldc b_im} + { add b_im, b_im, t3 ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + { add c_re, c_re, t3 ; nop} + xm.vlsat vec_sat + { add a_im, a_im, t3 ; xm.vstr a_im} + { mv t3, s8 ; xm.bt len, .L_loop_top } + .L_loop_bot: + + { xm.mkmsk bytemask, bytemask ; xm.brff bytemask, .L_done }
/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + { nop ; xm.vldr t3} + { xm.mkmski t3, 32 ; xm.vlmul0 b_im} + xm.vlmul1 b_im + xm.vstrpv vec_tmp, t3 + { addi vec_sat,sp, (STACK_VEC_SAT)*4 ; xm.vldc vec_tmp} + { nop ; xm.vclrdr } + { nop ; xm.vstd vec_tmp} + { nop ; xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { nop ; xm.vldc b_re} + { addi t3,sp, (STACK_VEC_SAT)*4 ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + xm.vlsat vec_sat + xm.vstrpv a_re, bytemask /* tail results are written with the bytemask operand instead of a full xm.vstr */ + xm.vstrpv vec_tmp, bytemask + { nop ; xm.vldd vec_tmp} + { nop ; xm.vstd vec_tmp} + { nop ; xm.vclrdr } + { nop ; xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { nop ; xm.vldc b_im} + { nop ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + xm.vlsat vec_sat + xm.vstrpv a_im, bytemask + xm.vstrpv vec_tmp, bytemask + { nop ; xm.vldd vec_tmp} + { nop ; xm.vstd vec_tmp} + + +.L_done: /* epilogue: reload the spilled registers; a0 is returned as 15 minus the 5-bit field taken from the xm.vgetc result (presumably the VPU headroom - confirm) */ + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 + + { li a0, 15 ; xm.vgetc t3} + { xm.zexti t3, 5 ; lw s8, 4 (sp)} + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32.
*/ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_nmacc.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_nmacc.S new file mode 100644 index 00000000..e68d09e2 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_nmacc.S @@ -0,0 +1,211 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +// XMOS Public License: Version 1 + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* + +headroom_t vect_complex_s16_nmacc( + int16_t* acc_real, + int16_t* acc_imag, + const int16_t* b_real, + const int16_t* b_imag, + const int16_t* c_real, + const int16_t* c_imag, + const unsigned length, + const right_shift_t acc_shr, + const right_shift_t sat); + +*/ + +.text +.p2align 2 + +#define NSTACKVECS (4) +#define NSTACKWORDS (12+8*(NSTACKVECS)+4) + +#define STACK_SAT (NSTACKWORDS) + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) +#define STACK_VEC_SAT (NSTACKWORDS-16-4) + +#define STACK_BYTEMASK 8 + +#define FUNCTION_NAME vect_complex_s16_nmacc + +#define acc_re x10 +#define acc_im x11 +#define b_re x12 +#define b_im x13 +#define c_re x18 +#define c_im x19 +#define len x20 +#define vec_tmp x21 +#define vec_sat x22 +#define bytemask x23 +#define acc_shr x23 + + +/* + We're doing this: + + vR <- -1 + vR <- -1 * b.imag + vC <- -b.imag + tmp <- 0 + tmp <- vC * c.imag + vC <- b.real + tmp <- tmp + vC * c.real + vR <- tmp >> sat + vR <- acc.real - vR + acc.real <- vR + + (vC still has b.real) + tmp <- 0 + tmp <- vC * c.imag + vC <- b.imag + tmp <- tmp + vC * c.real + vR <- tmp >> sat + vR <- acc.imag - vR + acc.imag <- vR +*/ + + +.L_neg_ones: /* NOTE(review): no instruction in this function references .L_neg_ones (the code loads vpu_vec_0x8000 instead) - confirm whether this table is still needed */ + .short -0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000,-0x4000 + +FUNCTION_NAME: /* prologue: allocate the frame, spill s2-s8, then write the sat value (read from offset STACK_SAT, i.e. just past this frame) across the 32-byte STACK_VEC_SAT buffer */ + xm.entsp (NSTACKWORDS)*4/* XAT
Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + + { addi t3,sp, (STACK_VEC_SAT)*4 ; lw s2, (STACK_SAT)*4 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */ + { slli s3, s2, 16 ; xm.zexti s2, 16 } + { or s2, s2, s3 ; sw s8, 4 (sp)} + xm.stdi s2,s2, 0(t3) + xm.stdi s2,s2, 8(t3) + xm.stdi s2,s2, 16(t3) + xm.stdi s2,s2, 24(t3) + mv bytemask, a6 /* length: 7th argument; split below into len (length >> 4) and a tail byte count */ + { li t3, 32 ; nop} + mv c_re, a4 + { srli len, bytemask, 4 ; nop} + mv c_im, a5 + { xm.zexti bytemask, 4 ; nop} + { slli bytemask, bytemask, SIZEOF_LOG2_S16 ; slli t3, t3, 3 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli bytemask, bytemask, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + { nop ; sw bytemask, (STACK_BYTEMASK)*4 (sp)} + mv acc_shr, a7 /* acc_shr (8th argument) reuses x23; the bytemask value it replaces was stored at STACK_BYTEMASK just above */ + { nop ; nop} + { addi vec_tmp,sp, (STACK_VEC_TMP)*4 ; xm.vsetc t3} + la t3, vpu_vec_0x8000 + { mv s8, t3 ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_loop_top: /* one pass per full 16-element vector: both accumulators are first shifted by acc_shr and stored back, then the product is subtracted from them */ + { xm.mkmski t3, 32 ; nop } + xm.vlashr acc_re, acc_shr + xm.vstrpv acc_re, t3 + xm.vlashr acc_im, acc_shr + xm.vstrpv acc_im, t3 + { mv t3, s8 ; nop } + { addi len, len, -1 ; xm.vldr t3} + { xm.mkmski t3, 32 ; xm.vlmul0 b_im} + xm.vlmul1 b_im + xm.vstrpv vec_tmp, t3 + { addi vec_sat,sp, (STACK_VEC_SAT)*4 ; xm.vldc vec_tmp} + { nop ; xm.vclrdr } + { nop ; xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { nop ; xm.vldc b_re} + { nop ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + xm.vlsat vec_sat + { nop ; xm.vlsub acc_re} + { li t3, 32 ; xm.vstr acc_re} + { add b_re, b_re, t3 ; xm.vclrdr } + { add acc_re, acc_re, t3 ;
xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { add c_im, c_im, t3 ; xm.vldc b_im} + { add b_im, b_im, t3 ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + { add c_re, c_re, t3 ; nop} + xm.vlsat vec_sat + { nop ; xm.vlsub acc_im} + { add acc_im, acc_im, t3 ; xm.vstr acc_im} + { mv t3, s8 ; xm.bt len, .L_loop_top } + .L_loop_bot: + +#undef bytemask +#define bytemask len + + { nop ; lw bytemask, (STACK_BYTEMASK)*4 (sp)} /* bytemask now names the len register (redefined just above) and is reloaded from its stack slot */ + { xm.mkmsk bytemask, bytemask ; xm.brff bytemask, .L_done } /* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + xm.vlashr acc_re, acc_shr + xm.vstrpv acc_re, bytemask + xm.vlashr acc_im, acc_shr + xm.vstrpv acc_im, bytemask + { nop ; xm.vldr t3} + { xm.mkmski t3, 32 ; xm.vlmul0 b_im} + xm.vlmul1 b_im + xm.vstrpv vec_tmp, t3 + { addi vec_sat,sp, (STACK_VEC_SAT)*4 ; xm.vldc vec_tmp} + { nop ; xm.vclrdr } + { nop ; xm.vstd vec_tmp} + { nop ; xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { nop ; xm.vldc b_re} + { addi t3,sp, (STACK_VEC_SAT)*4 ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + xm.vlsat vec_sat + { nop ; xm.vlsub acc_re} + xm.vstrpv acc_re, bytemask + xm.vstrpv vec_tmp, bytemask + { nop ; xm.vldd vec_tmp} + { nop ; xm.vstd vec_tmp} + { nop ; xm.vclrdr } + { nop ; xm.vlmacc0 c_im} + xm.vlmacc1 c_im + { nop ; xm.vldc b_im} + { nop ; xm.vlmacc0 c_re} + xm.vlmacc1 c_re + xm.vlsat vec_sat + { nop ; xm.vlsub acc_im} + xm.vstrpv acc_im, bytemask + xm.vstrpv vec_tmp, bytemask + { nop ; xm.vldd vec_tmp} + { nop ; xm.vstd vec_tmp} + + +.L_done: /* epilogue: reload the spilled registers; a0 is returned as 15 minus the 5-bit field taken from the xm.vgetc result (presumably the VPU headroom - confirm) */ + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 + + { li a0, 15 ; xm.vgetc t3} + { xm.zexti t3, 5 ; lw s8, 4 (sp)} + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42.
*/ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_real_mul.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_real_mul.S new file mode 100644 index 00000000..faced79a --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_real_mul.S @@ -0,0 +1,142 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* + +headroom_t vect_complex_s16_real_mul( + int16_t* a_real, + int16_t* a_imag, + const int16_t* b_real, + const int16_t* b_imag, + const int16_t c[], + const unsigned length, + const right_shift_t sat); + +*/ + +.text +.p2align 2 + +#define NSTACKVECS (4) +#define NSTACKWORDS (16+8*(NSTACKVECS)+4) + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) +#define STACK_VEC_SAT (NSTACKWORDS-16-4) + + +#define FUNCTION_NAME vect_complex_s16_real_mul + +#define a_re x10 +#define a_im x11 +#define b_re x12 +#define b_im x13 +#define c x18 +#define len x19 +#define vec_tmp x20 +#define vec_sat x21 +#define bytemask x22 + + +FUNCTION_NAME: /* prologue: allocate the frame, spill s2-s8, then write the sat argument (packed into both halves of s2) across the 32-byte STACK_VEC_SAT buffer */ + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + + mv s2, a6 /* sat: 7th argument in the prototype above */ + { addi t3,sp,
(STACK_VEC_SAT)*4 ; nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */ + { slli s3, s2, 16 ; xm.zexti s2, 16 } + { or s2, s2, s3 ; sw s8, 4 (sp)} + xm.stdi s2,s2, 0(t3) + xm.stdi s2,s2, 8(t3) + xm.stdi s2,s2, 16(t3) + xm.stdi s2,s2, 24(t3) + + mv len, a5 /* length: 6th argument; split below into len (length >> 4) and a tail byte count in bytemask */ + { li t3, 32 ; mv bytemask, len} + mv c, a4 /* c[]: 5th argument (s2 is free again once the sat vector has been written) */ + { srli len, bytemask, 4 ; nop} + { xm.zexti bytemask, 4 ; nop } + { slli bytemask, bytemask, SIZEOF_LOG2_S16 ; slli t3, t3, 3 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli bytemask, bytemask, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + { addi vec_tmp,sp, (STACK_VEC_TMP)*4 ; xm.vsetc t3} + { addi vec_sat,sp, (STACK_VEC_SAT)*4 ; nop } + { li t3, 32 ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + +#define _32 x28 + .L_loop_top: /* _32 is x28, the register written as t3 above (t3 is x28 in the standard RISC-V naming); it was set to 32 just before the loop */ + { addi len, len, -1 ; xm.vclrdr } + { add c, c, _32 ; xm.vldc c} + { nop ; xm.vlmacc0 b_re} + xm.vlmacc1 b_re + { add b_re, b_re, _32 ; nop} + xm.vlsat vec_sat + { add a_re, a_re, _32 ; xm.vstr a_re} + { nop ; xm.vclrdr } + { nop ; xm.vlmacc0 b_im} + xm.vlmacc1 b_im + { add b_im, b_im, _32 ; nop} + xm.vlsat vec_sat + { add a_im, a_im, _32 ; xm.vstr a_im} + { nop ; xm.bt len, .L_loop_top } + + .L_loop_bot: +#undef _32 + + { xm.mkmsk bytemask, bytemask ; xm.brff bytemask, .L_done }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + + + xm.vclrdr + xm.vstd vec_tmp + xm.vldc c + xm.vlmacc0 b_re + xm.vlmacc1 b_re + xm.vlsat vec_sat + xm.vstrpv a_re, bytemask /* tail results are written with the bytemask operand instead of a full xm.vstr */ + xm.vstrpv vec_tmp, bytemask + xm.vldd vec_tmp + xm.vstd vec_tmp + xm.vclrdr + xm.vlmacc0 b_im + xm.vlmacc1 b_im + xm.vlsat vec_sat + xm.vstrpv a_im, bytemask + xm.vstrpv vec_tmp, bytemask + xm.vldd vec_tmp + xm.vstd vec_tmp + nop + +.p2align 2 +
+.L_done: /* epilogue: reload the spilled registers; a0 is returned as 15 minus the 5-bit field taken from the xm.vgetc result (presumably the VPU headroom - confirm) */ + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 + + { li a0, 15 ; xm.vgetc t3} + { xm.zexti t3, 5 ; lw s8, 4 (sp)} + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_squared_mag.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_squared_mag.S new file mode 100644 index 00000000..730e4313 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_squared_mag.S @@ -0,0 +1,120 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+ +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* + +headroom_t vect_complex_s16_squared_mag( + int16_t a[], + const int16_t* b_real, + const int16_t* b_imag, + const unsigned length, + const right_shift_t sat); + +*/ + +.text +.p2align 2 + +#define NSTACKVECS (4) +#define NSTACKWORDS (8+8*(NSTACKVECS)+4) + + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) +#define STACK_VEC_SAT (NSTACKWORDS-16-4) + +#define FUNCTION_NAME vect_complex_s16_squared_mag + +#define a x10 +#define b_re x11 +#define b_im x12 +#define length x13 +#define vec_tmp x18 +#define vec_sat x19 +#define bytemask x20 +#define _32 x21 + + +FUNCTION_NAME: /* prologue: allocate the frame and spill s2-s5 (x18-x21 in the standard RISC-V naming), the only callee-saved registers the #defines above use */ + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + + mv s2, a4 /* sat: 5th argument in the prototype above; packed into both halves of s2 and written across STACK_VEC_SAT below */ + { addi t3,sp, (STACK_VEC_SAT)*4 ; nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */ + { slli s3, s2, 16 ; xm.zexti s2, 16 } + { or s2, s2, s3 ; li _32, 32 } + xm.stdi s2,s2, 0(t3) + xm.stdi s2,s2, 8(t3) + xm.stdi s2,s2, 16(t3) + xm.stdi s2,s2, 24(t3) + + { nop ; slli bytemask, length, SIZEOF_LOG2_S16 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli bytemask, length, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + { srli length, length, 4 ; slli t3, _32, 3 } + { xm.zexti bytemask, 5 ; xm.vsetc t3} +{ xm.ldawsp t3, STACK_VEC_SAT *4 ; nop} +{nop ; xm.ldawsp vec_tmp, STACK_VEC_TMP*4 } + { xm.mkmsk bytemask, bytemask ; xm.brff length, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_loop_top: /* one pass per full 16-element vector: b_re is multiplied by itself, b_im by itself is accumulated on top, and the saturated result is stored to a[] */ + + { addi length, length, -1 ; xm.vclrdr } + { nop ; xm.vldc b_re} + { nop ;
xm.vlmacc0 b_re} + xm.vlmacc1 b_re + { add b_re, b_re, _32 ; nop} + { nop ; xm.vldc b_im} + { nop ; xm.vlmacc0 b_im} + xm.vlmacc1 b_im + { add b_im, b_im, _32 ; nop} + xm.vlsat t3 + { add a, a, _32 ; xm.vstr a} + { nop ; xm.bt length, .L_loop_top } + + .L_loop_bot: //astew: worth jumping over to single issue mode for this? + + { nop ; xm.brff bytemask, .L_done }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.vclrdr } + { nop ; xm.vstd vec_tmp} + { nop ; xm.vldc b_re} + { nop ; xm.vlmacc0 b_re} + xm.vlmacc1 b_re + { nop ; xm.vldc b_im} + { nop ; xm.vlmacc0 b_im} + xm.vlmacc1 b_im + xm.vlsat t3 + { nop ; xm.vstrpv a, bytemask } + { nop ; xm.vstrpv vec_tmp, bytemask } + { nop ; xm.vldd vec_tmp} + { nop ; xm.vstd vec_tmp} + +.L_done: /* epilogue: reload s2-s5; a0 is returned as 15 minus the 5-bit field taken from the xm.vgetc result (presumably the VPU headroom - confirm) */ + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + { li a0, 15 ; xm.vgetc t3} + { xm.zexti t3, 5 ; nop } + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32.
*/ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_sum.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_sum.S new file mode 100644 index 00000000..754b9dd3 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_sum.S @@ -0,0 +1,156 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + +#include "../asm_helper.h" + +/* + +complex_s16_t vect_complex_s16_sum( + const int16_t* b_real, + const int16_t* b_imag, + const unsigned length, + const right_shift_t sat); + +*/ + +.text +.p2align 2 + +#define NSTACKVECS (6) +#define NSTACKWORDS (8+(8*NSTACKVECS)+4) + +#define b_real x10 +#define b_imag x11 +#define length x12 +#define sat x13 +#define _32 x18 +#define tmp x19 +#define tail_bytes x20 + +#define STACK_VEC_ZEROS (NSTACKWORDS- 8-16-4) +#define STACK_VEC_TMP (NSTACKWORDS-16-16-4) +#define STACK_VEC_TMP2 (NSTACKWORDS-8-2) + +#define STACK_LENGTH 6 +#define STACK_SAT 7 + +#define FUNCTION_NAME vect_complex_s16_sum_OLLD //why did I think I needed to write this in assembly?
+ +FUNCTION_NAME: /* Sums b_real[] and then b_imag[] in two passes: a loop over full 16-element vectors (length >> 4) followed by a tail for the remaining elements. The symbol carries an _OLLD suffix, so this looks like a retired implementation - confirm before relying on it. */ + + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 /* NOTE(review): s6/s7 are overwritten below but the epilogue has no matching xm.lddsp for this slot, and the STACK_LENGTH/STACK_SAT stores (words 6 and 7, byte offsets 24 and 28) appear to land on the same bytes - confirm the intended frame layout */ + + { addi s6,sp, (STACK_VEC_TMP2)*4 ; nop } + addi s7, s6, (-30) + + li t3, 0x0100 + { addi tmp, t3, 1 ; xm.vsetc t3} + { srli length, length, 4 ; slli tail_bytes, length, 1 } + { nop ; xm.zexti tail_bytes, 5 } + { nop ; sw length, (STACK_LENGTH)*4 (sp)} + { nop ; sw sat, (STACK_SAT)*4 (sp)} + + { addi t3,sp, (STACK_VEC_ZEROS)*4 ; xm.vclrdr } + { addi t3,sp, (STACK_VEC_TMP)*4 ; xm.vstd t3} + + xm.stdi tmp,tmp, 0(t3) + xm.stdi tmp,tmp, 8(t3) + xm.stdi tmp,tmp, 16(t3) + xm.stdi tmp,tmp, 24(t3) + + { li _32, 32 ; xm.vldc t3} + { addi t3,sp, (STACK_VEC_ZEROS)*4 ; xm.brff length, .L_real_bot }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_real_top: /* real pass: one iteration per full vector of b_real */ + + { addi length, length, -1 ; xm.vlmaccr0 b_real} + xm.vlmaccr1 b_real + {nop ; xm.vstd s6} + {nop ; xm.vldd s7} + {nop ; xm.vstr s6} + {nop ; xm.vldr s7} + + + { add b_real, b_real, _32 ; xm.bt length, .L_real_top } + + .L_real_bot: + + { sub t3, t3, tail_bytes ; xm.brff tail_bytes, .L_real_end }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.vldc t3} + { xm.mkmski tmp, 2 ; xm.vlmaccr0 b_real} /* the real-part tail must accumulate from b_real, the pointer the real loop above advanced */ + xm.vlmaccr1 b_real + {nop ; xm.vstd s6} + {nop ; xm.vldd s7} + {nop ; xm.vstr s6} + {nop ; xm.vldr s7} + + { addi t3,sp, (STACK_SAT)*4 ; nop /* xm.vadddr */ } + { addi t3,sp, 4 ; nop} + xm.vlsat t3 + xm.vstrpv t3, tmp + { nop ; lw a0, 4 (sp)} + + .L_real_end: + + { nop ; lw length, (STACK_LENGTH)*4 (sp)} + { addi t3,sp, (STACK_VEC_ZEROS)*4 ; xm.vclrdr } + + + .L_imag_top: /* NOTE(review): unlike .L_real_top there is no xm.brff guard on length before this loop is entered - confirm behaviour when there are no full vectors */ + + { addi length, length, -1 ; xm.vlmaccr0 b_imag}
+ xm.vlmaccr1 b_imag + {nop ; xm.vstd s6} + {nop ; xm.vldd s7} + {nop ; xm.vstr s6} + {nop ; xm.vldr s7} + + { add b_imag, b_imag, _32 ; xm.bt length, .L_imag_top } /* loop back to the imaginary loop head; branching to .L_real_top would re-enter the real pass with b_real already advanced */ + + .L_imag_bot: + + { sub t3, t3, tail_bytes ; xm.brff tail_bytes, .L_imag_end }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.vldc t3} + { xm.mkmski tmp, 2 ; xm.vlmaccr0 b_imag} + xm.vlmaccr1 b_imag + {nop ; xm.vstd s6} + {nop ; xm.vldd s7} + {nop ; xm.vstr s6} + {nop ; xm.vldr s7} + + { addi t3,sp, (STACK_SAT)*4 ; nop /* xm.vadddr */ } + { addi t3,sp, 4 ; nop} + xm.vlsat t3 + xm.vstrpv t3, tmp + { nop ; lw a0, 4 (sp)} /* NOTE(review): the real tail also loads a0 from 4(sp); when both tails run, this load replaces that value - confirm how the two parts of the complex_s16_t result are meant to be combined */ + + .L_imag_end: + +.L_done: /* epilogue: only the s2-s5 slots are reloaded here */ + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32.
*/ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_to_complex_s32.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_to_complex_s32.S new file mode 100644 index 00000000..88b1e5d9 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s16/vect_complex_s16_to_complex_s32.S @@ -0,0 +1,62 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +/* +void vect_complex_s16_to_vect_complex_s32( + complex_s32_t* a, + const int16_t* b_real, + const int16_t* b_imag, + const unsigned length); +*/ + +#include "../asm_helper.h" + +#define NSTACKVECS 0 +#define NSTACKWORDS (4 + (8*NSTACKVECS)+4) + +#define FUNCTION_NAME vect_complex_s16_to_vect_complex_s32 + +#define a x10 +#define b_real x11 +#define b_imag x12 +#define length x13 +#define tmp_real x18 +#define tmp_imag x19 + +.text; /* Translation error on this line: unexpected token at position 5. */ +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.p2align 4 + + +FUNCTION_NAME: /* Converts split 16-bit real/imag arrays into interleaved complex_s32_t elements, one element per loop pass. tmp_real/tmp_imag are x18/x19 (s2/s3 in the standard RISC-V naming), hence the spill below. */ + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in entsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + + beqz length, .L_loop_end /* zero-length input: nothing to convert; without this guard the first decrement in the loop would wrap around */ + // Can this be done faster? Or with the VPU at all?
+ .L_loop_top: /* walks the arrays from the last element down to index 0: length is decremented first and then used as the index */ + addi length, length, -1 + xm.ld16s tmp_real, length(b_real) + xm.ld16s tmp_imag, length(b_imag) + xm.std tmp_real,tmp_imag, length(a) + bnez length, .L_loop_top + + .L_loop_end: + xm.lddsp s3,s2,8 + xm.retsp (NSTACKWORDS)*4 /* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS \nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + + //.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 37. */ + .set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 46. */ + .set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 33. */ + .set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 34. */ + .set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 36. */ + +.L_func_end: + .size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_complex_scale.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_complex_scale.S new file mode 100644 index 00000000..9bdb740a --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_complex_scale.S @@ -0,0 +1,151 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+ +#if defined(__VX4B__) + +#include "../asm_helper.h" + +/* + +headroom_t vect_complex_s32_scale( + complex_s32_t* a, + const complex_s32_t* b, + const int32_t c_real, + const int32_t c_imag, + const unsigned length, + const right_shift_t b_shr, + const right_shift_t c_shr); + +*/ + +.text +.p2align 2 + + +#define NSTACKWORDS (8+8+4) + +#define a x10 +#define b x11 +#define c_real x12 +#define c_imag x13 +#define length x18 +#define b_shr x19 + +#define _32 x20 +#define tmp_vec x21 + + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) + +#define FUNCTION_NAME vect_complex_s32_scale + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + + { addi tmp_vec,sp, (STACK_VEC_TMP)*4 ; li t3, 0 } + { li _32, 32 ; xm.vsetc t3} + + xm.stdi c_real,c_imag, 0(tmp_vec) + xm.stdi c_real,c_imag, 8(tmp_vec) + xm.stdi c_real,c_imag, 16(tmp_vec) + xm.stdi c_real,c_imag, 24(tmp_vec) + + mv c_real, a6 + { xm.mkmski c_imag, 32 ; nop} + xm.vlashr tmp_vec, c_real + xm.vstrpv tmp_vec, c_imag + + { nop ; xm.vldc tmp_vec} + + #undef c_real + #undef c_imag + #define vec_count x12 + #define tail_bytes x13 + + mv length, a4 + // { nop ; lw length, (STACK_LENGTH)*4 (sp)} + mv b_shr, a5 + { slli tail_bytes, length, 3 ; nop} + { xm.zexti tail_bytes, 5 ; srli vec_count, length, 2 } + + #undef length + + { xm.mkmsk tail_bytes, tail_bytes ; xm.mkmski t3, 32 } + { nop ; xm.brff vec_count, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + + .L_loop_top: + xm.vlashr b, b_shr + xm.vstrpv tmp_vec, t3 + { addi vec_count, vec_count, -1 ; xm.vldd tmp_vec} + { add b, b, _32 ; xm.vcmr0 } + { nop ; xm.vcmi0 } + { add a, a, _32 ; xm.vstr a} + { nop ; xm.bt vec_count, .L_loop_top } + +.L_loop_bot: + + { nop ; 
xm.brff tail_bytes, .L_done }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + nop + + + xm.vclrdr + xm.vstd tmp_vec + xm.vlashr b, b_shr + xm.vstrpv tmp_vec, tail_bytes + xm.vldd tmp_vec + xm.vcmr0 + xm.vcmi0 + xm.vstrpv tmp_vec, tail_bytes + xm.vstrpv a, tail_bytes + xm.vldd tmp_vec + xm.vstd tmp_vec + +.p2align 2 + + + // { ; bf tail_bytes, .L_done } + // { ; vclrdr } + // { ; vstd tmp_vec[0] } + // vlashr b[0], b_shr + // vstrpv tmp_vec[0], tail_bytes + // { ; vldd tmp_vec[0] } + // { ; vcmr } + // { ; vcmi } + // vstrpv tmp_vec[0], tail_bytes + // vstrpv a[0], tail_bytes + // { ; vldd tmp_vec[0] } + // { ; vstd tmp_vec[0] } + +.p2align 2 +.L_done: + xm.lddsp s3,s2,8/* XAT Warning: "Not correcting LDDSP offset because it's not in the local frame range" */ + xm.lddsp s5,s4,16/* XAT Warning: "Not correcting LDDSP offset because it's not in the local frame range" */ + + { li a0, 31 ; xm.vgetc t3} + { xm.zexti t3, 5 ; nop } + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. 
*/ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conj_macc.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conj_macc.S new file mode 100644 index 00000000..e7fcd926 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conj_macc.S @@ -0,0 +1,129 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +// XMOS Public License: Version 1 + +#if defined(__VX4B__) + +#include "../asm_helper.h" + +/* + +headroom_t vect_complex_s32_conj_macc( + complex_s32_t* acc, + const complex_s32_t* b, + const complex_s32_t* c, + const unsigned length, + const right_shift_t acc_shr, + const right_shift_t b_shr, + const right_shift_t c_shr); + +*/ + +.text +.p2align 2 + + +#define NSTACKWORDS (8+8+4) + + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) + +#define STACK_BYTEMASK 1 + +#define acc x10 +#define b x11 +#define c x12 +#define len x13 +#define shr_b x18 +#define shr_c x19 +#define tmp x20 +#define tmp_vec x21 +#define shr_acc x22 + +#define bytemask len + +#define FUNCTION_NAME vect_complex_s32_conj_macc + + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + + mv shr_acc,a4 + mv shr_b, a5 + mv shr_c, a6 + { slli t3, len, 3 ; nop} + { li tmp, 32 ; nop} + { xm.zexti t3, 5 ; srli len, len, 2 } + { addi tmp_vec,sp, (STACK_VEC_TMP)*4 ; xm.mkmsk t3, t3 } + { li t3, 0 ; sw t3, (STACK_BYTEMASK)*4 (sp)} + { xm.mkmski t3, 32 ; xm.vsetc t3} + { addi len, len, -1 ; xm.brff len, .L_loop_bot_s32 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may 
need revising' */ + +.L_loop_top_s32: + xm.vlashr acc, shr_acc + xm.vstrpv acc, t3 + xm.vlashr c, shr_c + xm.vstrpv tmp_vec, t3 + { nop ; xm.vldc tmp_vec} + xm.vlashr b, shr_b + xm.vstrpv tmp_vec, t3 + { nop ; xm.vldd tmp_vec} + { add b, b, tmp ; xm.vcmcr0 } + { add c, c, tmp ; xm.vcmci0 } + { nop ; xm.vladd acc} + { add acc, acc, tmp ; xm.vstr acc} + { addi len, len, -1 ; xm.bt len, .L_loop_top_s32 } + +.L_loop_bot_s32: + { nop ; lw bytemask, (STACK_BYTEMASK)*4 (sp)} + { nop ; xm.brff len, .L_done_s32 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.vclrdr } + { nop ; xm.vstd tmp_vec} + xm.vlashr acc, shr_acc + xm.vstrpv acc, bytemask + xm.vlashr c, shr_c + xm.vstrpv tmp_vec, bytemask + { nop ; xm.vldc tmp_vec} + xm.vlashr b, shr_b + xm.vstrpv tmp_vec, bytemask + { nop ; xm.vldd tmp_vec} + { nop ; xm.vcmcr0 } + { mv t3, tmp_vec ; xm.vcmci0 } + { nop ; xm.vladd acc} + xm.vstrpv tmp_vec, bytemask + { nop ; xm.vldr t3} + { nop ; xm.vstr tmp_vec} + xm.vstrpv acc, bytemask + +.L_done_s32: + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 + + { li a0, 31 ; xm.vgetc t3} + { xm.zexti t3, 5 ; nop } + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. 
*/ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conj_nmacc.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conj_nmacc.S new file mode 100644 index 00000000..2d54bf94 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conj_nmacc.S @@ -0,0 +1,129 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +// XMOS Public License: Version 1 + +#if defined(__VX4B__) + +#include "../asm_helper.h" + +/* + +headroom_t vect_complex_s32_conj_nmacc( + complex_s32_t* acc, + const complex_s32_t* b, + const complex_s32_t* c, + const unsigned length, + const right_shift_t acc_shr, + const right_shift_t b_shr, + const right_shift_t c_shr); + +*/ + +.text +.p2align 2 + + +#define NSTACKWORDS (8+8+4) + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) + +#define STACK_BYTEMASK 1 + +#define acc x10 +#define b x11 +#define c x12 +#define len x13 +#define shr_b x18 +#define shr_c x19 +#define tmp x20 +#define tmp_vec x21 +#define shr_acc x22 + +#define bytemask len + +#define FUNCTION_NAME vect_complex_s32_conj_nmacc + + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + + mv shr_acc, a4 + { nop ; nop} + mv shr_b, a5 + { slli t3, len, 3 ; nop} + mv shr_c, a6 + { li tmp, 32 ; nop} + { xm.zexti t3, 5 ; srli len, len, 2 } + { addi tmp_vec,sp, (STACK_VEC_TMP)*4 ; xm.mkmsk t3, t3 } + { li t3, 0 ; sw t3, (STACK_BYTEMASK)*4 (sp)} + { xm.mkmski t3, 32 ; xm.vsetc t3} + { addi len, len, -1 ; xm.brff len, .L_loop_bot_s32 }/* XAT Warning: 'Instruction xm.brff can only branch 
forwards; this branch may need revising' */ + +.L_loop_top_s32: + xm.vlashr acc, shr_acc + xm.vstrpv acc, t3 + xm.vlashr c, shr_c + xm.vstrpv tmp_vec, t3 + { nop ; xm.vldc tmp_vec} + xm.vlashr b, shr_b + xm.vstrpv tmp_vec, t3 + { nop ; xm.vldd tmp_vec} + { add b, b, tmp ; xm.vcmcr0 } + { add c, c, tmp ; xm.vcmci0 } + { nop ; xm.vlsub acc} + { add acc, acc, tmp ; xm.vstr acc} + { addi len, len, -1 ; xm.bt len, .L_loop_top_s32 } + +.L_loop_bot_s32: + { nop ; lw bytemask, (STACK_BYTEMASK)*4 (sp)} + { nop ; xm.brff len, .L_done_s32 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.vclrdr } + { nop ; xm.vstd tmp_vec} + xm.vlashr acc, shr_acc + xm.vstrpv acc, bytemask + xm.vlashr c, shr_c + xm.vstrpv tmp_vec, bytemask + { nop ; xm.vldc tmp_vec} + xm.vlashr b, shr_b + xm.vstrpv tmp_vec, bytemask + { nop ; xm.vldd tmp_vec} + { nop ; xm.vcmcr0 } + { mv t3, tmp_vec ; xm.vcmci0 } + { nop ; xm.vlsub acc} + xm.vstrpv tmp_vec, bytemask + { nop ; xm.vldr t3} + { nop ; xm.vstr tmp_vec} + xm.vstrpv acc, bytemask + +.L_done_s32: + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 + + { li a0, 31 ; xm.vgetc t3} + { xm.zexti t3, 5 ; nop } + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. 
*/
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conjugate.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conjugate.S
new file mode 100644
index 00000000..02777efd
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conjugate.S
@@ -0,0 +1,83 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*
+headroom_t vect_complex_s32_conjugate(
+    complex_s32_t* a,
+    const complex_s32_t* b,
+    const unsigned length);
+*/
+// Each pass loads vpu_vec_complex_conj_op, applies xm.vlmul0 with 32 bytes of b[] and stores 32 bytes to a[].
+.text
+.p2align 2
+
+#define NSTACKVECTS     (1)
+#define NSTACKWORDS     (4 + 8*(NSTACKVECTS)+4)
+
+#define STACK_VEC_TMP   (NSTACKWORDS - 8-4)
+
+#define a       x10
+#define b       x11
+#define len     x12
+
+#define _32     x13
+#define tail    x18
+
+#define FUNCTION_NAME vect_complex_s32_conjugate
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp s3,s2,0                                        // x18 (s2) is used as tail
+    // t3 = 0 is handed to xm.vsetc; tail = len << 3 (presumably the byte count, 8 bytes per element -- TODO confirm).
+    { li _32, 32                                ; li t3, 0 }
+    { slli tail, len, 3                         ; xm.vsetc t3 }
+    lui t3, %hi(vpu_vec_complex_conj_op)                    // t3 = address of vpu_vec_complex_conj_op
+    addi t3,t3, %lo(vpu_vec_complex_conj_op)
+    // len >> 2 = number of full passes; tail becomes a mask (presumably covering the leftover bytes, tail mod 32).
+    { xm.zexti tail, 5                          ; srli len, len, 2 }
+    { xm.mkmsk tail, tail                       ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    // Full passes. The branch target above lies further down, so the forward-only restriction is met.
+    .L_loop_top:
+        { addi len, len, -1                     ; xm.vldr t3 }
+        { add b, b, _32                         ; xm.vlmul0 b }
+        { add a, a, _32                         ; xm.vstr a }
+        { nop                                   ; xm.bt len, .L_loop_top }
+    .L_loop_bot:
+        { nop                                   ; xm.brff tail, .L_done }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        { nop                                   ; xm.vclrdr }
+        { nop                                   ; xm.vldr t3 }
+        { addi t3,sp, (STACK_VEC_TMP)*4         ; xm.vlmul0 b }     // t3 now points at the stack scratch vector
+        xm.vstrpv a, tail                                   // masked store of the partial result
+
+        // To make sure the tail is captured in the headroom..
+        { nop                                   ; xm.vstd t3 }
+        xm.vstrpv t3, tail
+        { nop                                   ; xm.vldd t3 }
+        { nop                                   ; xm.vstd t3 }
+
+.L_done:
+    xm.lddsp s3,s2,0
+    { li a0, 31                                 ; xm.vgetc t3 }     // a0 = 31 - (xm.vgetc value after xm.zexti 5)
+    { xm.zexti t3, 5                            ; nop }
+    { sub a0, a0, t3                            ; xm.retsp (NSTACKWORDS)*4 }
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */
+.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */
+.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */
+.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conjugate_mul.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conjugate_mul.S
new file mode 100644
index 00000000..27b763af
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_conjugate_mul.S
@@ -0,0 +1,116 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*
+Per 32-byte pass: b and c are shifted (b_shr, c_shr) into scratch, xm.vcmcr0/xm.vcmci0 run, result goes to a[].
+headroom_t vect_complex_s32_conj_mul(
+    complex_s32_t* a,
+    const complex_s32_t* b,
+    const complex_s32_t* c,
+    const unsigned length,
+    const right_shift_t b_shr,
+    const right_shift_t c_shr);
+Arguments are taken from x10..x13 (a, b, c, length) and from a4, a5 (b_shr, c_shr).
+*/
+
+.text
+.p2align 2
+
+
+#define NSTACKWORDS     (8+8+4)
+// Register aliases; x18..x21 are s2..s5 and are saved and restored by this function.
+#define a           x10
+#define b           x11
+#define c           x12
+#define len         x13
+#define shr_b       x18
+#define shr_c       x19
+#define tmp         x20
+#define tmp_vec     x21
+
+#define bytemask    len
+
+// Stack slots, counted in words from sp (scaled by 4 where they are used).
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+
+#define STACK_BYTEMASK  1
+
+#define FUNCTION_NAME vect_complex_s32_conj_mul
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp s3,s2,8
+    xm.stdsp s5,s4,16
+    mv shr_b, a4
+    { slli t3, len, 3                           ; nop }                 // t3 = len << 3
+    mv shr_c, a5
+    { li tmp, 32                                ; nop }                 // pointer stride per pass
+    { xm.zexti t3, 5                            ; srli len, len, 2 }    // len = number of full passes
+    { addi tmp_vec,sp, (STACK_VEC_TMP)*4        ; xm.mkmsk t3, t3 }     // tmp_vec = stack scratch vector
+    { li t3, 0                                  ; sw t3, (STACK_BYTEMASK)*4 (sp) }  // tail mask parked on the stack
+    { xm.mkmski t3, 32                          ; xm.vsetc t3 }
+    { addi len, len, -1                         ; xm.brff len, .L_loop_bot_s32 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    // NOTE(review): the loop count only works if both slots of a bundle read pre-bundle register values -- confirm.
+    .L_loop_top_s32:
+        xm.vlashr b, shr_b
+        xm.vstrpv tmp_vec, t3
+        { nop                                   ; xm.vldd tmp_vec }
+        xm.vlashr c, shr_c
+        xm.vstrpv tmp_vec, t3
+        { nop                                   ; xm.vldc tmp_vec }
+        { add b, b, tmp                         ; xm.vcmcr0 }
+        { add c, c, tmp                         ; xm.vcmci0 }
+        { add a, a, tmp                         ; xm.vstr a }
+        { addi len, len, -1                     ; xm.bt len, .L_loop_top_s32 }
+
+    .L_loop_bot_s32:
+        // Tail: len is reloaded with the saved mask (bytemask aliases len); a zero mask means nothing is left.
+        { nop                                   ; lw bytemask, (STACK_BYTEMASK)*4 (sp) }
+        { nop                                   ; xm.brff len, .L_done_s32 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        { nop                                   ; xm.vclrdr }
+        { nop                                   ; xm.vstd tmp_vec }
+        xm.vlashr b, shr_b
+        xm.vstrpv tmp_vec, bytemask
+        { nop                                   ; xm.vldd tmp_vec }
+        xm.vlashr c, shr_c
+        xm.vstrpv tmp_vec, bytemask
+        { nop                                   ; xm.vldc tmp_vec }
+        { nop                                   ; xm.vcmcr0 }
+        { mv t3, tmp_vec                        ; xm.vcmci0 }
+        xm.vstrpv tmp_vec, bytemask
+        { nop                                   ; xm.vldr t3 }          // t3 == tmp_vec here
+        { nop                                   ; xm.vstr tmp_vec }     // presumably refreshes headroom from the masked result -- TODO confirm
+        xm.vstrpv a, bytemask
+
+.L_done_s32:
+    xm.lddsp s3,s2,8
+    xm.lddsp s5,s4,16
+    // Return value: a0 = 31 - (xm.vgetc value after xm.zexti 5).
+    { li a0, 31                                 ; xm.vgetc t3 }
+    { xm.zexti t3, 5                            ; nop }
+    { sub a0, a0, t3                            ; xm.retsp (NSTACKWORDS)*4 }
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */
+.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */
+.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */
+.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_macc.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_macc.S
new file mode 100644
index 00000000..114b29b7
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_macc.S
@@ -0,0 +1,132 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// XMOS Public License: Version 1
+
+#if defined(__VX4B__)
+
+#include "../asm_helper.h"
+
+/*
+Per 32-byte pass: acc is shifted and written back, b and c are shifted into scratch, then vcmr0/vcmi0, vladd acc, store.
+headroom_t vect_complex_s32_macc(
+    complex_s32_t* acc,
+    const complex_s32_t* b,
+    const complex_s32_t* c,
+    const unsigned length,
+    const right_shift_t acc_shr,
+    const right_shift_t b_shr,
+    const right_shift_t c_shr);
+Arguments are taken from x10..x13 (acc, b, c, length) and from a4, a5, a6 (acc_shr, b_shr, c_shr).
+*/
+
+.text
+.p2align 2
+
+
+#define NSTACKWORDS     (8+8+4)
+// NOTE: the three STACK_SHR_* offsets are not referenced in this file; the shifts are read from a4..a6.
+#define STACK_SHR_ACC   (NSTACKWORDS+1)
+#define STACK_SHR_B     (NSTACKWORDS+2)
+#define STACK_SHR_C     (NSTACKWORDS+3)
+
+#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+
+#define STACK_BYTEMASK  1
+// Register aliases; x18..x22 are s2..s6 (s2..s7 are saved and restored by this function).
+#define acc         x10
+#define b           x11
+#define c           x12
+#define len         x13
+#define shr_b       x18
+#define shr_c       x19
+#define tmp         x20
+#define tmp_vec     x21
+#define shr_acc     x22
+
+#define bytemask    len
+
+#define FUNCTION_NAME vect_complex_s32_macc
+
+
+
+FUNCTION_NAME:
+    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.stdsp s3,s2,8
+    xm.stdsp s5,s4,16
+    xm.stdsp s7,s6,24
+    mv shr_acc, a4
+    { nop                                       ; nop }
+    mv shr_b, a5
+    { slli t3, len, 3                           ; nop }                 // t3 = len << 3
+    mv shr_c, a6
+    { li tmp, 32                                ; nop }                 // pointer stride per pass
+    { xm.zexti t3, 5                            ; srli len, len, 2 }    // len = number of full passes
+    { addi tmp_vec,sp, (STACK_VEC_TMP)*4        ; xm.mkmsk t3, t3 }     // tmp_vec = stack scratch vector
+    { li t3, 0                                  ; sw t3, (STACK_BYTEMASK)*4 (sp) }  // tail mask parked on the stack
+    { xm.mkmski t3, 32                          ; xm.vsetc t3 }
+    { addi len, len, -1                         ; xm.brff len, .L_loop_bot_s32 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    // NOTE(review): the loop count only works if both slots of a bundle read pre-bundle register values -- confirm.
+.L_loop_top_s32:
+    xm.vlashr acc, shr_acc
+    xm.vstrpv acc, t3                                               // shifted accumulator goes back to memory first
+    xm.vlashr b, shr_b
+    xm.vstrpv tmp_vec, t3
+    { nop                                       ; xm.vldc tmp_vec }
+    xm.vlashr c, shr_c
+    xm.vstrpv tmp_vec, t3
+    { nop                                       ; xm.vldd tmp_vec }
+    { add b, b, tmp                             ; xm.vcmr0 }
+    { add c, c, tmp                             ; xm.vcmi0 }
+    { nop                                       ; xm.vladd acc }
+    { add acc, acc, tmp                         ; xm.vstr acc }
+    { addi len, len, -1                         ; xm.bt len, .L_loop_top_s32 }
+
+.L_loop_bot_s32:
+    { nop                                       ; lw bytemask, (STACK_BYTEMASK)*4 (sp) }   // len now holds the tail mask
+    { nop                                       ; xm.brff len, .L_done_s32 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                                       ; xm.vclrdr }
+    { nop                                       ; xm.vstd tmp_vec }
+    xm.vlashr acc, shr_acc
+    xm.vstrpv acc, bytemask
+    xm.vlashr b, shr_b
+    xm.vstrpv tmp_vec, bytemask
+    { nop                                       ; xm.vldc tmp_vec }
+    xm.vlashr c, shr_c
+    xm.vstrpv tmp_vec, bytemask
+    { nop                                       ; xm.vldd tmp_vec }
+    { nop                                       ; xm.vcmr0 }
+    { mv t3, tmp_vec                            ; xm.vcmi0 }
+    { nop                                       ; xm.vladd acc }
+    xm.vstrpv tmp_vec, bytemask
+    { nop                                       ; xm.vldr t3 }          // t3 == tmp_vec here
+    { nop                                       ; xm.vstr tmp_vec }     // presumably refreshes headroom from the masked result -- TODO confirm
+    xm.vstrpv acc, bytemask
+
+.L_done_s32:
+    xm.lddsp s3,s2,8
+    xm.lddsp s5,s4,16
+    xm.lddsp s7,s6,24
+    // Return value: a0 = 31 - (xm.vgetc value after xm.zexti 5).
+    { li a0, 31                                 ; xm.vgetc t3 }
+    { xm.zexti t3, 5                            ; nop }
+    { sub a0, a0, t3                            ; xm.retsp (NSTACKWORDS)*4 }
+
+
+.L_func_end:
+
+.globl FUNCTION_NAME
+.type FUNCTION_NAME,@function
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */
+.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */
+.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */
+.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */
+.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
+
+#undef FUNCTION_NAME
+
+
+
+#endif //defined(__VX4B__)
+
+
+
diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_mag.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_mag.S
new file mode 100644
index 00000000..bf45a262
--- /dev/null
+++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_mag.S
@@ -0,0 +1,166 @@
+// Copyright 2020-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+ +#if defined(__VX4B__) + +#include "../asm_helper.h" + +/* + +headroom_t vect_complex_s32_mag( + int32_t a[], + const complex_s32_t* b, + const unsigned length, + const right_shift_t b_shr, + const complex_s32_t* rot_table + const unsigned table_rows); + +*/ + +.text +.p2align 2 + +#define NSTACKVECS (2) +#define NSTACKWORDS (8+(8*NSTACKVECS)+4) + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) +#define STACK_VEC_TMP2 (NSTACKWORDS-16-4) + +#define FUNCTION_NAME vect_complex_s32_mag + +#define Q(R) R + +#define a x10 +#define b x11 +#define length x12 +#define b_shr x13 +#define _32 x18 +#define vec_tmp x19 +#define mask32 x20 +#define tmp x21 +#define iter x22 +#define tail_bytes x23 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + { li s8, 0 ; sw s8, 4 (sp)} + + { li _32, 32 ; li t3, 0 } + { addi vec_tmp,sp, (STACK_VEC_TMP)*4 ; xm.vsetc t3} + + { srli length, length, 2 ; mv tail_bytes, length } + { xm.mkmski mask32, 32 ; xm.zexti tail_bytes, 2 } + { slli tail_bytes, tail_bytes, 2 ; xm.brff length, .L_outer_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_outer_loop_top: + // vlashr b[0], b_shr + // vstrpv vec_tmp[0], mask32 + // { add b, b, _32 ; vsign } + // { ; vlmul vec_tmp[0] } + // vstrpv vec_tmp[0], mask32 + // { ; ldw x28, sp[STACK_ROT_TABLE] } + // { ; ldw iter, sp[STACK_TABLE_ROWS] } + + xm.vlashr b, b_shr + { nop ; xm.vsign } + mv t3, a4 + xm.vstrpv vec_tmp, mask32 + xm.vlashr b, b_shr + { add b, b, _32 ; xm.vlmul0 vec_tmp} + mv iter, a5 + xm.vstrpv vec_tmp, mask32 + + .L_inner_loop_top: + { addi iter, iter, -1 ; xm.vldd vec_tmp} + { add t3, t3, _32 ; xm.vldc t3} + { nop ; xm.vcmr0 } + { nop ; xm.vcmi0 } + xm.vstrpv 
vec_tmp, mask32 + { nop ; xm.vsign } + { nop ; xm.vlmul0 vec_tmp} + xm.vstrpv vec_tmp, mask32 + { nop ; xm.bt iter, .L_inner_loop_top } + + { nop ; xm.vstr vec_tmp} + { addi length, length, -1 ; lw t3,0 ( vec_tmp)} + { nop ; sw t3,0 ( a)} + { nop ; lw t3,8 ( vec_tmp)} + { nop ; sw t3,4 ( a)} + { addi a, a, 8 ; lw t3,16 ( vec_tmp)} + { nop ; sw t3,0 ( a)} + { nop ; lw t3,24 ( vec_tmp)} + { addi a, a, 8 ; sw t3,4 ( a)} + { nop ; xm.bt length, .L_outer_loop_top } + + .L_outer_loop_bot: + + { xm.mkmsk tail_bytes, tail_bytes ; xm.brff tail_bytes, .L_done }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + xm.vlashr b, b_shr + + mv t3, a4 + { nop ; xm.vsign } + xm.vstrpv vec_tmp, mask32 + xm.vlashr b, b_shr + { nop ; xm.vlmul0 vec_tmp} + mv iter, a5 + xm.vstrpv vec_tmp, mask32 + + .L_inner_loop2_top: + { addi iter, iter, -1 ; xm.vldd vec_tmp} + { add t3, t3, _32 ; xm.vldc t3} + { nop ; xm.vcmr0 } + { nop ; xm.vcmi0 } + xm.vstrpv vec_tmp, mask32 + { nop ; xm.vsign } + { nop ; xm.vlmul0 vec_tmp} + xm.vstrpv vec_tmp, mask32 + { nop ; xm.bt iter, .L_inner_loop2_top } + + { nop ; lw t3,0 ( vec_tmp)} + { nop ; sw t3,0 ( vec_tmp)} + { nop ; lw t3,8 ( vec_tmp)} + { nop ; sw t3,4 ( vec_tmp)} + { nop ; lw t3,16 ( vec_tmp)} + { nop ; sw t3,8 ( vec_tmp)} + { nop ; lw t3,24 ( vec_tmp)} + { addi t3,sp, (STACK_VEC_TMP)*4 ; sw t3,12 ( vec_tmp)} + { nop ; xm.vclrdr } + { nop ; xm.vldr t3} + { nop ; xm.vstd t3} + xm.vstrpv t3, tail_bytes + xm.vstrpv a, tail_bytes + { nop ; xm.vldd t3} + { nop ; xm.vstd t3} + +.L_done: + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 + + { li a0, 31 ; xm.vgetc t3} + { xm.zexti t3, 5 ; lw s8, 4 (sp)} + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. 
*/ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_mul.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_mul.S new file mode 100644 index 00000000..b89b4f39 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_mul.S @@ -0,0 +1,116 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + +#include "../asm_helper.h" + +/* + +headroom_t vect_complex_s32_mul( + complex_s32_t* a, + const complex_s32_t* b, + const complex_s32_t* c, + const unsigned length, + const right_shift_t b_shr, + const right_shift_t c_shr); + +*/ + +.text +.p2align 2 + + +#define NSTACKWORDS (8+8+4) + +#define a x10 +#define b x11 +#define c x12 +#define len x13 +#define shr_b x18 +#define shr_c x19 +#define tmp x20 +#define tmp_vec x21 + +#define bytemask len + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) + +#define STACK_BYTEMASK 1 + +#define FUNCTION_NAME vect_complex_s32_mul + + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + + mv shr_b, a4 + { slli t3, len, 3 ; nop} + mv shr_c, a5 + { li tmp, 32 ; nop} + { xm.zexti t3, 5 ; srli len, len, 2 } + { addi tmp_vec,sp, 
(STACK_VEC_TMP)*4 ; xm.mkmsk t3, t3 } + { li t3, 0 ; sw t3, (STACK_BYTEMASK)*4 (sp)} + { xm.mkmski t3, 32 ; xm.vsetc t3} + { addi len, len, -1 ; xm.brff len, .L_loop_bot_s32 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + +.L_loop_top_s32: + xm.vlashr b, shr_b + xm.vstrpv tmp_vec, t3 + { nop ; xm.vldc tmp_vec} + xm.vlashr c, shr_c + xm.vstrpv tmp_vec, t3 + { nop ; xm.vldd tmp_vec} + { add b, b, tmp ; xm.vcmr0 } + { add c, c, tmp ; xm.vcmi0 } + { add a, a, tmp ; xm.vstr a} + { addi len, len, -1 ; xm.bt len, .L_loop_top_s32 } + +.L_loop_bot_s32: + { nop ; lw bytemask, (STACK_BYTEMASK)*4 (sp)} + { nop ; xm.brff len, .L_done_s32 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.vclrdr } + { nop ; xm.vstd tmp_vec} + xm.vlashr b, shr_b + xm.vstrpv tmp_vec, bytemask + { nop ; xm.vldc tmp_vec} + xm.vlashr c, shr_c + xm.vstrpv tmp_vec, bytemask + { nop ; xm.vldd tmp_vec} + { nop ; xm.vcmr0 } + { mv t3, tmp_vec ; xm.vcmi0 } + xm.vstrpv tmp_vec, bytemask + { nop ; xm.vldr t3} + { nop ; xm.vstr tmp_vec} + xm.vstrpv a, bytemask + +.L_done_s32: + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + + { li a0, 31 ; xm.vgetc t3} + { xm.zexti t3, 5 ; nop } + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. 
*/ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_nmacc.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_nmacc.S new file mode 100644 index 00000000..204fb92b --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_nmacc.S @@ -0,0 +1,128 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +// XMOS Public License: Version 1 + +#if defined(__VX4B__) + +#include "../asm_helper.h" + +/* + +headroom_t vect_complex_s32_nmacc( + complex_s32_t* acc, + const complex_s32_t* b, + const complex_s32_t* c, + const unsigned length, + const right_shift_t acc_shr, + const right_shift_t b_shr, + const right_shift_t c_shr); + +*/ + +.text +.p2align 2 + + +#define NSTACKWORDS (8+8+4) + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) + +#define STACK_BYTEMASK 1 + +#define acc x10 +#define b x11 +#define c x12 +#define len x13 +#define shr_b x18 +#define shr_c x19 +#define tmp x20 +#define tmp_vec x21 +#define shr_acc x22 + +#define bytemask len + +#define FUNCTION_NAME vect_complex_s32_nmacc + + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + + mv shr_acc, a4 + mv shr_b, a5 + mv shr_c, a6 + { slli t3, len, 3 ; nop} + { li tmp, 32 ; nop} + { xm.zexti t3, 5 ; srli len, len, 2 } + { addi tmp_vec,sp, (STACK_VEC_TMP)*4 ; xm.mkmsk t3, t3 } + { li t3, 0 ; sw t3, (STACK_BYTEMASK)*4 (sp)} + { xm.mkmski t3, 32 ; xm.vsetc t3} + { addi len, len, -1 ; xm.brff len, .L_loop_bot_s32 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + 
+.L_loop_top_s32: + xm.vlashr acc, shr_acc + xm.vstrpv acc, t3 + xm.vlashr b, shr_b + xm.vstrpv tmp_vec, t3 + { nop ; xm.vldc tmp_vec} + xm.vlashr c, shr_c + xm.vstrpv tmp_vec, t3 + { nop ; xm.vldd tmp_vec} + { add b, b, tmp ; xm.vcmr0 } + { add c, c, tmp ; xm.vcmi0 } + { nop ; xm.vlsub acc} + { add acc, acc, tmp ; xm.vstr acc} + { addi len, len, -1 ; xm.bt len, .L_loop_top_s32 } + +.L_loop_bot_s32: + { nop ; lw bytemask, (STACK_BYTEMASK)*4 (sp)} + { nop ; xm.brff len, .L_done_s32 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.vclrdr } + { nop ; xm.vstd tmp_vec} + xm.vlashr acc, shr_acc + xm.vstrpv acc, bytemask + xm.vlashr b, shr_b + xm.vstrpv tmp_vec, bytemask + { nop ; xm.vldc tmp_vec} + xm.vlashr c, shr_c + xm.vstrpv tmp_vec, bytemask + { nop ; xm.vldd tmp_vec} + { nop ; xm.vcmr0 } + { mv t3, tmp_vec ; xm.vcmi0 } + { nop ; xm.vlsub acc} + xm.vstrpv tmp_vec, bytemask + { nop ; xm.vldr t3} + { nop ; xm.vstr tmp_vec} + xm.vstrpv acc, bytemask + +.L_done_s32: + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 + + { li a0, 31 ; xm.vgetc t3} + { xm.zexti t3, 5 ; nop } + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. 
*/ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_real_mul.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_real_mul.S new file mode 100644 index 00000000..44140a7a --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_real_mul.S @@ -0,0 +1,134 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + +#include "../asm_helper.h" + +/* + +headroom_t vect_complex_s32_real_mul( + complex_s32_t* a, + const complex_s32_t* b, + const int32_t c[], + const unsigned length, + const right_shift_t b_shr, + const right_shift_t c_shr); + +*/ + +.text +.p2align 2 + +#define FUNCTION_NAME vect_complex_s32_real_mul + +#define NSTACKVECS (2) +#define NSTACKWORDS (12+8*NSTACKVECS+4) + + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) +#define STACK_VEC_C1 (NSTACKWORDS-16-4) + + +#define STACK_TAIL_LEN (8) + + +#define a x10 +#define b x11 +#define c x12 +#define length x13 +#define b_shr x18 +#define c_shr x19 +#define _32 x20 +#define vec_tmp x21 +#define vec_c1 x22 +#define tmpA x23 +#define tmpB x24 + + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + { li t3, 0 ; sw s8, 4 (sp)} + { mv t3, length ; xm.vsetc t3} + mv b_shr, a4 + { srli length, length, 2 ; nop} + mv c_shr, a5 + { xm.zexti t3, 2 ; nop} + +{ xm.ldawsp vec_tmp, STACK_VEC_TMP*4 ; nop } +{ xm.ldawsp vec_c1, STACK_VEC_C1*4 ; nop } + { xm.mkmski t3, 32 ; sw t3, (STACK_TAIL_LEN)*4 (sp)} + { li _32, 32 ; xm.brff length, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only 
branch forwards; this branch may need revising' */ + + + .L_loop_top: + + xm.vlashr c, c_shr + xm.vstrpv vec_tmp, t3 + xm.lddsp tmpA,tmpB,((STACK_VEC_TMP/2) + 0)*8 + xm.stdsp tmpA,tmpA,((STACK_VEC_C1/2) + 0)*8 + xm.stdsp tmpB,tmpB,((STACK_VEC_C1/2) + 1)*8 + xm.lddsp tmpA,tmpB,((STACK_VEC_TMP/2) + 1)*8 + { addi c, c, 8 ; addi length, length, -1 } + xm.stdsp tmpA,tmpA,((STACK_VEC_C1/2) + 2)*8 + xm.stdsp tmpB,tmpB,((STACK_VEC_C1/2) + 3)*8 + + xm.vlashr b, b_shr + { add b, b, _32 ; xm.vlmul0 vec_c1} + { add a, a, _32 ; xm.vstr a} + { addi c, c, 8 ; xm.bt length, .L_loop_top } + +.L_loop_bot: + + { addi t3,sp, (STACK_VEC_C1)*4 ; lw length, (STACK_TAIL_LEN)*4 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */ + { slli length, length, 3 ; xm.brff length, .L_done }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { xm.mkmsk length, length ; xm.vclrdr } + { nop ; xm.vstd vec_tmp} + xm.vlashr c, c_shr + xm.vstrpv vec_tmp, length + xm.lddsp tmpA,tmpB,((STACK_VEC_TMP / 2) + 0)*8 + xm.stdsp tmpA,tmpA,((STACK_VEC_C1 / 2) + 0)*8 + xm.stdsp tmpB,tmpB,((STACK_VEC_C1 / 2) + 1)*8 + xm.lddsp tmpA,tmpB,((STACK_VEC_TMP / 2) + 1)*8 + xm.stdsp tmpA,tmpA,((STACK_VEC_C1 / 2) + 2)*8 + xm.stdsp tmpB,tmpB,((STACK_VEC_C1 / 2) + 3)*8 + xm.vlashr b, b_shr + { nop ; xm.vlmul0 t3} + xm.vstrpv a, length + xm.vstrpv vec_tmp, length + { nop ; xm.vldd vec_tmp} + { nop ; xm.vstd vec_tmp} + + +.L_done: + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 + + { li a0, 31 ; xm.vgetc t3} + { xm.zexti t3, 5 ; lw s8, 4 (sp)} + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. 
*/ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_squared_mag.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_squared_mag.S new file mode 100644 index 00000000..7dcf2db1 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_squared_mag.S @@ -0,0 +1,120 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + +#include "../asm_helper.h" + +/* + +headroom_t vect_complex_s32_squared_mag( + int32_t a[], + const complex_s32_t* b, + const unsigned length, + const right_shift_t b_shr); + +*/ + +.text +.p2align 2 + + +#define NSTACKWORDS (7+8+1) + +#define a x10 +#define b x11 +#define length x12 +#define b_shr x13 +#define vec_count x18 +#define _16 x19 +#define vec_tmp x20 +#define tail_mask x21 +#define vec_ones x22 +#define tmpA x23 +#define tmpB x24 + + +#define STACK_VEC_TMP (NSTACKWORDS-8-1) + +#define FUNCTION_NAME vect_complex_s32_squared_mag + + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + xm.stdsp s7,s6,16 + { nop ; sw s8, 24 (sp)} + + { li _16, 16 ; srli vec_count, length, 2 } + { li t3, 0 ; slli tail_mask, length, 3 } + { 
xm.zexti tail_mask, 5 ; xm.vsetc t3} + +lui t3, %hi(vpu_vec_complex_ones) + addi t3,t3, %lo(vpu_vec_complex_ones) + { addi vec_tmp,sp, (STACK_VEC_TMP)*4 ; mv vec_ones, t3 } + { xm.mkmski t3, 32 ; xm.brff vec_count, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_loop_top: + xm.vlashr b, b_shr + xm.vstrpv vec_tmp, t3 + { add b, b, _16 ; xm.vldc vec_tmp} + { add b, b, _16 ; xm.vldd vec_tmp} + { addi vec_count, vec_count, -1 ; xm.vcmcr0 } + { nop /* zero out imag part so that we don't */ ; xm.vlmul0 vec_ones} + { nop /* clobber the headroom counter */ ; xm.vstr vec_tmp} + { nop ; lw tmpA,0 ( vec_tmp)} + { nop ; lw tmpB,8 ( vec_tmp)} + xm.stdi tmpA,tmpB, 0(a) + { nop ; lw tmpA,16 ( vec_tmp)} + { nop ; lw tmpB,24 ( vec_tmp)} + xm.stdi tmpA,tmpB, 8(a) + { add a, a, _16 ; xm.bt vec_count, .L_loop_top } + + .L_loop_bot: + { xm.zexti length, 2 ; xm.brff tail_mask, .L_done }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { slli length, length,1 ; xm.vclrdr } + { xm.mkmsk tail_mask, tail_mask ; xm.vstd vec_tmp} + xm.vlashr b, b_shr + xm.vstrpv vec_tmp, tail_mask + { li tmpA, 6 ; xm.vldd vec_tmp} + { sub length, tmpA, length ; xm.vldc vec_tmp} + { nop ; xm.vcmcr0 } + { nop ; xm.vlmul0 vec_ones} + { xm.shli length, length, 1 ; xm.vstr vec_tmp} + { nop ; xm.bru length } + { nop ; lw t3,16 ( vec_tmp)} + { nop ; sw t3,8 ( a)} + { nop ; lw t3,8 ( vec_tmp)} + { nop ; sw t3,4 ( a)} + { nop ; lw t3,0 ( vec_tmp)} + { nop ; sw t3,0 ( a)} +.L_done: + xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 + xm.lddsp s7,s6,16 + + { li a0, 31 ; xm.vgetc t3} + { xm.zexti t3, 5 ; lw s8, 24 (sp)} + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. 
*/ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_sum.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_sum.S new file mode 100644 index 00000000..ebf39336 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_sum.S @@ -0,0 +1,150 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + +#include "../asm_helper.h" + +/* + + +void vect_complex_s32_sum( + complex_s64_t* res, + const complex_s32_t* b, + const unsigned length, + const right_shift_t b_shr); + + res is an output: the code ends by storing the real_lo/real_hi pair at byte offset 0 and the imag_lo/imag_hi pair at byte offset 8 of res. + NOTE(review): a negative b_shr appears to trip the xm.assertn near the top of the function -- confirm. +*/ + +.text +.p2align 2 + +#define NSTACKVECS (2) +#define NSTACKWORDS (8+(8*NSTACKVECS)+4) + +#define b x10 +#define b_shr x11 +#define length x12 +#define _32 x13 +#define tmp x18 +#define tail_bytes x19 + +#define STACK_VEC_ZEROS (NSTACKWORDS- 8-4) +#define STACK_VEC_TMP (NSTACKWORDS-16-4) + +#define STACK_RES (1) + +#define FUNCTION_NAME vect_complex_s32_sum + + + +FUNCTION_NAME: + + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + + { mv b, a1 ; sw a0, (STACK_RES)*4 (sp)} + + + { mv b_shr, a3 ; slli tail_bytes, length, 3 } + { nop ; xm.zexti tail_bytes, 5 } + + { addi t3,sp, 
(STACK_VEC_ZEROS)*4 ; xm.vclrdr } + { li t3, 0 ; xm.vstd t3} + { xm.slt tmp, b_shr, t3 ; xm.vsetc t3} + { addi tmp,sp, (STACK_VEC_TMP)*4 ; xm.assertn tmp /*Cannot be negative shift*/ } +lui t3, %hi(vpu_vec_0x40000000) + addi t3,t3, %lo(vpu_vec_0x40000000) + xm.vlashr t3, b_shr + { li t3, 0 ; xm.vstr tmp} + { srli length, length, 2 ; xm.vldc tmp} + { li _32, 32 ; xm.vsetc t3} + { nop ; xm.vclrdr } + + { nop ; xm.brff length, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_loop_top: + + { addi length, length, -1 ; xm.vlmacc0 b} + { add b, b, _32 ; xm.bt length, .L_loop_top } + + .L_loop_bot: + + { addi t3,sp, (STACK_VEC_ZEROS)*4 ; xm.brff tail_bytes, .L_get_res }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { sub t3, t3, tail_bytes ; nop } + { nop ; xm.vldc t3} + { addi t3,sp, (STACK_VEC_ZEROS)*4 ; xm.vlmacc0 b} + + +/* We've got 8 40-bit accumulators. Lower 32 bits are in vR, upper 8 in vD. + vD does appear to sign-extend the values up to 64 bits. + + (vD:vR)[k] == ((int32_t)vD[k])*(2^32) + ((uint32_t)vR[k]) */ + +#define real_hi x10 +#define real_lo x11 +#define imag_hi x12 +#define imag_lo x13 +#define num x19 +#define tmp_re x20 +#define tmp_im x21 + +// astew [2020-10-16]: There's probably a faster way to do this. 
See the VPU-based solution I found for vect_s32_sum for +// non-complex values + +.L_get_res: + { li real_hi, 0 ; li imag_hi, 0 } + { li num, 1 ; xm.vstr tmp} + xm.lddi real_lo,imag_lo, 0(tmp) + xm.lddi tmp_re,tmp_im, 8(tmp) + xm.maccu real_hi, real_lo, num, tmp_re + xm.maccu imag_hi, imag_lo, num, tmp_im + xm.lddi tmp_re,tmp_im, 16(tmp) + xm.maccu real_hi, real_lo, num, tmp_re + xm.maccu imag_hi, imag_lo, num, tmp_im + xm.lddi tmp_re,tmp_im, 24(tmp) + xm.maccu real_hi, real_lo, num, tmp_re + xm.maccu imag_hi, imag_lo, num, tmp_im + { nop ; xm.vfttf } + { li num, 2 ; xm.vstd t3} + xm.lddi tmp_re,tmp_im, 0(t3) + { add real_hi, real_hi, tmp_re ; add imag_hi, imag_hi, tmp_im } + + // astew [2021-09-28]: ... what was the purpose of these next 4 instructions..? + // maybe at the time I was thinking the lower word should be + // interpreted as signed? + // { shr tmp_re, real_lo, 1 ; zext real_lo, 1 } + // { shr tmp_im, imag_lo, 1 ; zext imag_lo, 1 } + // maccs real_hi, real_lo, num, tmp_re + // maccs imag_hi, imag_lo, num, tmp_im + { nop ; lw tmp, (STACK_RES)*4 (sp)} + xm.stdi real_lo,real_hi, 0(tmp) + xm.stdi imag_lo,imag_hi, 8(tmp) + + + +.L_done: + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. 
*/ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + +#undef FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_to_complex_s16.S b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_to_complex_s16.S new file mode 100644 index 00000000..732d45fe --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_complex_s32/vect_complex_s32_to_complex_s16.S @@ -0,0 +1,107 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + + + +#if defined(__VX4B__) + + +/* + +headroom_t vect_complex_s32_to_vect_complex_s16( + int16_t* a_real, + int16_t* a_imag, + const complex_s32_t* b, + const unsigned length, + const right_shift_t b_shr); + +*/ + +#include "../asm_helper.h" + +.text +.p2align 2 + +#define NSTACKVECS (2) +#define NSTACKWORDS (8 + (8*NSTACKVECS)+4) + +#define FUNCTION_NAME vect_complex_s32_to_vect_complex_s16 + +#define STACK_VEC_TMP (NSTACKWORDS-16-4) + +#define STACK_B_SHR (NSTACKWORDS+1) + +#define a_real x10 +#define a_imag x11 +#define b x12 +#define len x13 +#define b_shr x18 +#define tail x19 +#define _28 x20 +#define mask x21 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + { li t3, 0 ; li _28, 28 } + addi b,b,-4 + { li t3, 16 ; xm.vsetc t3} + { srli len, len, 2 ; slli tail, len, 1 } + mv b_shr, a4 + { xm.mkmski mask, 8 ; nop} + { sub b_shr, b_shr, t3 ; xm.zexti tail, 3 } +lui t3, %hi(vpu_vec_complex_pos_j) + addi t3,t3, %lo(vpu_vec_complex_pos_j) + { xm.mkmsk tail, tail ; xm.brff len, .L_loop_bot }/* XAT Warning: 
'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_loop_top: + xm.vlashr b, b_shr + { addi b, b, 4 ; xm.vlmul0 t3} + { addi len, len, -1 ; xm.vdepth16 } + { nop ; xm.vdepth16 } + xm.vstrpv a_real, mask + xm.vlashr b, b_shr + { add b, b, _28 ; xm.vlmul0 t3} + { addi a_real, a_real, 8 ; xm.vdepth16 } + { nop ; xm.vdepth16 } + xm.vstrpv a_imag, mask + { addi a_imag, a_imag, 8 ; xm.bt len, .L_loop_top } + + +.L_loop_bot: + + + + beqz tail, .L_finish + xm.vlashr b, b_shr + xm.vlmul0 t3 + xm.vdepth16 + xm.vdepth16 + xm.vstrpv a_real, tail + addi b, b, 4 + xm.vlashr b, b_shr + xm.vlmul0 t3 + xm.vdepth16 + xm.vdepth16 + xm.vstrpv a_imag, tail + + +.L_finish: + xm.lddsp s3,s2,8/* XAT Warning: "Not correcting LDDSP offset because it's not in the local frame range" */ + xm.lddsp s5,s4,16/* XAT Warning: "Not correcting LDDSP offset because it's not in the local frame range" */ + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + +.L_func_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. 
*/ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_conj_macc.S b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_conj_macc.S new file mode 100644 index 00000000..1ef7cae7 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_conj_macc.S @@ -0,0 +1,77 @@ +// Copyright 2022-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + +.text + +/* + Complex conjugate multiply-accumulate + + a[k] = a[k] + b[k] (*c) conjugate(c[k]) + + Note: a[], b[] and c[] must all be 8-byte aligned + NOTE(review): no length == 0 guard is visible before .L_loop_top; presumably length >= 1 is required -- confirm. + void vect_complex_f32_conj_macc( + complex_float_t a[], + const complex_float_t b[], + const complex_float_t c[], + const unsigned length); + +*/ + +#define FUNC_NAME vect_complex_f32_conj_macc +#define NSTACKWORDS 12 + +.globl FUNC_NAME +.type FUNC_NAME,@function + +#define a x10 +#define b x11 +#define c x12 +#define len x13 +#define B_re x18 +#define B_im x19 +#define C_re x20 +#define C_im x21 +#define A_re x22 +#define A_im x23 + +.p2align 4 +FUNC_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + xm.stdsp s7,s6,16 + +{ li t3, 0 ; addi len, len, -1 } +.L_loop_top: + xm.ldd A_re,A_im, len(a) + xm.ldd B_re,B_im, len(b) + xm.ldd C_re,C_im, len(c) + xm.fmacc A_re, A_re, B_re, C_re // A[k].re += B[k].re * C[k].re + xm.fmacc A_re, A_re, C_im, B_im // A[k].re += C[k].im * B[k].im + xm.fsub C_im, t3, C_im // C_im <-- 0 - C[k].im = -C[k].im (t3 holds 0) + xm.fmacc A_im, A_im, B_re, C_im // A[k].im -= B[k].re * C[k].im + xm.fmacc A_im, A_im, B_im, C_re // A[k].im += B[k].im * C[k].re + xm.std A_re,A_im, len(a) + { addi len, len, -1 ; xm.bt len, .L_loop_top } +.L_loop_bot: + +.L_done: + 
xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 + xm.lddsp s7,s6,16 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + + .set FUNC_NAME.nstackwords,NSTACKWORDS; .globl FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 39. */ + .set FUNC_NAME.maxcores,1; .globl FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 26. */ + .set FUNC_NAME.maxtimers,0; .globl FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 27. */ + .set FUNC_NAME.maxchanends,0; .globl FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 29. */ +.Ltmp1: + .size FUNC_NAME, .Ltmp1-FUNC_NAME + +#undef NSTACKWORDS + + +#endif diff --git a/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_conj_mul.S b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_conj_mul.S new file mode 100644 index 00000000..ddd3c84d --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_conj_mul.S @@ -0,0 +1,81 @@ +// Copyright 2022-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ +#if defined(__VX4B__) + +.text + +/* + Complex conjugate multiply + + a[k] = b[k] (*c) ( conjugate(c[k]) ) + + It is safe to use the same argument twice, so + vect_complex_f32_conj_mul(x[], x[], y[]) --> x *= conjugate(y) + + Note: a[], b[] and c[] must all be 8-byte aligned + NOTE(review): no length == 0 guard is visible before .L_loop_top; presumably length >= 1 is required -- confirm. + void vect_complex_f32_conj_mul( + complex_float_t a[], + const complex_float_t b[], + const complex_float_t c[], + const unsigned length); + +*/ + +#define FUNC_NAME vect_complex_f32_conj_mul +#define NSTACKWORDS 12 + +.globl FUNC_NAME +.type FUNC_NAME,@function + +#define a x10 +#define b x11 +#define c x12 +#define len x13 +#define B_re x18 +#define B_im x19 +#define C_re x20 +#define C_im x21 +#define A_re x22 +#define A_im x23 + +.p2align 4 +FUNC_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + xm.stdsp s7,s6,16 + +{ li t3, 0 ; addi len, len, -1 } +.L_loop_top: + xm.ldd B_re,B_im, len(b) + xm.ldd C_re,C_im, len(c) + + xm.fmul A_re, B_re, C_re // A[k].re = B[k].re * C[k].re + xm.fmacc A_re, A_re, B_im, C_im // A[k].re += B[k].im * C[k].im + xm.fsub C_im, t3, C_im // C_im <-- 0 - C[k].im = -C[k].im (t3 holds 0) + xm.fmul A_im, B_re, C_im // A[k].im = B[k].re * -C[k].im + xm.fmacc A_im, A_im, C_re, B_im // A[k].im += C[k].re * B[k].im + + xm.std A_re,A_im, len(a) + { addi len, len, -1 ; xm.bt len, .L_loop_top } +.L_loop_bot: + +.L_done: + xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 + xm.lddsp s7,s6,16 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + + .set FUNC_NAME.nstackwords,NSTACKWORDS; .globl FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at 
position 39. */ + .set FUNC_NAME.maxcores,1; .globl FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 26. */ + .set FUNC_NAME.maxtimers,0; .globl FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 27. */ + .set FUNC_NAME.maxchanends,0; .globl FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 29. */ +.Ltmp1: + .size FUNC_NAME, .Ltmp1-FUNC_NAME + +#undef NSTACKWORDS + + +#endif diff --git a/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_macc.S b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_macc.S new file mode 100644 index 00000000..9d65c0e8 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_macc.S @@ -0,0 +1,77 @@ +// Copyright 2022-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + +.text + +/* + Complex multiply-accumulate + + a[k] = a[k] + b[k] (*c) c[k] + + Note: a[], b[] and c[] must all be 8-byte aligned + + void vect_complex_f32_macc( + complex_float_t a[], + const complex_float_t b[], + const complex_float_t c[], + const unsigned length); + +*/ + +#define FUNC_NAME vect_complex_f32_macc +#define NSTACKWORDS 12 + +.globl FUNC_NAME +.type FUNC_NAME,@function + +#define a x10 +#define b x11 +#define c x12 +#define len x13 +#define B_re x18 +#define B_im x19 +#define C_re x20 +#define C_im x21 +#define A_re x22 +#define A_im x23 + +.p2align 4 +FUNC_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + xm.stdsp s7,s6,16 + +{ li t3, 0 ; addi len, len, -1 } +.L_loop_top: + xm.ldd A_re,A_im, len(a) + xm.ldd B_re,B_im, len(b) + xm.ldd C_re,C_im, len(c) + xm.fmacc A_im, A_im, B_re, C_im // A[k].im += B[k].re * C[k].im 
+ xm.fmacc A_im, A_im, C_re, B_im // A[k].im += C[k].re * B[k].im + xm.fsub B_im, t3, B_im // B_im <-- 0 - B[k].im = -B[k].im + xm.fmacc A_re, A_re, B_re, C_re // A[k].re += B[k].re * C[k].re + xm.fmacc A_re, A_re, B_im, C_im // A[k].re -= B[k].im * C[k].im + xm.std A_re,A_im, len(a) + { addi len, len, -1 ; xm.bt len, .L_loop_top } +.L_loop_bot: + +.L_done: + xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 + xm.lddsp s7,s6,16 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + + .set FUNC_NAME.nstackwords,NSTACKWORDS; .globl FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 39. */ + .set FUNC_NAME.maxcores,1; .globl FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 26. */ + .set FUNC_NAME.maxtimers,0; .globl FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 27. */ + .set FUNC_NAME.maxchanends,0; .globl FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 29. */ +.Ltmp1: + .size FUNC_NAME, .Ltmp1-FUNC_NAME + +#undef NSTACKWORDS + + +#endif diff --git a/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_mul.S b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_mul.S new file mode 100644 index 00000000..c506e646 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_complex_f32_mul.S @@ -0,0 +1,82 @@ +// Copyright 2022-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ +#if defined(__VX4B__) + +.text + +/* + Complex multiply + + a[k] = b[k] (*c) c[k] + + It is safe to use the same argument twice, so + vect_complex_f32_mul(x[], x[], y[]) --> x *= y + + Note: a[], b[] and c[] must all be 8-byte aligned + NOTE(review): no length == 0 guard is visible before .L_loop_top; presumably length >= 1 is required -- confirm. + void vect_complex_f32_mul( + complex_float_t a[], + const complex_float_t b[], + const complex_float_t c[], + const unsigned length); + +*/ + +#define FUNC_NAME vect_complex_f32_mul +#define NSTACKWORDS 12 + +.globl FUNC_NAME +.type FUNC_NAME,@function + +#define a x10 +#define b x11 +#define c x12 +#define len x13 +#define B_re x18 +#define B_im x19 +#define C_re x20 +#define C_im x21 +#define A_re x22 +#define A_im x23 + + +.p2align 4 +FUNC_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + xm.stdsp s7,s6,16 + +{ li t3, 0 ; addi len, len, -1 } +.L_loop_top: + xm.ldd B_re,B_im, len(b) + xm.ldd C_re,C_im, len(c) + + xm.fmul A_im, B_re, C_im // A[k].im = B[k].re * C[k].im + xm.fmacc A_im, A_im, C_re, B_im // A[k].im += C[k].re * B[k].im + xm.fsub B_im, t3, B_im // B_im <-- 0 - B[k].im = -B[k].im (t3 holds 0) + xm.fmul A_re, B_re, C_re // A[k].re = B[k].re * C[k].re + xm.fmacc A_re, A_re, B_im, C_im // A[k].re -= B[k].im * C[k].im + + xm.std A_re,A_im, len(a) + { addi len, len, -1 ; xm.bt len, .L_loop_top } +.L_loop_bot: + +.L_done: + xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 + xm.lddsp s7,s6,16 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + + .set FUNC_NAME.nstackwords,NSTACKWORDS; .globl FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 39. 
*/ + .set FUNC_NAME.maxcores,1; .globl FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 26. */ + .set FUNC_NAME.maxtimers,0; .globl FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 27. */ + .set FUNC_NAME.maxchanends,0; .globl FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 29. */ +.Ltmp1: + .size FUNC_NAME, .Ltmp1-FUNC_NAME + +#undef NSTACKWORDS + + +#endif diff --git a/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_add.S b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_add.S new file mode 100644 index 00000000..197adba3 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_add.S @@ -0,0 +1,88 @@ +// Copyright 2022-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + +.text + +/* + Note: This works for real or complex floats, just use double the length for complex. + + a[k] = b[k] + c[k] + + It is safe to use the same argument twice, so + vect_f32_add(x[], x[], y[]) --> x += y + + Note: a[], b[] and c[] must all be 8-byte aligned + + void vect_f32_add( + float a[], + const float b[], + const float c[], + const unsigned length); + +*/ + +#define FUNC_NAME vect_f32_add +#define NSTACKWORDS 8 + +.globl FUNC_NAME +.type FUNC_NAME,@function + +#define a x10 +#define b x11 +#define c x12 +#define len x13 +#define B0 x18 +#define B1 x19 +#define C0 x20 +#define C1 x21 + +.p2align 4 +FUNC_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + +{ mv t3, len ; xm.zexti len, 1 } +{ srli len, t3, 1 ; xm.brff len, .L_even }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ +.L_odd: + { addi t3, t3, -1 ; nop 
} + { nop ; xm.ldw B0, t3 (b)} + { nop ; xm.ldw C0, t3 (c)} + xm.fadd B0, B0, C0 + xm.stw B0, t3(a) +.L_even: + +{ addi len, len, -1 ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ +{ nop ; xm.bu .L_loop_top } + +.p2align 4 +.L_loop_top: + xm.ldd B0,B1, len(b) + xm.ldd C0,C1, len(c) + xm.fadd B0, B0, C0 + xm.fadd B1, B1, C1 + xm.std B0,B1, len(a) + { addi len, len, -1 ; xm.bt len, .L_loop_top } +.L_loop_bot: + +.L_done: + xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 + addi a0, t3, 0 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + + + // RETURN_REG_HOLDER + .set FUNC_NAME.nstackwords,NSTACKWORDS; .globl FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 39. */ + .set FUNC_NAME.maxcores,1; .globl FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 26. */ + .set FUNC_NAME.maxtimers,0; .globl FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 27. */ + .set FUNC_NAME.maxchanends,0; .globl FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 29. */ +.Ltmp1: + .size FUNC_NAME, .Ltmp1-FUNC_NAME + +#undef NSTACKWORDS + + +#endif diff --git a/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_dot.S b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_dot.S new file mode 100644 index 00000000..e676c3fa --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_dot.S @@ -0,0 +1,118 @@ +// Copyright 2022-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ +#if defined(__VX4B__) + +.text + +/* + + float vect_f32_dot( + const float b[], + const float c[], + const unsigned length); + +*/ + +#define FUNC_NAME vect_f32_dot +#define NSTACKWORDS 8 + +.globl FUNC_NAME +.type FUNC_NAME,@function + +.p2align 4 +FUNC_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + +{ mv a3, a2 ; xm.zexti a2, 1 } /* a3 keeps the full length; a2 presumably becomes length & 1 */ +{ xm.brff a2, .even ; li t3, 0 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ /* t3 is the running sum: every xm.fmacc below accumulates into it and .done copies it to a0 */ + +.odd: + // Deal with tail first + addi a3, a3, -1 + xm.ldw s2, a3(a0) + xm.ldw s3, a3(a1) + xm.fmacc t3, t3, s2, s3 + +.even: + +// 4 possibilities: +// b[] and c[] are (both) DWORD aligned +// b[] and c[] are (both) not DWORD aligned +// b[] or c[] is DWORD aligned, and the other is not. +// Figure out which situation applies, because it will affect whether we can +// do load-doubles and whether the two vectors are aligned if we do. +{ srli s4, a0, 2 ; srli s5, a1, 2 } +{ xm.zexti s4, 1 ; xm.zexti s5, 1 } +{ slli s4, s4, 1 ; mv a2, a3 } +{ or s4, s4, s5 ; xm.brff a2, .done }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ +{ addi a2, a2, -1 ; xm.bru s4 } /* s4 = 2*(bit 2 of b address) + (bit 2 of c address); presumably selects one of the three entries below, with 3 landing past the table on .r0r1_odd. NOTE(review): this only works if each tail entry has exactly the size xm.bru steps by - confirm */ + tail .together + tail .r1_odd + tail .r0_odd + +// b[] and c[] are both not DWORD aligned. +// deal with final element, and shift pointers to be DWORD aligned +.r0r1_odd: + { addi a0, a0, -4 ; xm.ldw s4, a2 (a0)} + { addi a1, a1, -4 ; xm.ldw s5, a2 (a1)} + srli a2, a2, 1 + .r0r1_odd_loop: + xm.fmacc t3, t3, s4, s5 /* accumulates the s4/s5 pair left over from the previous pass (or from the two xm.ldw above) */ + xm.ldd s4,s2, a2(a0) + xm.ldd s5,s3, a2(a1) + xm.fmacc t3, t3, s2, s3 + { addi a2, a2, -1 ; xm.bt a2, .r0r1_odd_loop } + .r0r1_odd_loop_done: + tail .done + +// c[] was odd and b[] even.
+// Since the operands are symmetric (doesn't matter which is which), we can just +// swap pointers and pretend it was the other way around. +.r1_odd: + { mv a0, a1 ; mv a1, a0 } /* NOTE(review): this is only a swap if both slots read their source before either one writes - confirm for VX4B bundles */ +// b[] was odd and c[] even. +.r0_odd: + { srli a2, a2, 1 ; xm.ldw s4, a2 (a0)} + addi a0, a0, -4 + .r0_odd_loop: + xm.ldd s5,s3, a2(a1) + xm.fmacc t3, t3, s4, s3 /* s4 is carried over from the previous load through a0 (or the initial xm.ldw) and is paired with s3 from the aligned vector */ + xm.ldd s4,s2, a2(a0) + xm.fmacc t3, t3, s2, s5 + { addi a2, a2, -1 ; xm.bt a2, .r0_odd_loop } + .r0_odd_loop_done: + tail .done + + nop + +.together: + srli a2, a2, 1 /* a2 becomes a pair index for the xm.ldd loop below */ + .together_loop: + xm.ldd s4,s2, a2(a0) + xm.ldd s5,s3, a2(a1) + xm.fmacc t3, t3, s2, s3 + xm.fmacc t3, t3, s4, s5 + { addi a2, a2, -1 ; xm.bt a2, .together_loop } + +.done: + xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 + addi a0, t3, 0 /* return the accumulated sum */ + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + + + // RETURN_REG_HOLDER + .set FUNC_NAME.nstackwords,NSTACKWORDS; .globl FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 39. */ + .set FUNC_NAME.maxcores,1; .globl FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 26. */ + .set FUNC_NAME.maxtimers,0; .globl FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 27. */ + .set FUNC_NAME.maxchanends,0; .globl FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 29. */ +.Ltmp1: + .size FUNC_NAME, .Ltmp1-FUNC_NAME + +#undef NSTACKWORDS + + +#endif diff --git a/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_max_exponent.S b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_max_exponent.S new file mode 100644 index 00000000..26e9af48 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_max_exponent.S @@ -0,0 +1,72 @@ +// Copyright 2022-2026 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + +.text + + +/* + + exponent_t vect_f32_max_exponent( + const float b[], + const unsigned length); + +*/ + +#define NSTACKWORDS 4 +#define FUNC_NAME vect_f32_max_exponent + +.globl FUNC_NAME +.type FUNC_NAME,@function + +#define b x10 +#define len x11 + +.p2align 4 +FUNC_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + + { mv t3, len ; xm.mkmski a2, 32 } + { slli a2, a2, 16 ; xm.zexti t3, 1 } /* a2 is the running maximum, seeded with (mkmski 32) << 16 - presumably 0xFFFF0000, a large negative start value; t3 presumably becomes length & 1 */ + { srli len, len, 1 ; xm.brff t3, .L_even_elms }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + +// Handle the tail first + xm.ldd s3,s2, len(b) /* NOTE(review): a double-word load for the single tail element also fetches the word after it - confirm that reading one word past the end of b[] is acceptable */ + xm.fsexp t3, s3, s3 + { mv a2, s3 ; nop } + +.L_even_elms: + { addi len, len, -1 ; xm.brff len, .loop_end }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .loop: /* each pass leaves a2 = max(a2, s2, s3), where s2 and s3 are the second outputs of xm.fsexp for the two loaded floats */ + xm.ldd s3,s2, len (b) + xm.fsexp t3, s2, s2 + xm.fsexp a3, s3, s3 + { xm.slt t3, s2, a2 ; nop } + { xm.slt t3, s3, a2 ; xm.bt t3, .not } + { xm.slt t3, s3, s2 ; mv a2, s2 } + .not: + bnez t3, .not2 + mv a2, s3 + .not2: + { addi len, len, -1 ; xm.bt len, .loop } + .loop_end: + + xm.lddsp s3,s2,0 + li a0, 30 + sub a0, a2, a0 /* return value is the running maximum minus 30 */ + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + + // RETURN_REG_HOLDER + .set FUNC_NAME.nstackwords,NSTACKWORDS; .globl FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 39.
*/ + .set FUNC_NAME.maxcores,1; .globl FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 26. */ + .set FUNC_NAME.maxtimers,0; .globl FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 27. */ + .set FUNC_NAME.maxchanends,0; .globl FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 29. */ +.Ltmp0: + .size FUNC_NAME, .Ltmp0-FUNC_NAME + +#undef NSTACKWORDS + +#endif diff --git a/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_to_s32.S b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_to_s32.S new file mode 100644 index 00000000..0a91e8a3 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_f32/vect_f32_to_s32.S @@ -0,0 +1,90 @@ +// Copyright 2022-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + +.text + + +/* + + void vect_f32_to_vect_s32( + int32_t a[], + const float b[], + const unsigned length, + const exponent_t exp); + +*/ + +#define NSTACKWORDS 8 +#define FUNC_NAME vect_f32_to_vect_s32 + +.globl FUNC_NAME +.type FUNC_NAME,@function + +#define a x10 +#define b x11 +#define len x12 +#define exp x13 + +#define mant1 x18 /* x18..x20 are s2..s4; the prologue spills s2..s5 */ +#define mant0 x19 +#define tmp x20 + + +.p2align 4 +FUNC_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + + //handle tail first + { srli t3, len, 1 ; li tmp, 23 } + { xm.zexti len, 1 ; add exp, exp, tmp } /* exp is pre-biased by 23 once, so every later (tmp - exp) includes that offset */ + { mv len, t3 ; xm.brff len, .L_pre_loop }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + xm.ldd mant0,mant1, len(b) + xm.fsexp t3, tmp, mant0 /* t3 is used below as the sign flag and tmp as the exponent (presumed from that usage) */ + xm.fmant mant0, mant0 + { sub tmp, tmp, exp ; xm.brff t3, .L_tail_pos }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this
branch may need revising' */ + { xm.neg mant0, mant0 ; nop } +.L_tail_pos: + { xm.shl mant0, mant0, tmp ; xm.shl t3, len, 1 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mant0, mant0, tmp \nMessage: The shift amount is not 32" */ /* t3 = 2*len, the word index at which the tail result is stored */ + xm.stw mant0, t3(a) + +.L_pre_loop: + { addi len, len, -1 ; xm.brff len, .L_loop_end }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_loop: /* each pass converts the two floats of pair len: split with xm.fsexp/xm.fmant, negate the mantissa unless the sign flag is zero, shift by tmp = exponent - exp */ + xm.ldd mant0,mant1, len(b) + xm.fsexp t3, tmp, mant1 + xm.fmant mant1, mant1 + { sub tmp, tmp, exp ; xm.brff t3, .L_not3 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + xm.neg mant1, mant1 + .L_not3: + xm.shl mant1, mant1, tmp/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mant1, mant1, tmp\nMessage: The shift amount is not 32" */ + xm.fsexp t3, tmp, mant0 + xm.fmant mant0, mant0 + { sub tmp, tmp, exp ; xm.brff t3, .L_not4 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + xm.neg mant0, mant0 + .L_not4: + xm.shl mant0, mant0, tmp/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli mant0, mant0, tmp\nMessage: The shift amount is not 32" */ + xm.std mant0,mant1, len(a) + { addi len, len, -1 ; xm.bt len, .L_loop } + .L_loop_end: + + + xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + + // RETURN_REG_HOLDER + .set FUNC_NAME.nstackwords,NSTACKWORDS; .globl FUNC_NAME.nstackwords /* Translation
error on this line: unexpected token at position 39. */ + .set FUNC_NAME.maxcores,1; .globl FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 26. */ + .set FUNC_NAME.maxtimers,0; .globl FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 27. */ + .set FUNC_NAME.maxchanends,0; .globl FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 29. */ +.Ltmp0: + .size FUNC_NAME, .Ltmp0-FUNC_NAME + +#undef NSTACKWORDS + +#endif diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_abs.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_abs.S new file mode 100644 index 00000000..4f0d6ad4 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_abs.S @@ -0,0 +1,124 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +.text +.p2align 2 + +#define NSTACKWORDS (20) +#define STACK_TMP_VEC 2 /* word offset from sp of the scratch vector; always used as (STACK_TMP_VEC)*4 bytes */ + +#define a x10 +#define b x11 +#define len x12 +#define tail x13 + + + +/* +headroom_t vect_s16_abs( + int16_t a[], + const int16_t b[], + const unsigned length); +*/ + +vect_s16_abs: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + li t3, 0x0100 /* control word handed to xm.vsetc in the bundle after next */ + { slli tail, len, SIZEOF_LOG2_S16 ; srli len, len, EPV_LOG2_S16 }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli tail, len, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S16 \nMessage: The shift amount is not 32" */ + { xm.zexti tail, 5
; xm.vsetc t3} + { nop ; xm.bu .L_apply_op } + +.L_func_end_s16: + +#undef a +#undef b +#undef len + +/* + When branching here: + * a --> x10 + * b --> x11 + * loop_count --> x12 + * tail --> x13 + * VPU mode must already be set. +*/ + +#define a x10 +#define b x11 +#define loop_count x12 +#define tail x13 + +.type .L_apply_op,@function + +.L_apply_op: + + { xm.mkmsk tail, tail ; nop } + { mv s3, b ; xm.brff loop_count, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ /* s3 takes over as the source pointer; x11 (a1) is reused as scratch from here on */ + { li a1, 32 ; xm.bu .L_loop_top } /* a1 = 32, the per-pass advance added to both a and s3 */ +.p2align 4 +.L_loop_top: + { addi loop_count, loop_count, -1 ; xm.vldr s3} + { nop ; xm.vsign } + { nop ; xm.vlmul0 s3} + xm.vlmul1 s3 + + {addi a1,sp, (STACK_TMP_VEC)*4 ; xm.vgetc t3} + {nop; xm.vstr a1} + {li a1, 32; xm.vladd a1} /* NOTE(review): xm.vladd has to use the scratch address placed in a1 two bundles earlier, not the 32 written in this same bundle - confirm bundle read-before-write */ + {nop; xm.vsetc t3} /* writes back the control word captured by xm.vgetc before the scratch store/add */ + + { add a, a, a1 ; xm.vstr a} + { add s3, s3, a1 ; xm.bt loop_count, .L_loop_top } +.L_loop_bot: + + { nop ; xm.brff tail, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.vclrdr } + { nop ; xm.vldr s3} + { nop ; xm.vsign } + + + { nop ; xm.vlmul0 s3} + xm.vlmul1 s3 + + { addi s3,sp, (STACK_TMP_VEC)*4 ; xm.vgetc t3} /* the source pointer is no longer needed; s3 now addresses the scratch vector */ + { nop; xm.vstr s3} + { nop; xm.vladd s3} + + { addi s3,sp, (STACK_TMP_VEC)*4 ; xm.vsetc t3} + { nop ; xm.vstd s3} + { nop ; xm.vpos } + xm.vstrpv s3, tail + { nop ; xm.vldr s3} + { nop ; xm.vstr s3} + xm.vstrpv a, tail /* masked store: tail is the xm.mkmsk of the leftover byte count */ + +.L_finish: /* a0 = (32 >> (ctrl >> 8)) - ((ctrl & 0x1F, presumed for zexti 5) + 1), with ctrl the word read by xm.vgetc */ + { li a0, 32 ; xm.vgetc t3} + { srli a1, t3, 8 ; nop } + { xm.zexti t3, 5 ; xm.shr a0, a0, a1 } + { addi t3, t3, 1 ; nop } + xm.lddsp s3,s2,0 + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + +.L_end_apply_op: +.size .L_apply_op, .L_end_apply_op - .L_apply_op + + + + + +.global vect_s16_abs +.type vect_s16_abs,@function +.set vect_s16_abs.nstackwords,NSTACKWORDS; .global vect_s16_abs.nstackwords /* Translation error on this line: unexpected token at position 41.
*/ +.set vect_s16_abs.maxcores,1; .global vect_s16_abs.maxcores /* Translation error on this line: unexpected token at position 28. */ +.set vect_s16_abs.maxtimers,0; .global vect_s16_abs.maxtimers /* Translation error on this line: unexpected token at position 29. */ +.set vect_s16_abs.maxchanends,0; .global vect_s16_abs.maxchanends /* Translation error on this line: unexpected token at position 31. */ +.size vect_s16_abs, .L_func_end_s16 - vect_s16_abs /* covers only the entry stub; .L_apply_op has its own .size */ + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_abs_sum.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_abs_sum.S new file mode 100644 index 00000000..c668eb19 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_abs_sum.S @@ -0,0 +1,150 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +/* +int32_t vect_s16_abs_sum( + const int16_t b[], + const unsigned length); +*/ + + +#include "../asm_helper.h" + +.text +.p2align 2 + +#define NSTACKVECS (4) +#define NSTACKWORDS (8+8*NSTACKVECS+4) /* = 44 words with NSTACKVECS = 4 */ + +#define FUNCTION_NAME vect_s16_abs_sum + +#define STACK_VEC_TMP (NSTACKWORDS-24-4) /* word 16 */ +#define STACK_VEC_VR (NSTACKWORDS-32-4) /* word 8 */ +#define STACK_VEC_TMP2 (NSTACKWORDS-8-2) /* word 34 */ + +#define b x10 +#define N x11 +#define tail x12 +#define tmp x13 +#define neg_1 x18 +#define pos_2 x19 + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.align 16; /* Translation error on this line: unexpected token at position 9.
*/ + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + li t3, 0x0100 + + { addi s4, sp, (STACK_VEC_TMP2)*4 ; nop } + addi s5, s4, (-30) /* s5 = s4 - 30 bytes; each vstd s4 / vldd s5 / vstr s4 / vldr s5 group below stores through s4 and reloads through this offset pointer */ + + { slli tail, N, SIZEOF_LOG2_S16 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli tail, N, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + { xm.zexti tail, 5 ; xm.vclrdr } + { srli N, N, EPV_LOG2_S16 ; xm.mkmsk tail, tail }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri N, N, EPV_LOG2_S16 \nMessage: The shift amount is not 32" */ + la t3, vpu_vec_0x0002 + { mv pos_2, t3 ; nop } + la t3, vpu_vec_neg_1 + { mv neg_1, t3 ; xm.vldc t3} + { slli tmp, N, 5 ; nop } /* tmp = N * 32 bytes, so t3 = b + tmp addresses the leftover elements loaded on the tail path */ + { add t3, b, tmp ; xm.brff tail, .L_tail_dealt_with }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + { addi tmp,sp, (STACK_VEC_TMP)*4 ; xm.vldr t3} + { nop ; xm.vstd tmp} + xm.vstrpv tmp, tail + { addi t3,sp, (STACK_VEC_VR)*4 ; xm.vclrdr } + { nop ; xm.vlmaccr0 tmp} + xm.vlmaccr1 tmp + + {nop ; xm.vstd s4} + {nop ; xm.vldd s5} + {nop ; xm.vstr s4} + {nop ; xm.vldr s5} + + + { mv t3, tmp ; xm.vstr t3} + { nop ; xm.vldr t3} + { nop ; xm.vpos } + { addi t3,sp, (STACK_VEC_VR)*4 ; xm.vstr t3} + { nop ; xm.vldr t3} + { nop ; xm.vldc pos_2} + { nop ; xm.vlmaccr0 tmp} + xm.vlmaccr1 tmp + + {nop ; xm.vstd s4} + {nop ; xm.vldd s5} + {nop ; xm.vstr s4} + {nop ; xm.vldr s5} + + + + +.L_tail_dealt_with: + { addi tmp,sp, (STACK_VEC_TMP)*4 ; nop } + { addi t3,sp, (STACK_VEC_VR)*4 ; xm.brff N, .L_loop_bot }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may
need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */ +.L_loop_top: /* one pass per 32 bytes of b[]: b advances by 32 and N counts down */ + { nop ; xm.vldc neg_1} + { nop ; xm.vlmaccr0 b} + xm.vlmaccr1 b + + {nop ; xm.vstd s4} + {nop ; xm.vldd s5} + {nop ; xm.vstr s4} + {nop ; xm.vldr s5} + + + { mv t3, b ; xm.vstr t3} + { li t3, 32 ; xm.vldr t3} + { add b, b, t3 ; xm.vpos } + { addi t3,sp, (STACK_VEC_VR)*4 ; xm.vstr tmp} + { nop ; xm.vldr t3} + { nop ; xm.vldc pos_2} + { addi N, N, -1 ; xm.vlmaccr0 tmp} + xm.vlmaccr1 tmp + + {nop ; xm.vstd s4} + {nop ; xm.vldd s5} + {nop ; xm.vstr s4} + {nop ; xm.vldr s5} + + + { nop ; xm.bt N, .L_loop_top } +.L_loop_bot: + + +.L_finish: + + + + + + { addi a1,sp, (STACK_VEC_TMP)*4 ; nop/* xm.vadddr */ } + addi s4, a1, 32-2 /* s4 = byte 30 of the scratch vector, i.e. its last 16-bit element. NOTE(review): the two lw through s4 are then 2 bytes off a word boundary (assuming sp is word aligned) and read 2 bytes past the vector - confirm misaligned lw is supported */ + { nop ; xm.vstd a1} + { nop ; lw a0, 0(s4)} + { slli a0, a0, 16 ; xm.vstr a1} + { nop ; lw a1, 0(s4)} + xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 + { xm.zexti a1, 16 ; nop} + { or a0, a0, a1 ; xm.retsp (NSTACKWORDS)*4 } /* result = (value read after xm.vstd) << 16 | (value read after xm.vstr, presumably masked to 16 bits by zexti) */ + +//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. */ +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32.
*/ +.L_end: + .size FUNCTION_NAME, .L_end - FUNCTION_NAME + + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_argmax.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_argmax.S new file mode 100644 index 00000000..cf1e263d --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_argmax.S @@ -0,0 +1,165 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +/* + +unsigned vect_s16_argmax( + const int16_t b[], + const unsigned length); +*/ + + +#include "../asm_helper.h" + +.text +.p2align 2 + +#define NSTACKVECS (3) +#define NSTACKWORDS (8 + 8*NSTACKVECS+4) /* = 36 words with NSTACKVECS = 3 */ + +#define FUNCTION_NAME vect_s16_argmax + +#define STACK_VEC_MAX_DEX (NSTACKWORDS-8-4) /* word 24 */ +#define STACK_VEC_CUR_MAX (NSTACKWORDS-16-4) /* word 16 */ +#define STACK_VEC_CUR_DEX (NSTACKWORDS-24-4) /* word 8 */ + +#define STACK_N 6 /* word offset where the incoming length is parked by the sw below */ + +#define b x10 // ![0x%08X] +#define N x11 // ![%d] +#define vec_16s x12 // ![0x%X] +#define tmp x13 // ![%d] +#define tmz x18 // ![%d] +#define cur_max x19 // ![0x%08X] +#define mask_0xF x20 // ![0x%04X] + + +FUNCTION_NAME: + + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 /* register saves start at byte 8; the word at sp+0 is used as mask scratch by the loops (vstrpv to sp+0, then lw from 0(sp)) */ + xm.stdsp s5,s4,16 + + li t3, 0x100 //16-bit mode +{ xm.mkmski mask_0xF, 4 ; sw N, (STACK_N)*4 (sp)} +{ srli N, N, 4 ; xm.vsetc t3} + +// cur_max[i] = -0x8000 + la t3, vpu_vec_0x8000 +{ addi t3,sp, (STACK_VEC_CUR_MAX)*4 ; xm.vldr t3} +{ nop ; xm.vstr t3} + +// cur_dex[i] = i +{ addi tmp,sp, (STACK_VEC_CUR_DEX)*4 ; li t3, 15 } +.L_setup_cur_dex: + xm.st16 t3, t3(tmp) + { addi t3, t3, -1 ; xm.bt t3, .L_setup_cur_dex } + +// max_dex[i] = -1 + la t3, vpu_vec_neg_1 +{ addi t3,sp, (STACK_VEC_MAX_DEX)*4 ; xm.vldr t3} +{ addi cur_max,sp, (STACK_VEC_CUR_MAX)*4 ; xm.vstr t3} + + la t3,
vpu_vec_0x0010 +{ mv vec_16s, t3 ; xm.vclrdr } +{ mv t3, b ; xm.brff N, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + +/* + - compute cur_max[k] - b[k] + - create a mask from values less than 0 + cur_max[k] - b[k] < 0 --> b[k] > cur_max[k] + - overwrite cur_max[k] and max_dex[k] where b[k] > cur_max[k] + - add vec_16s (vpu_vec_0x0010, presumably 16 in every lane) to cur_dex[k]; lanes start at cur_dex[i] = i, so they differ but all advance by the same step + - repeat on next 16 elements of b[] + + Note: This replaces max_dex[k] when b[k] > cur_max[k], NOT when b[k] >= cur_max[k], + so this loop prefers earlier indexes + +*/ +.L_loop_top: + { mv b, t3 ; xm.vldr t3} + { addi N, N, -1 ; xm.vlsub cur_max} + { addi t3,sp, 0 ; xm.vdepth1 } + xm.vstrpv t3, mask_0xF + { mv t3, b ; lw tmp, 0 (sp)} + { mv tmz, tmp ; nop } +xm.zip tmz, tmp, 0 + { addi t3,sp, (STACK_VEC_CUR_DEX)*4 ; xm.vldr t3} + xm.vstrpv cur_max, tmp + { addi tmz,sp, (STACK_VEC_MAX_DEX)*4 ; xm.vldr t3} + xm.vstrpv tmz, tmp + { nop ; xm.vladd vec_16s} + { li t3, 32 ; xm.vstr t3} + { add t3, b, t3 ; xm.bt N, .L_loop_top } +.L_loop_bot: + +{ nop ; lw N, (STACK_N)*4 (sp)} +{ xm.zexti N, 4 ; nop } +{ xm.mkmsk N, N ; xm.brff N, .L_no_tail }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ +{ mv b, t3 ; xm.vldr t3} +{ addi t3,sp, 0 ; xm.vlsub cur_max} +{ nop ; xm.vdepth1 } + xm.vstrpv t3, mask_0xF +{ mv t3, b ; lw tmp, 0 (sp)} +{ and tmp, tmp, N ; and tmz, tmp, N } /* restrict the per-lane mask to the leftover lanes selected by N */ +xm.zip tmz, tmp, 0 +{ addi t3,sp, (STACK_VEC_CUR_DEX)*4 ; xm.vldr t3} + xm.vstrpv cur_max, tmp +{ addi tmz,sp, (STACK_VEC_MAX_DEX)*4 ; xm.vldr t3} + xm.vstrpv tmz, tmp + +.L_no_tail: + +#undef cur_max +#undef vec_16s +#undef mask_0xF + +#define cur_max x19 // ![%d] +#define max_dex x12 // ![0x%08X] + +{ addi t3,sp, (STACK_VEC_CUR_MAX)*4 ; li N, 15 } +{xm.ldawsp x20, STACK_VEC_MAX_DEX *4 ; nop} +xm.ld16s cur_max, N(x28) /* x28 is t3 (base of the per-lane maxima); x20 holds the base of the per-lane indexes */ +xm.ld16s max_dex, N(x20) +{ addi N, N, -1 ; nop} +.L_loop2_top: ; +xm.ld16s x10, N(x28) +slt tmp, x10,
cur_max +xm.ld16s x21, N(x20) /* x10 is a0 and x21 is s5: the value and index recorded for lane N */ +xm.eq tmz, a0, cur_max +{nop ; xm.bt tmp, .L_less_than } + .L_greater_or_equal: + { xm.slt tmp, s5, max_dex ; xm.brff tmz, .L_greater }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + .L_equal: + { nop ; xm.brff tmp, .L_less_than }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + .L_greater: + { mv cur_max, a0 ; mv max_dex, s5 } /* lane N wins when its value is larger, or equal with a smaller index */ + + .L_less_than: + { addi N, N, -1 ; xm.bt N, .L_loop2_top } + + xm.lddsp s5,s4,16 + xm.lddsp s3,s2,8 +{ mv a0, max_dex ; xm.retsp (NSTACKWORDS)*4 } /* return the winning index */ + + +.L_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_end - FUNCTION_NAME + + + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_argmin.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_argmin.S new file mode 100644 index 00000000..7a234042 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_argmin.S @@ -0,0 +1,184 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+ +#if defined(__VX4B__) + + +/* + +unsigned vect_s16_argmin( + const int16_t b[], + const unsigned length); +*/ + + +#include "../asm_helper.h" + +.text +.p2align 2 + +#define NSTACKVECS (3) +#define NSTACKWORDS (8 + 8*NSTACKVECS+4) /* = 36 words with NSTACKVECS = 3 */ + +#define FUNCTION_NAME vect_s16_argmin + +#define STACK_VEC_MAX_DEX (NSTACKWORDS-8-4) /* word 24; holds min_dex[] in this file despite the MAX name */ +#define STACK_VEC_CUR_MAX (NSTACKWORDS-16-4) /* word 16; holds cur_min[] in this file despite the MAX name */ +#define STACK_VEC_CUR_DEX (NSTACKWORDS-24-4) /* word 8 */ + +#define STACK_N 6 /* word offset where the incoming length is parked by the sw below */ + +#define b x10 // ![0x%08X] +#define N x11 // ![%d] +#define vec_16s x12 // ![0x%X] +#define tmp x13 // ![%d] +#define tmz x18 // ![%d] +#define cur_min x19 // ![0x%08X] +#define mask_0xF x20 // ![0x%04X] +#define vec_ones x21 // ![0x%08X] + + +FUNCTION_NAME: + + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 /* register saves start at byte 8; the word at sp+0 is used as mask scratch by the loops */ + xm.stdsp s5,s4,16 + + li t3, 0x100 /* control word handed to xm.vsetc two bundles below */ +{ xm.mkmski mask_0xF, 4 ; sw N, (STACK_N)*4 (sp)} +{ srli N, N, 4 ; xm.vsetc t3} + +// cur_min[i] = 0x7FFF + la t3, vpu_vec_0x7FFF +{ addi t3,sp, (STACK_VEC_CUR_MAX)*4 ; xm.vldr t3} +{ nop ; xm.vstr t3} + +// cur_dex[i] = i +{ addi tmp,sp, (STACK_VEC_CUR_DEX)*4 ; li t3, 15 } +.L_setup_cur_dex: + xm.st16 t3, t3(tmp) + { addi t3, t3, -1 ; xm.bt t3, .L_setup_cur_dex } + +// min_dex[i] = -1 + la t3, vpu_vec_neg_1 +{ addi t3,sp, (STACK_VEC_MAX_DEX)*4 ; xm.vldr t3} +{ addi cur_min,sp, (STACK_VEC_CUR_MAX)*4 ; xm.vstr t3} + + la t3, vpu_vec_0x0010 +{ mv vec_16s, t3 ; xm.vclrdr } + la t3, vpu_vec_0x0001 +{ mv vec_ones, t3 ; nop } +{ mv t3, b ; xm.brff N, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + +/* + + vR[k] = b[k] + vR[k] = cur_min[k] - b[k] + vR[k] = (cur_min[k] - b[k]) < 0 = cur_min[k] < b[k] + + vR[k] = !vR[k] = !(cur_min[k] < b[k]) = (cur_min[k] >= b[k]) + + So, this will replace the indexes if b[k] is equal to
cur_min[k] + + Instead, we want: + + !vR[k] = (cur_min[k] > b[k]) + = cur_min[k] >= b[k] + 1 + + vR[k] = !(cur_min[k] >= b[k] + 1) + vR[k] = cur_min[k] < b[k] + 1 + vR[k] = cur_min[k] - (b[k]+1) < 0 + vR[k] = cur_min[k] - (b[k] + 1) + + + +*/ + + +.L_loop_top: + { mv b, t3 ; xm.vldr t3} + { nop ; xm.vladd vec_ones} /* the b[k] + 1 step of the derivation above */ + { addi N, N, -1 ; xm.vlsub cur_min} + { addi t3,sp, 0 ; xm.vdepth1 } + xm.vstrpv t3, mask_0xF + { mv t3, b ; lw tmp, 0 (sp)} + { xm.not tmz, tmp ; xm.not tmp, tmp } /* invert the mask read back from sp+0: the lanes to overwrite are the ones NOT flagged, per the derivation above */ +xm.zip tmz, tmp, 0 + { addi t3,sp, (STACK_VEC_CUR_DEX)*4 ; xm.vldr t3} + xm.vstrpv cur_min, tmp + { addi tmz,sp, (STACK_VEC_MAX_DEX)*4 ; xm.vldr t3} + xm.vstrpv tmz, tmp + { nop ; xm.vladd vec_16s} + { li t3, 32 ; xm.vstr t3} + { add t3, b, t3 ; xm.bt N, .L_loop_top } +.L_loop_bot: + +{ nop ; lw N, (STACK_N)*4 (sp)} +{ xm.zexti N, 4 ; nop } +{ xm.mkmsk N, N ; xm.brff N, .L_no_tail }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ +{ mv b, t3 ; xm.vldr t3} +{ nop ; xm.vladd vec_ones} +{ addi t3,sp, 0 ; xm.vlsub cur_min} +{ nop ; xm.vdepth1 } + xm.vstrpv t3, mask_0xF +{ mv t3, b ; lw tmp, 0 (sp)} +{ xm.not tmp, tmp ; nop } +{ and tmp, tmp, N ; and tmz, tmp, N } /* restrict the inverted mask to the leftover lanes selected by N */ +xm.zip tmz, tmp, 0 +{ addi t3,sp, (STACK_VEC_CUR_DEX)*4 ; xm.vldr t3} + xm.vstrpv cur_min, tmp +{ addi tmz,sp, (STACK_VEC_MAX_DEX)*4 ; xm.vldr t3} + xm.vstrpv tmz, tmp + +.L_no_tail: + +#undef cur_min +#undef vec_16s +#undef mask_0xF + +#define cur_min x19 // ![%d] +#define min_dex x12 // ![0x%08X] + + +{ addi t3,sp, (STACK_VEC_CUR_MAX)*4 ; li N, 15 } +{ xm.ldawsp x20, (STACK_VEC_MAX_DEX)*4 ; nop} +xm.ld16s cur_min, N(t3) +xm.ld16s min_dex, N(x20) +{ addi N, N, -1 ; nop} +.L_loop2_top: +xm.ld16s x10, N(x28) /* x28 is t3, the same base used as N(t3) above; x10 is a0 */ +xm.slt tmp, cur_min, x10 +xm.ld16s x21, N(x20) /* x21 is s5: the index recorded for lane N */ +xm.eq tmz, cur_min, a0 +{nop; xm.bt tmp, .L_greater_than } + .L_less_or_equal: + { xm.slt tmp, s5, min_dex ; xm.brff tmz, .L_less }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ +
.L_equal: + { nop ; xm.brff tmp, .L_greater_than }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + .L_less: + { mv cur_min, a0 ; mv min_dex, s5 } /* lane N wins when its value is smaller, or equal with a smaller index */ + + .L_greater_than: + { addi N, N, -1 ; xm.bt N, .L_loop2_top } + + xm.lddsp s5,s4,16 + xm.lddsp s3,s2,8 +{ mv a0, min_dex ; xm.retsp (NSTACKWORDS)*4 } /* return the winning index */ + +.L_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_end - FUNCTION_NAME + + + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_clip.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_clip.S new file mode 100644 index 00000000..59fb331d --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_clip.S @@ -0,0 +1,331 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+ +#if defined(__VX4B__) + + +/* +Clips each element of b[] (after the b_shr shift) to [lower_bound, upper_bound] and stores it to a[]; the "C logic" comment further down spells out the saturating-add scheme. +headroom_t vect_s16_clip( + int16_t a[], + const int16_t b[], + const unsigned length, + const int16_t lower_bound, + const int16_t upper_bound, + const int b_shr); +*/ + + +#include "../asm_helper.h" + +.text +.p2align 2 + +#define NSTACKVECS (6) +#define NSTACKWORDS (8 + 8*(NSTACKVECS)+4) + +#define FUNCTION_NAME vect_s16_clip + +#define STACK_VEC(K) (NSTACKWORDS - (8*((K)+1))-4) + +#define a x10 +#define b x11 +#define N x12 +#define lower x13 +#define upper x18 +#define b_shr x19 +#define tail x20 +#define tmp1 x21 +#define tmp2 x22 +#define int_max x23 +#define int_min x24 + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +FUNCTION_NAME: /* Frame is (NSTACKWORDS)*4 = 240 bytes: s2..s7 are saved at sp+0..23, s8 at sp+24, and STACK_VEC(0)..STACK_VEC(5) are the 32-byte scratch vectors at words 48, 40, 32, 24, 16 and 8. */ + + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + li t3, 0x0100 + xm.stdsp s7,s6,16 + { slli tail, N, SIZEOF_LOG2_S16 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli tail, N, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + { xm.zexti tail, 5 ; sw s8, 24 (sp)} + + { li tmp1, 15 ; srli N, N, EPV_LOG2_S16 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri N, N, EPV_LOG2_S16 \nMessage: The shift amount is not 32" */ + { xm.mkmsk int_max, tmp1 ; xm.vclrdr } + { xm.addi int_min, int_max, 1 ; xm.mkmsk tail, tail } + + // If upper >= 0 and lower <= 0, we can do this more efficiently. + mv upper, a4 /* 5th argument (upper_bound in the prototype above) */ + { li tmp1, 0 ; nop} + mv b_shr, a5 /* 6th argument (b_shr in the prototype above) */ + { xm.slt tmp2, upper, tmp1 ; nop} + { xm.slt tmp1, tmp1, lower ; nop } + bnez tmp2, .L_lower_nice + bnez tmp1, .L_upper_nice + + + // Otherwise, we have the nice situation.
+.L_nice: + + //In the nice situation, the upper bound is no more than 1 VLADD away from the positive saturation + // point of the VPU, and the lower bound is no more than 1 VLADD away from the negative saturation + // point of the VPU. + + { addi t3,sp, (STACK_VEC(0))*4 ; sub upper, int_max, upper } + { mv tmp1, upper ; mv tmp2, upper } +xm.zip tmp2, tmp1, 4 + { nop ; xm.bl .L_std_func1 } + + { addi t3,sp, (STACK_VEC(2))*4 ; xm.neg upper, upper } + { mv tmp1, upper ; mv tmp2, upper } +xm.zip tmp2, tmp1, 4 + { nop ; xm.bl .L_std_func1 } + + { addi t3,sp, (STACK_VEC(1))*4 ; sub lower, int_min, lower } + { mv tmp1, lower ; mv tmp2, lower } +xm.zip tmp2, tmp1, 4 + { nop ; xm.bl .L_std_func1 } + + { addi t3,sp, (STACK_VEC(3))*4 ; xm.neg lower, lower } + { mv tmp1, lower ; mv tmp2, lower } +xm.zip tmp2, tmp1, 4 + { nop ; xm.bl .L_std_func1 } + + { nop ; xm.bu .L_std_func_end1 } +.L_std_func1: /* local helper: four 8-byte stores of the pair (tmp1,tmp1) fill the 32 bytes at t3, then return to the xm.bl site; presumably tmp1 holds the 16-bit value in both halves after xm.zip - TODO confirm */ + xm.stdi tmp1,tmp1, 0(t3) + xm.stdi tmp1,tmp1, 8(t3) + xm.stdi tmp1,tmp1, 16(t3) + xm.stdi tmp1,tmp1, 24(t3) + ret +.L_std_func_end1: + +#define vec_upper upper +#define vec_lower lower +#define vec_nupper tmp1 +#define vec_nlower tmp2 +#define _32 int_min + +//{ nop; xm.ldawsp vec_upper, STACK_VEC(0)*4 } +//{ nop; xm.ldawsp vec_lower, STACK_VEC(1)*4 } +//{ nop; xm.ldawsp vec_nupper, STACK_VEC(2)*4 } +//{ nop; xm.ldawsp vec_nlower, STACK_VEC(3)*4 } +{addi vec_upper,sp, (STACK_VEC(0))*4 ; nop} +{addi vec_lower,sp, (STACK_VEC(1))*4 ; nop} +{addi vec_nupper,sp, (STACK_VEC(2))*4 ; nop} +{addi vec_nlower,sp, (STACK_VEC(3))*4 ; nop} + { li _32, 32 ; xm.brff N, .L_nice_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_nice_loop_top: + xm.vlashr b, b_shr + { add b, b, _32 ; xm.vladd vec_upper} + { addi N, N, -1 ; xm.vladd vec_nupper} + { nop ; xm.vladd vec_lower} + { nop ; xm.vladd vec_nlower} + { add a, a, _32 ; xm.vstr a} + { nop ; xm.bt N, .L_nice_loop_top } + .L_nice_loop_bot: + + beqz tail, .L_finish + xm.vlashr b, b_shr
+ { nop ; xm.vladd vec_upper} + { nop ; xm.vladd vec_nupper} + { nop ; xm.vladd vec_lower} + { nop ; xm.vladd vec_nlower} + j .L_finishish + +/* + C logic: + + void clip16(int16_t output[], int16_t input[], int16_t lower, int16_t upper, unsigned length, int input_shr) + { + if(upper >= 0 && lower <= 0){ + + int16_t up_thing = VPU_INT16_MAX - upper; + int16_t lo_thing = VPU_INT16_MIN - lower; + + // 7 instructions required + for(unsigned int i = 0; i < length; i++){ + + int16_t tmp = input[i] >> input_shr; + tmp = SATURATING_ADD(tmp, up_thing); + tmp = tmp - up_thing; + tmp = SATURATING_ADD(tmp, lo_thing); + tmp = tmp - lo_thing; + + output[i] = tmp; + } + } else { + + int16_t one, two, three; + + if(upper >= 0){ + one = VPU_INT16_MAX - upper; + two = VPU_INT16_MIN; + three = VPU_INT16_MIN - (lower - upper); + } else { + one = VPU_INT16_MIN - lower; + two = VPU_INT16_MAX; + three = VPU_INT16_MAX - (upper - lower); + } + + // 9 instructions required + for(unsigned int i = 0; i < length; i++){ + + int16_t tmp = input[i] >> input_shr; + tmp = SATURATING_ADD(tmp, one); + tmp = tmp - one; + tmp = tmp + two; + tmp = SATURATING_ADD(tmp, three); + tmp = tmp - three; + tmp = tmp - two; + + output[i] = tmp; + } + } + } + +*/ + + + +#undef vec_upper +#undef vec_lower +#undef vec_nupper +#undef vec_nlower +#undef _32 + +#define vec_one upper +#define vec_two lower +#define vec_three tmp1 + +#define vec_none tmp2 +#define vec_ntwo int_max +#define vec_nthree int_min + + // The nice thing about the not nice scenario is that at least one of the two bounds is + // guaranteed to be within one VLADD of the relevant saturation point.
+ +.L_upper_nice: + + { sub vec_one, int_max, upper ; xm.neg vec_three, lower } + addi vec_three, vec_three, -1 + { addi vec_two, int_min, 1 ; xm.bu .L_not_nice_thing } + +.L_lower_nice: + { sub vec_one, int_min, lower ; xm.neg vec_three, upper } + { mv vec_two, int_max ; nop } + + +.L_not_nice_thing: + + { addi t3,sp, (STACK_VEC(0))*4 ; nop } + { mv s6, vec_one ; mv s7, vec_one } +xm.zip s7, s6, 4 + { nop ; xm.bl .L_std_func } + + { addi t3,sp, (STACK_VEC(1))*4 ; nop } + { mv s6, vec_two ; mv s7, vec_two } +xm.zip s7, s6, 4 + { nop ; xm.bl .L_std_func } + + { addi t3,sp, (STACK_VEC(3))*4 ; nop } +{ xm.neg s6, vec_one ; nop} +{nop; xm.neg s7, vec_one } +xm.zip s7, s6, 4 + { nop ; xm.bl .L_std_func } + + { addi t3,sp, (STACK_VEC(2))*4 ; nop } +{ xm.neg s6, vec_two ; nop} +{nop; xm.neg s7, vec_two } +xm.zip s7, s6, 4 + { nop ; xm.bl .L_std_func } + + { addi t3,sp, (STACK_VEC(4))*4 ; nop } + { mv s6, vec_three ; mv s7, vec_three } +xm.zip s7, s6, 4 + { nop ; xm.bl .L_std_func } + + { addi t3,sp, (STACK_VEC(5))*4 ; nop } +{ xm.neg s6, vec_three ; nop } +{ nop; xm.neg s7, vec_three } +xm.zip s7, s6, 4 + { nop ; xm.bl .L_std_func } + + { nop ; xm.bu .L_std_func_end } +.L_std_func: /* same fill helper as .L_std_func1, but sourcing the pair (s6,s6): 32 bytes at t3 are written, then ret */ + xm.stdi s6,s6, 0(t3) + xm.stdi s6,s6, 8(t3) + xm.stdi s6,s6, 16(t3) + xm.stdi s6,s6, 24(t3) + ret +.L_std_func_end: +/* +{ nop; xm.ldawsp vec_one, STACK_VEC(0) *4 } +{ nop; xm.ldawsp vec_none, STACK_VEC(3) *4 } +{ nop; xm.ldawsp vec_two, STACK_VEC(1) *4 } +{ nop; xm.ldawsp vec_ntwo, STACK_VEC(2) *4 } +{ nop; xm.ldawsp vec_three, STACK_VEC(4) *4 } +{ nop; xm.ldawsp vec_nthree, STACK_VEC(5)*4 } +*/ +{addi vec_one,sp, (STACK_VEC(0))*4 ; nop} +{addi vec_none,sp, (STACK_VEC(3))*4 ; nop} +{addi vec_two,sp, (STACK_VEC(1))*4 ; nop} +{addi vec_ntwo,sp, (STACK_VEC(2))*4 ; nop} +{addi vec_three,sp, (STACK_VEC(4))*4 ; nop} +{addi vec_nthree,sp, (STACK_VEC(5))*4 ; nop} + + { li t3, 32 ; xm.brff N, .L_not_nice_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need
revising' */ +.L_not_nice_loop_top: + xm.vlashr b, b_shr + { add b, b, t3 ; xm.vladd vec_one} + { addi N, N, -1 ; xm.vladd vec_none} + { nop ; xm.vladd vec_two} + { nop ; xm.vladd vec_three} + { nop ; xm.vladd vec_nthree} + { nop ; xm.vladd vec_ntwo} + { add a, a, t3 ; xm.vstr a} + { nop ; xm.bt N, .L_not_nice_loop_top } +.L_not_nice_loop_bot: + + { nop ; xm.brff tail, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + xm.vlashr b, b_shr + { nop ; xm.vladd vec_one} + { nop ; xm.vladd vec_none} + { nop ; xm.vladd vec_two} + { nop ; xm.vladd vec_three} + { nop ; xm.vladd vec_nthree} + { nop ; xm.vladd vec_ntwo} + + +.L_finishish: /* shared tail for both paths: the partial result is written to a[] under the tail byte mask; tmp1 (a scratch-vector address on either path) is used as a staging buffer - presumably so headroom only reflects the tail elements, TODO confirm */ + { nop ; xm.vstd tmp1} + xm.vstrpv a, tail + xm.vstrpv tmp1, tail + { nop ; xm.vldd tmp1} + { nop ; xm.vstd tmp1} + +.L_finish: + { li a0, 15 ; xm.vgetc t3} + { xm.zexti t3, 5 ; lw s8, 24 (sp)} + xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 + xm.lddsp s7,s6,16 + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } /* return value = 15 minus the low 5 bits of the word read by xm.vgetc */ + +.L_func_end: + +//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. */ +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32.
*/ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_dot.c b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_dot.c new file mode 100644 index 00000000..540ad8c1 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_dot.c @@ -0,0 +1,36 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +#if defined (__VX4B__) + +#include <stdint.h> +#include <stdio.h> /* NOTE(review): the header names on these two #include lines were missing; <stdint.h> is needed for int16_t/int64_t, <stdio.h> is assumed - confirm */ + +#include "xmath/xmath.h" +#include "vpu_helper.h" +#include "xmath/xs3/vpu_scalar_ops.h" + +/* Plain-C dot product of two int16_t vectors for VX4B: adds vlmacc16(0, b[k], c[k]) for every k < length into a + * 64-bit running sum, clamps that sum to +/-0x7FFFFFFFFFFF after each element, and returns it (0 when length is 0). + */ +int64_t vect_s16_dot( + const int16_t b[], + const int16_t c[], + const unsigned length) +{ + //#warn vect_s16_dot is not yet optimised for vx4b. + + // Note: instead of using the 32-bit accumulators for this, the assembly version of this function implements + // makeshift 48-bit accumulators, which is why this is using a 64-bit int for accumulation. + int64_t acc = 0; // explicitly 64-bit: the clamp bounds below do not fit in 32 bits + + const int64_t upper_sat_bound = 0x7FFFFFFFFFFFLL; + const int64_t lower_sat_bound = -upper_sat_bound; + + for(unsigned k = 0; k < length; k++){ + acc += vlmacc16(0, b[k], c[k]); + acc = MAX(lower_sat_bound, MIN(upper_sat_bound, acc)); + } + + return acc; +} + +#endif \ No newline at end of file diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_energy.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_energy.S new file mode 100644 index 00000000..086ada65 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_energy.S @@ -0,0 +1,116 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+ +#if defined(__VX4B__) + + +/* +int32_t vect_s16_energy( + const int16_t b[], + const unsigned length, + const right_shift_t b_shr); +*/ + + +#include "../asm_helper.h" + +.text +.p2align 2 + +#define NSTACKVECS (4) +#define NSTACKWORDS (8 + 8*NSTACKVECS+4) + + +#define FUNCTION_NAME vect_s16_energy + +#define STACK_VEC_TMP (NSTACKWORDS-8-16-4) +#define STACK_VEC_VR (NSTACKWORDS-16-16-4) +#define STACK_VEC_TMP2 (NSTACKWORDS-8-2) + +#define b x10 +#define N x11 +#define b_shr x12 +#define vec_tmp x13 +#define tail x18 + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.align 16; /* Translation error on this line: unexpected token at position 9. */ + +FUNCTION_NAME: /* Frame is (NSTACKWORDS)*4 = 176 bytes: s2..s5 saved at sp+0..15; STACK_VEC_VR, STACK_VEC_TMP and STACK_VEC_TMP2 are word offsets 8, 16 and 34. */ + + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + + { addi s4,sp, (STACK_VEC_TMP2)*4 ; nop } + addi s5, s4, (-30) /* s5 sits 30 bytes below s4; each "store at s4, load from s5" pair below re-reads the stored vector at that offset - NOTE(review): looks like a stand-in for the commented-out xm.vadddr, confirm */ + li t3, 0x100 + { nop ; addi vec_tmp,sp, (STACK_VEC_TMP)*4 } + { slli tail, N, SIZEOF_LOG2_S16 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli tail, N, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + { xm.zexti tail, 5 ; xm.vclrdr } + { srli N, N, EPV_LOG2_S16 ; xm.brff tail, .L_tail_dealt_with_s16 }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri N, N, EPV_LOG2_S16 \nMessage: The shift amount is not 32", 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + { nop ; slli N, N, 5 } + { add t3, b, N ; xm.vstd vec_tmp} + { xm.mkmsk tail, tail ; nop } + xm.vlashr t3, b_shr + xm.vstrpv vec_tmp, tail +#undef tail + + { nop ; xm.vldc vec_tmp} + { nop ; xm.vclrdr } + { srli N, N, 5 ; xm.vlmaccr0 vec_tmp}
+ xm.vlmaccr1 vec_tmp + {nop ; xm.vstd s4} + {nop ; xm.vldd s5} + {nop ; xm.vstr s4} + {nop ; xm.vldr s5} + +.L_tail_dealt_with_s16: + { addi t3,sp, (STACK_VEC_VR)*4 ; xm.brff N, .L_loop_bot_s16 }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + +.L_loop_top_s16: /* one pass per full 32-byte chunk of b: vR is parked at STACK_VEC_VR, the shifted chunk is staged in vec_tmp and multiply-accumulated against itself, then vR is restored */ + { li t3, 32 ; xm.vstr t3} + xm.vlashr b, b_shr + { add b, b, t3 ; xm.vstr vec_tmp} + { addi t3,sp, (STACK_VEC_VR)*4 ; xm.vldc vec_tmp} + { nop ; xm.vldr t3} + { addi N, N, -1 ; xm.vlmaccr0 vec_tmp} + xm.vlmaccr1 vec_tmp + {nop ; xm.vstd s4} + {nop ; xm.vldd s5} + {nop ; xm.vstr s4} + {nop ; xm.vldr s5} + { nop ; xm.bt N, .L_loop_top_s16 } +.L_loop_bot_s16: + +.L_finish_s16: + + //{ nop ; xm.vadddr } + { nop ; xm.vstd vec_tmp} + addi s4, vec_tmp, 32-2 + { nop ; lw a1, 0(s4)} + { slli a1, a1, 16 ; xm.vstr vec_tmp} + { nop ; lw a0, 0(s4)} + xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 + { xm.zexti a0, 16 ;nop} + { or a0, a0, a1 ; xm.retsp (NSTACKWORDS)*4 } /* result = (word read at vec_tmp+30 after the vD store) << 16, OR'd with the low 16 bits of the word read there after the vR store */ + + +//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. */ +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32.
*/ +.L_end: + .size FUNCTION_NAME, .L_end - FUNCTION_NAME + + + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_extract_high_byte.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_extract_high_byte.S new file mode 100644 index 00000000..876bb503 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_extract_high_byte.S @@ -0,0 +1,132 @@ +// Copyright 2021-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +/* +void vect_s16_extract_high_byte( + int8_t a[], + const int16_t b[], + const unsigned len); +*/ + + +#include "../asm_helper.h" + +#define NSTACKWORDS (12+8+4) + +#define FUNCTION_NAME vect_s16_extract_high_byte + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) + +#define STACK_LEN (8) +#define STACK_TMP (0) + +#define a x10 +#define b x11 +#define len x12 +#define eight x13 +#define vec_tmp x18 +#define tmp x19 +#define _16 x20 +#define tail x21 +#define vec_0x007F x22 + +.text +.p2align 2 + + +FUNCTION_NAME: /* Frame is (NSTACKWORDS)*4 = 96 bytes: scratch word at sp+0 (STACK_TMP), s8 at sp+4, s2..s7 at sp+8..31, the chunk count at sp+32 (STACK_LEN), and a 32-byte vector at word 12 (STACK_VEC_TMP). */ + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + li t3, 0x100 + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + { xm.mkmski s8, 4 ; sw s8, 4 (sp)} + { mv t3, len ; xm.vsetc t3} + { xm.zexti t3, 4 ; srli len, len, EPV_LOG2_S16 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S16 \nMessage: The shift amount is not 32" */ + { addi vec_tmp,sp, (STACK_VEC_TMP)*4 ; xm.mkmsk tail, t3 } + { addi tmp,sp, (STACK_TMP)*4 ; sw len, (STACK_LEN)*4 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction',
'STWSP outside of known frame - offset may need correction' */ + +// First thing, write 0x80 to all outputs. +lui t3, %hi(vpu_vec_0x80) + addi t3,t3, %lo(vpu_vec_0x80) + { li _16, 16 ; mv s6, a } + { li eight, 8 ; xm.vldr t3} + { xm.mkmski t3, 16 ; xm.brff len, .L_set_0x80_loop_end }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_set_0x80_loop_top: + { addi len, len, -1 ; nop } + xm.vstrpv s6, t3 + { add s6, s6, _16 ; xm.bt len, .L_set_0x80_loop_top } + .L_set_0x80_loop_end: + { nop ; xm.brff tail, .L_0x80_no_tail }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + xm.vstrpv s6, tail + .L_0x80_no_tail: + +// Now that that's done, actually compute outputs, only overwriting those that shouldn't be 0x80 +// (this is to avoid symmetric saturation) + +lui t3, %hi(vpu_vec_0x007F) + addi t3,t3, %lo(vpu_vec_0x007F) + { nop ; lw len, (STACK_LEN)*4 (sp)} + { mv vec_0x007F, t3 ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.bu .L_loop_top } + +.p2align 4 +.L_loop_top: /* each pass advances b by 32 bytes (two "add b, b, _16") and a by 16 bytes; "eight" is negated twice per pass, so it is 8 again at the top of the next pass */ + xm.vlashr b, eight +{ xm.neg eight, eight ; nop} +{nop;xm.vstr vec_tmp} + { add b, b, _16 ; xm.vladd vec_0x007F} + { xm.mkmski t3, 16 ; xm.vdepth1 } + xm.vstrpv tmp, s8 + { addi len, len, -1 ; lw s7,0 ( tmp)} + { xm.andnot t3, s7 ; add b, b, _16 } + xm.vlashr vec_tmp, eight +{ xm.neg eight, eight ; nop} +{nop;xm.vdepth8 } + xm.vstrpv a, t3 + { add a, a, _16 ; xm.bt len, .L_loop_top } + +.L_loop_bot: + { nop ; xm.brff tail, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + xm.vlashr b, eight +{ xm.neg eight, eight ; nop} +{nop;xm.vstr vec_tmp} + { nop ; xm.vladd vec_0x007F} + { nop ; xm.vdepth1 } + xm.vstrpv tmp, s8 + { nop ; lw s7,0 ( tmp)} + { xm.andnot tail, s7 ; nop } + xm.vlashr vec_tmp, eight + { nop ; xm.vdepth8 } + xm.vstrpv a, tail + + +.L_finish: +
xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 + { nop ; lw s8, 4 (sp)} + { nop ; xm.retsp (NSTACKWORDS)*4 } + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_extract_low_byte.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_extract_low_byte.S new file mode 100644 index 00000000..c8dfb1f8 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_extract_low_byte.S @@ -0,0 +1,129 @@ +// Copyright 2021-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+ +#if defined(__VX4B__) + + +/* +void vect_s16_extract_low_byte( + int8_t a[], + const int16_t b[], + const unsigned len); +*/ + + +#include "../asm_helper.h" + +#define NSTACKWORDS (8+8+4) + +#define FUNCTION_NAME vect_s16_extract_low_byte + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) + +#define STACK_LEN (6) +#define STACK_TMP (7) + +#define a x10 +#define b x11 +#define len x12 +#define tmpA x13 +#define tmpB x18 +#define _16 x19 +#define tail x20 +#define vec_0x7FFF x21 + +.text +.p2align 2 + + +FUNCTION_NAME: /* Frame is (NSTACKWORDS)*4 = 80 bytes: s2..s5 saved at sp+8..23, the chunk count at sp+24 (STACK_LEN), a scratch word at sp+28 (STACK_TMP), and a 32-byte vector at word 8 (STACK_VEC_TMP). */ + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + li t3, 0x100 + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + { mv t3, len ; xm.vsetc t3} + { xm.zexti t3, 4 ; srli len, len, EPV_LOG2_S16 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S16 \nMessage: The shift amount is not 32" */ + { addi tmpA,sp, (STACK_TMP)*4 ; xm.mkmsk tail, t3 } + { nop ; sw len, (STACK_LEN)*4 (sp)} + +// First thing, write 0x80 to all outputs.
+ la t3, vpu_vec_0x80 + { li _16, 16 ; mv tmpB, a } + { nop ; xm.vldr t3} + { xm.mkmski t3, 16 ; xm.brff len, .L_set_0x80_loop_end }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_set_0x80_loop_top: + { addi len, len, -1 ; nop } + xm.vstrpv tmpB, t3 + { add tmpB, tmpB, _16 ; xm.bt len, .L_set_0x80_loop_top } + .L_set_0x80_loop_end: + { nop ; xm.brff tail, .L_0x80_no_tail }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + xm.vstrpv tmpB, tail + .L_0x80_no_tail: + +// Now that that's done, actually compute outputs, only overwriting those that shouldn't be 0x80 +// (this is to avoid symmetric saturation) + + la t3, vpu_vec_0x0100 + { nop ; xm.vldc t3} + la t3, vpu_vec_0x7FFF + { mv vec_0x7FFF, t3 ; lw len, (STACK_LEN)*4 (sp)} + { addi t3,sp, (STACK_VEC_TMP)*4 ; xm.brff len, .L_loop_bot }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.bu .L_loop_top } + +.p2align 4 +.L_loop_top: /* each pass advances b by 32 bytes (two "add b, b, _16") and a by 16 bytes */ + { addi len, len, -1 ; xm.vclrdr } + {nop ; xm.vlmacc0 b} + xm.vlmacc1 b + { add b, b, _16 ;nop} + { add b, b, _16 ; xm.vstr t3} + { xm.mkmski tmpB, 4 ; xm.vladd vec_0x7FFF} + { nop ; xm.vdepth1 } + xm.vstrpv tmpA, tmpB + { xm.mkmski tmpB, 16 ; lw s3,0 ( tmpA)} /* s3 is x19, the same register as the _16 alias; _16 is reloaded by the "li _16, 16" bundle before it is next used */ +{ xm.andnot tmpB, s3 ; nop} +{nop;xm.vldr t3} + { li _16, 16 ; xm.vdepth8 } + xm.vstrpv a, tmpB + { add a, a, _16 ; xm.bt len, .L_loop_top } + +.L_loop_bot: + { nop ; xm.brff tail, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.vclrdr } + { nop ; xm.vlmacc0 b} + xm.vlmacc1 b + { nop ; xm.vstr t3} + { nop ; xm.vladd vec_0x7FFF} + { xm.mkmski tmpB, 4 ; xm.vdepth1 } + xm.vstrpv tmpA, tmpB + { nop ; lw s3,0 ( tmpA)} +{ xm.andnot tail, s3 ; nop} +{nop;xm.vldr t3} + { nop ; xm.vdepth8 } + xm.vstrpv a, tail + + +.L_finish: + xm.lddsp
s3,s2,8 + xm.lddsp s5,s4,16 + { nop ; xm.retsp (NSTACKWORDS)*4 } + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_inverse.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_inverse.S new file mode 100644 index 00000000..1cdad3fe --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_inverse.S @@ -0,0 +1,66 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+ +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* +Scalar loop: for each element, a[k] = min(dividend / b[k], s2), where the dividend is derived from `scale` and s2 is a 15-bit mask (see the notes at the instructions below). +void vect_s16_inverse( + int16_t a[], + const int16_t b[], + const unsigned length, + const unsigned scale); + +*/ + +#define NSTACKWORDS (4) + +#define FUNCTION_NAME vect_s16_inverse + +#define a x10 +#define b x11 +#define length x12 +#define scale x13 + +.text +.p2align 2 + + +FUNCTION_NAME: + +{ xm.mkmsk scale, scale ; xm.entsp (NSTACKWORDS)*4 } +{ sw s2, 0(sp); li s2, 15} +xm.mkmsk s2, s2 /* presumably s2 = 0x7FFF (mask of 15 bits), the upper clamp applied by xm.min below - TODO confirm xm.mkmsk semantics */ +{ addi scale, scale, 1 ; nop } /* presumably scale is now 1 << scale (mask + 1) - TODO confirm */ +{ addi length, length, -1 ; xm.brff length, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + +.L_loop_top: /* `length` doubles as the element index and counts down from length-1 to 0 */ + xm.ld16s t3, length(b) + div t3, scale, t3 + xm.min t3, t3, s2 + xm.st16 t3, length(a) + { addi length, length, -1 ; xm.bt length, .L_loop_top } + +.L_finish: + lw s2, 0(sp) + xm.retsp (NSTACKWORDS)*4 + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_macc.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_macc.S new file mode 100644 index 00000000..7da99081 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_macc.S @@ -0,0 +1,122 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// XMOS Public License: Version 1 + +#if defined(__VX4B__) + + +/* +headroom_t vect_s16_macc( + int16_t acc[], + const int16_t b[], + const int16_t c[], + const unsigned len, + const int acc_shr, + const int bc_shr); +*/ + + +#include "../asm_helper.h" + +#define NSTACKVECTS (2) +#define NSTACKWORDS (8 + 8*NSTACKVECTS+4) + +#define FUNCTION_NAME vect_s16_macc + +#define STACK_VEC_SAT (NSTACKWORDS-8-4) +#define STACK_VEC_TMP (NSTACKWORDS-16-4) + +#define acc x10 +#define b x11 +#define c x12 +#define len x13 +#define bc_shr x18 +#define _32 x19 +#define tmp x20 +#define tail x21 +#define acc_shr x22 +#define mask x23 + +.text +.p2align 2 + + +FUNCTION_NAME: /* Frame is (NSTACKWORDS)*4 = 112 bytes: s2..s7 saved at sp+8..31, STACK_VEC_TMP at word 8 and STACK_VEC_SAT at word 16. acc_shr and bc_shr arrive in a4 and a5; bc_shr is packed into both halves of a word and replicated across STACK_VEC_SAT for xm.vlsat. */ + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + li t3, 0x100 + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + { slli t3, len, SIZEOF_LOG2_S16 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli t3, len, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + mv bc_shr, a5 + { xm.zexti t3, 5 ; nop} + { slli tmp, bc_shr, 16 ; xm.zexti bc_shr, 16 } + { or bc_shr, tmp, bc_shr ; srli len, len, EPV_LOG2_S16 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S16 \nMessage: The shift amount is not 32" */ + mv acc_shr, a4 + { addi tmp,sp, (STACK_VEC_TMP)*4 ; nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */ /* NOTE(review): tmp is not read again in this routine */ + + xm.stdsp bc_shr,bc_shr,((STACK_VEC_SAT)/2 + 0)*8 + xm.stdsp bc_shr,bc_shr,((STACK_VEC_SAT)/2 + 1)*8 + xm.stdsp bc_shr,bc_shr,((STACK_VEC_SAT)/2 + 2)*8 + xm.stdsp
bc_shr,bc_shr,((STACK_VEC_SAT)/2 + 3)*8 + + { addi bc_shr,sp, (STACK_VEC_SAT)*4 ; xm.mkmsk tail, t3 } + { li _32, 32 ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { xm.mkmski mask, 32 ; xm.bu .L_loop_top } + +.p2align 4 +.L_loop_top: /* one full 32-byte chunk per pass: the shifted acc[] chunk is written back, b*c is accumulated and saturated via the STACK_VEC_SAT shifts, then added to acc[] and stored */ + xm.vlashr acc, acc_shr + xm.vstrpv acc, mask + { addi len, len, -1 ; xm.vclrdr } + { add b, b, _32 ; xm.vldc b} + { nop ; xm.vlmacc0 c} + xm.vlmacc1 c + { add c, c, _32 ; nop} + xm.vlsat bc_shr + { nop ; xm.vladd acc} + { add acc, acc, _32 ; xm.vstr acc} + { nop ; xm.bt len, .L_loop_top } + +.L_loop_bot: + { nop ; xm.brff tail, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + xm.vlashr acc, acc_shr + xm.vstrpv acc, tail + { nop ; xm.vclrdr } + { nop ; xm.vldc b} + { nop ; xm.vlmacc0 c} + xm.vlmacc1 c + { mv t3, bc_shr ; nop} + xm.vlsat bc_shr + { nop ; xm.vladd acc} + { nop ; xm.vstd t3} + xm.vstrpv t3, tail + xm.vstrpv acc, tail + { nop ; xm.vldr t3} + { nop ; xm.vstr t3} + +.L_finish: + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 + { li a0, 15 ; xm.vgetc t3} + { xm.zexti t3, 5 ; nop } + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } /* return value = 15 minus the low 5 bits of the word read by xm.vgetc */ + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32.
*/ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_max.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_max.S new file mode 100644 index 00000000..81c13c40 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_max.S @@ -0,0 +1,114 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +/* +Returns the largest element of the int16_t vector b[0..length-1]: a 16-lane running maximum is kept on the stack, then reduced by a scalar loop. +int16_t vect_s16_max( + const int16_t b[], + const unsigned length); + +Frame note: the word at sp+0 is the scratch slot for the xm.vdepth1 lane mask, so callee-saved registers are kept at sp+8..sp+23. +*/ + + +#include "../asm_helper.h" + +.text +.p2align 2 + +#define NSTACKVECS (2) +#define NSTACKWORDS (8 + 8*NSTACKVECS+4) + +#define FUNCTION_NAME vect_s16_max + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) +#define STACK_VEC_CUR_MAX (NSTACKWORDS-16-4) + +#define b x10 // ![0x%08X] +#define N x11 // ![%d] +#define tail x12 // ![0x%X] +#define tmp x13 // ![%d] +#define tmz x18 // ![%d] + + + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.align 16; /* Translation error on this line: unexpected token at position 9.
*/ + +FUNCTION_NAME: + + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 /* saved above the sp+0 scratch word that the main loop overwrites; bytes 8..23 are free because the first vector (STACK_VEC_CUR_MAX) starts at byte 32 */ + xm.stdsp s5,s4,16 + li t3, 0x100 + { slli tail, N, SIZEOF_LOG2_S16 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli tail, N, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + { srli N, N, EPV_LOG2_S16 ; nop }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri N, N, EPV_LOG2_S16 \nMessage: The shift amount is not 32" */ + { slli tmp, N, 5 ; nop } + la t3, vpu_vec_0x8000 + { addi t3,sp, (STACK_VEC_CUR_MAX)*4 ; xm.vldr t3} + { add t3, b, tmp ; xm.vstr t3} + { xm.zexti tail, 5 ; xm.vldr t3} + { xm.mkmsk tail, tail ; addi t3,sp, (STACK_VEC_CUR_MAX)*4 } + xm.vstrpv t3, tail + + // Tail is fully accounted for in cur_max now.
+ +#undef tail +#define cur_max x12 // ![0x%08X] + + { addi tmp,sp, (STACK_VEC_TMP)*4 ; mv cur_max, t3 } + { nop ; xm.vclrdr } + { mv t3, b ; xm.brff N, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_loop_top: + // cur_max[] saved in stack + + { mv b, t3 ; xm.vldr t3} // vR[i] = b[i] + { addi N, N, -1 ; xm.vlsub cur_max} // vR[i] = cur_max[i] - b[i] + { addi t3,sp, 0 ; xm.vdepth1 } // vR[0] = [bitmask -- 1 where vR[i] < 0] b[i] > cur_max[i] + { xm.mkmski tmp, 2 ; nop } + xm.vstrpv t3, tmp /* writes the 2-byte lane mask to the scratch word at sp+0 */ + { mv t3, b ; lw tmp, 0 (sp)} + { mv tmz, tmp ; xm.vldr t3} +xm.zip tmz, tmp, 0 + { nop ; li t3, 32 } + xm.vstrpv cur_max, tmp + { add t3, b, t3 ; xm.bt N, .L_loop_top } +.L_loop_bot: + + { addi t3,sp, (STACK_VEC_CUR_MAX)*4 ; li N, 15 } + xm.ld16s cur_max, N(x28) + addi t3,sp, (STACK_VEC_CUR_MAX)*4 + { addi t3, t3, -2 ; nop } + .L_loop2_top: /* scalar reduction over the remaining 15 lanes of cur_max[]: the "mv cur_max, a0" update is skipped when a0 < cur_max */ + xm.ld16s a0, N(x28) + { addi N, N, -1 ; nop} + { xm.slt tmp, a0, cur_max ; nop } + { xm.shli tmp, tmp, 1; nop} + { nop ; xm.bru tmp } + { mv cur_max, a0 ; nop } + { mv a0, cur_max ; xm.bt N, .L_loop2_top } + + xm.lddsp s3,s2,8 /* restore from the same slots used in the prologue */ + xm.lddsp s5,s4,16 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + + +//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. */ +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30.
*/ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. */ +.L_end: + .size FUNCTION_NAME, .L_end - FUNCTION_NAME + + + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_min.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_min.S new file mode 100644 index 00000000..8f5f0782 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_min.S @@ -0,0 +1,115 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +/* + +int16_t vect_s16_min( + const int16_t b[], + const unsigned length); + + +*/ + + +#include "../asm_helper.h" + +.text +.p2align 2 + +#define NSTACKVECS (2) +#define NSTACKWORDS (8 + 8*NSTACKVECS+4) + +#define FUNCTION_NAME vect_s16_min + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) +#define STACK_VEC_CUR_MIN (NSTACKWORDS-16-4) + +#define b x10 // ![0x%08X] +#define N x11 // ![%d] +#define tail x12 // ![0x%X] +#define tmp x13 // ![%d] +#define tmz x18 // ![%d] + + + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.align 16; /* Translation error on this line: unexpected token at position 9. 
*/ + +FUNCTION_NAME: + + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + li t3, 0x100 + { slli tail, N, SIZEOF_LOG2_S16 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli tail, N, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + { srli N, N, EPV_LOG2_S16 ; nop }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri N, N, EPV_LOG2_S16 \nMessage: The shift amount is not 32" */ + { slli tmp, N, 5 ; nop } + la t3, vpu_vec_0x7FFF + { addi t3,sp, (STACK_VEC_CUR_MIN)*4 ; xm.vldr t3} + { add t3, b, tmp ; xm.vstr t3} + { xm.zexti tail, 5 ; xm.vldr t3} + { xm.mkmsk tail, tail ; addi t3,sp, (STACK_VEC_CUR_MIN)*4 } + xm.vstrpv t3, tail + + // Tail is fully accounted for in cur_min now. 
+ +#undef tail +#define cur_min x12 // ![0x%08X] + + { addi tmp,sp, (STACK_VEC_TMP)*4 ; mv cur_min, t3 } + { nop ; xm.vclrdr } + { mv t3, b ; xm.brff N, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_loop_top: + // cur_min[] saved in stack + + { mv b, t3 ; xm.vldr t3} // vR[i] = b[i] + { addi N, N, -1 ; xm.vlsub cur_min} // vR[i] = cur_min[i] - b[i] + { addi t3,sp, 0 ; xm.vdepth1 } // vR[0] = [bitmask -- 1 where vR[i] < 0] b[i] > cur_min[i] + { xm.mkmski tmp, 2 ; nop } + xm.vstrpv t3, tmp + { mv t3, b ; lw tmp, 0 (sp)} + { mv tmz, tmp ; xm.vldr t3} + xm.zip tmz, tmp, 0 + { xm.not tmp, tmp ; li t3, 32 } + xm.vstrpv cur_min, tmp + { add t3, b, t3 ; xm.bt N, .L_loop_top } +.L_loop_bot: + + { addi t3,sp, (STACK_VEC_CUR_MIN)*4 ; li N, 15 } + xm.ld16s cur_min, N(x28) + addi t3,sp, (STACK_VEC_CUR_MIN)*4 + { addi t3, t3, -2 ; nop } + .L_loop2_top: + + xm.ld16s a0, N(x28) + { addi N, N, -1 ; nop} + { xm.slt tmp, cur_min, a0 ; nop } + {xm.shli tmp, tmp, 1; nop} + { nop ; xm.bru tmp } + { mv cur_min, a0 ; nop } + { mv a0, cur_min ; xm.bt N, .L_loop2_top } + + xm.lddsp s5,s4,16 + xm.lddsp s3,s2,8 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + + +//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. */ +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. 
*/ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. */ +.L_end: + .size FUNCTION_NAME, .L_end - FUNCTION_NAME + + + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_mul.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_mul.S new file mode 100644 index 00000000..4d923bd6 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_mul.S @@ -0,0 +1,106 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +/* +headroom_t vect_s16_mul( + int16_t a[], + const int16_t b[], + const int16_t c[], + const unsigned len, + const int a_shr); +*/ + + +#include "../asm_helper.h" + +#define NSTACKWORDS (8+8) + +#define FUNCTION_NAME vect_s16_mul + +#define STACK_VEC_SAT 0 + +#define a x10 +#define b x11 +#define c x12 +#define len x13 +#define a_shr x18 +#define _32 x19 +#define tmp x20 +#define tail x21 + +.text +.p2align 2 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + li t3, 0x100 + xm.stdsp s3,s2,32 + xm.stdsp s5,s4,40 + { slli t3, len, SIZEOF_LOG2_S16 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli t3, len, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + mv a_shr, a4 + { xm.zexti t3, 5 ; nop} + { slli tmp, a_shr, 16 ; xm.zexti a_shr, 16 } + { or a_shr, tmp, a_shr ; srli len, len, EPV_LOG2_S16 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S16 \nMessage: The shift amount is not 32" */ + + 
xm.stdsp a_shr,a_shr,((STACK_VEC_SAT)/2 + 0)*8 + xm.stdsp a_shr,a_shr,((STACK_VEC_SAT)/2 + 1)*8 + xm.stdsp a_shr,a_shr,((STACK_VEC_SAT)/2 + 2)*8 + xm.stdsp a_shr,a_shr,((STACK_VEC_SAT)/2 + 3)*8 + + { addi a_shr,sp, (STACK_VEC_SAT)*4 ; xm.mkmsk tail, t3 } + { li _32, 32 ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.bu .L_loop_top } + +.p2align 4 +.L_loop_top: + { addi len, len, -1 ; xm.vclrdr } + { add b, b, _32 ; xm.vldc b} + { nop ; xm.vlmacc0 c} + xm.vlmacc1 c + {add c, c, _32 ; nop} + xm.vlsat a_shr + { add a, a, _32 ; xm.vstr a} + { nop ; xm.bt len, .L_loop_top } + +.L_loop_bot: + { nop ; xm.brff tail, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.vclrdr } + { nop ; xm.vldc b} + { nop ; xm.vlmacc0 c} + xm.vlmacc1 c + { mv t3, a_shr ; nop} + xm.vlsat a_shr + { nop ; xm.vstd t3} + xm.vstrpv t3, tail + xm.vstrpv a, tail + { nop ; xm.vldr t3} + { nop ; xm.vstr t3} + +.L_finish: + xm.lddsp s3,s2,32 + xm.lddsp s5,s4,40 + { li a0, 15 ; xm.vgetc t3} + { xm.zexti t3, 5 ; nop } + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. 
*/ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_nmacc.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_nmacc.S new file mode 100644 index 00000000..21573984 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_nmacc.S @@ -0,0 +1,122 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +// XMOS Public License: Version 1 + +#if defined(__VX4B__) + + +/* +headroom_t vect_s16_nmacc( + int16_t acc[], + const int16_t b[], + const int16_t c[], + const unsigned len, + const int acc_shr, + const int bc_shr); +*/ + + +#include "../asm_helper.h" + +#define NSTACKVECTS (2) +#define NSTACKWORDS (8 + 8*NSTACKVECTS+4) + +#define FUNCTION_NAME vect_s16_nmacc + +#define STACK_VEC_SAT (NSTACKWORDS-8-4) +#define STACK_VEC_TMP (NSTACKWORDS-16-4) + +#define acc x10 +#define b x11 +#define c x12 +#define len x13 +#define bc_shr x18 +#define _32 x19 +#define tmp x20 +#define tail x21 +#define acc_shr x22 +#define mask x23 + +.text +.p2align 2 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + li t3, 0x100 + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + { slli t3, len, SIZEOF_LOG2_S16 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli t3, len, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + mv bc_shr, a5 + { xm.zexti t3, 5 ; nop} + { slli tmp, bc_shr, 16 ; xm.zexti bc_shr, 16 } + { or bc_shr, tmp, bc_shr ; srli len, len, EPV_LOG2_S16 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the 
instruction's operands in shri len, len, EPV_LOG2_S16 \nMessage: The shift amount is not 32" */ + mv acc_shr, a4 + { addi tmp,sp, (STACK_VEC_TMP)*4 ; nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */ + + xm.stdsp bc_shr,bc_shr,((STACK_VEC_SAT)/2 + 0)*8 + xm.stdsp bc_shr,bc_shr,((STACK_VEC_SAT)/2 + 1)*8 + xm.stdsp bc_shr,bc_shr,((STACK_VEC_SAT)/2 + 2)*8 + xm.stdsp bc_shr,bc_shr,((STACK_VEC_SAT)/2 + 3)*8 + + { addi bc_shr,sp, (STACK_VEC_SAT)*4 ; xm.mkmsk tail, t3 } + { li _32, 32 ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { xm.mkmski mask, 32 ; xm.bu .L_loop_top } + +.p2align 4 +.L_loop_top: + xm.vlashr acc, acc_shr + xm.vstrpv acc, mask + { addi len, len, -1 ; xm.vclrdr } + { add b, b, _32 ; xm.vldc b} + { nop ; xm.vlmacc0 c} + xm.vlmacc1 c + { add c, c, _32 ; nop} + xm.vlsat bc_shr + { nop ; xm.vlsub acc} + { add acc, acc, _32 ; xm.vstr acc} + { nop ; xm.bt len, .L_loop_top } + +.L_loop_bot: + { nop ; xm.brff tail, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + xm.vlashr acc, acc_shr + xm.vstrpv acc, tail + { nop ; xm.vclrdr } + { nop ; xm.vldc b} + { nop ; xm.vlmacc0 c} + xm.vlmacc1 c + { mv t3, bc_shr ; nop} + xm.vlsat bc_shr + { nop ; xm.vlsub acc} + { nop ; xm.vstd t3} + xm.vstrpv t3, tail + xm.vstrpv acc, tail + { nop ; xm.vldr t3} + { nop ; xm.vstr t3} + +.L_finish: + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 + { li a0, 15 ; xm.vgetc t3} + { xm.zexti t3, 5 ; nop } + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. 
*/ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_scale.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_scale.S new file mode 100644 index 00000000..6e67257e --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_scale.S @@ -0,0 +1,120 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* + + headroom_t vect_s16_scale( + int16_t a[], + const int16_t b[], + const unsigned length, + const int16_t c, + const right_shift_t a_shr); + +*/ + + +#define NSTACKWORDS (8+8) + +#define FUNCTION_NAME vect_s16_scale + +#define STACK_VEC_A_SHR 0 +#define STACK_BYTEMASK 10 + +#define a x10 +#define b x11 +#define len x12 +#define c x13 +#define _32 x18 +#define tail x19 + + +.text +.p2align 2 + +FUNCTION_NAME: + { mv t3, c ; xm.entsp (NSTACKWORDS)*4 } + xm.stdsp s3,s2,32 + +xm.zip t3, c, 4 + xm.stdsp c,c,((STACK_VEC_A_SHR/2)+0)*8 + xm.stdsp c,c,((STACK_VEC_A_SHR/2)+1)*8 + xm.stdsp c,c,((STACK_VEC_A_SHR/2)+2)*8 + xm.stdsp c,c,((STACK_VEC_A_SHR/2)+3)*8 + +#undef c +#define tmp x13 + + li t3, 0x100 + { addi t3,sp, (STACK_VEC_A_SHR)*4 ; xm.vsetc t3} + mv tmp, a4 + { mv t3, tmp ; xm.vldc t3} +xm.zip t3, tmp, 4 + xm.stdsp tmp,tmp,((STACK_VEC_A_SHR/2)+0)*8 + xm.stdsp tmp,tmp,((STACK_VEC_A_SHR/2)+1)*8 + xm.stdsp tmp,tmp,((STACK_VEC_A_SHR/2)+2)*8 + xm.stdsp tmp,tmp,((STACK_VEC_A_SHR/2)+3)*8 + +#undef tmp + + { slli tail, len, SIZEOF_LOG2_S16 ; 
srli len, len, EPV_LOG2_S16 }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli tail, len, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S16 \nMessage: The shift amount is not 32" */ + { xm.zexti tail, 5 ; li _32, 32 } + + + { addi t3,sp, (STACK_VEC_A_SHR)*4 ; xm.brff len, .L_loop_bot }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.bu .L_loop_top } + +.p2align 4 +.L_loop_top: + { addi len, len, -1 ; xm.vclrdr } + { nop ; xm.vlmacc0 b} + xm.vlmacc1 b + { add b, b, _32 ; nop} + xm.vlsat t3 + { add a, a, _32 ; xm.vstr a} + { nop ; xm.bt len, .L_loop_top } +.L_loop_bot: + + { xm.mkmsk tail, tail ; xm.brff tail, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { xm.not tail, tail ; xm.vclrdr } + xm.vstrpv t3, tail + { xm.not tail, tail ; xm.vlmacc0 b} + xm.vlmacc1 b + xm.vlsat t3 + xm.vstrpv a, tail + xm.vstrpv t3, tail + { nop ; xm.vldr t3} + { nop ; xm.vstr t3} + + +.L_finish: + xm.lddsp s3,s2,32 + + { li a0, 15 ; xm.vgetc t3} + { xm.zexti t3, 5 ; nop } + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. 
*/ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + + + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_sqrt.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_sqrt.S new file mode 100644 index 00000000..375aef8f --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_sqrt.S @@ -0,0 +1,203 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" +#define XXX 1 +/* + +headroom_t vect_s16_sqrt( + int16_t a[], + const int16_t b[], + const unsigned length, + const right_shift_t b_shr, + const unsigned depth); + +*/ + + +#define NSTACKVECTS (4) +#define NSTACKWORDS (12+8*(NSTACKVECTS)+4) + +#define FUNCTION_NAME vect_s16_sqrt + +// Temporary vector needed because there's no instruction to do vR[] * vR[] +#define STACK_VEC_TMP (NSTACKWORDS-8-4) +// Holds the shifted values of b[] while we're solving it. +#define STACK_VEC_TARGET (NSTACKWORDS-16-4) +// Holds the power of 2 that is currently being worked on inside the inner loop. +// @todo If we had an instruction that set each vR[k] to the value of a register, this wouldn't be needed. +#define STACK_VEC_POW (NSTACKWORDS-24-4) + +#define STACK_VEC_TMP2 (NSTACKWORDS-32-4) + +#define STACK_DEPTH 6 + +#define a x10 +#define b x11 +#define length x12 +#define b_shr x13 + +#define depth x18 //s2 +#define mask_vec x19 //s3 +#define _32 x20 //s4 +#define pow_init x21 //s5 +#define tmp x24 //s8 +#define spare x23 //s7 + +.text +.p2align 2 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4 + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s8,0 + {addi spare,sp, (STACK_VEC_TMP2)*4 ; nop} + + sw a4, (STACK_DEPTH)*4(sp) +// Set VPU mode to 16-bit +// (length << 1) is the length of the vector in bytes. 
+{ li _32, 32 ; nop } +{ slli t3, _32, 3 ; nop } +{ slli length, length, 1 ; xm.vsetc t3} + la t3, vpu_vec_0x4000 + //la t3, vpu_vec_0x7FFF +{ mv pow_init, t3 ; nop } + + +// Maximum supported depth is 15 +{ li tmp, 15 ; lw t3, (STACK_DEPTH)*4 (sp)} +{ xm.assert t3 ; nop } +{ xm.sltu t3, tmp, t3 ; nop } +{ li _32, 32 ; xm.brff t3, .L_vect_loop_top }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; sw tmp, (STACK_DEPTH)*4 (sp)} + + +.L_vect_loop_top: + + // mask_vec is a byte mask for the elements of a[] that we're currently working on. + // using VSTRPV with mask_vec prevents us from corrupting the headroom register. + // depth is the number of MSBs that we're solving for + { xm.mkmsk mask_vec, length ; lw depth, (STACK_DEPTH)*4 (sp)} + + // First initialize the target vector using b[] + // (Doing this first allows this function to operate in-place on b[] if desired) + // @todo If we wanted to, we could do a VSIGN + VLMUL here to take an absolute value of each b[k], + // since this function will not work for any negative b[k]. + xm.vlashr b, b_shr + { addi t3,sp, (STACK_VEC_TARGET)*4 ; add b, b, _32 } + xm.vstrpv t3, mask_vec + + // Initialize the result (a[]) with 0's + { mv t3, pow_init ; xm.vclrdr } + xm.vstrpv a, mask_vec + + // VEC_POW[] is the bit we're currently solving for. Initialize to the first non-sign bit. + // (The VSTD is to zero out the VEC_POW[] elements that are going to be masked out, because + // we're going to use VEC_POW[] later to update the headroom register) + { addi t3,sp, (STACK_VEC_POW)*4 ; xm.vldr t3} + { li tmp, 1 ; xm.vstd t3} + xm.vstrpv t3, mask_vec + + // This saves us a few cycles on the first iteration (because of loop alignment, we'd need a + // 'bu .L_sqrt_loop_top' here even if we didn't want to skip ahead). 
It's necessary because + // we don't want to right-shift VEC_POW[] on the first iteration (it's already 2^14), and we + // can't fix that by initializing VEC_POW[] to 0x8000 above because that's negative and + // VLASHR is an arithmetic shift. + { addi t3,sp, (STACK_VEC_TARGET)*4 ; xm.bu .L_first_iter } + + // Inner loop. Iteratively solving for the square root bit-by-bit + // 12 instructions + 1 FNOP + .p2align 4 + .L_sqrt_loop_top: + // Load the next power of 2 and store it back to VEC_POW[] + xm.vlashr t3, tmp + xm.vstrpv t3, mask_vec + + // Add the current power of 2 to each a[] to get the next value to be tested. + // test[k] <-- a[k] + VEC_POW + { addi t3,sp, (STACK_VEC_TMP)*4 ; xm.vladd a} + + + + // vR[] contains the values we're testing. Store it and square it + // vR[k] <-- ( test[k] * test[k] ) >> 14 + xm.vstrpv t3, mask_vec + { nop ; xm.vlmul0 t3} + xm.vlmul1 t3 + xm.vstrpv spare, mask_vec + {xm.vladd spare;nop} + + { addi t3,sp, (STACK_VEC_TARGET)*4 ; nop} + .L_first_iter: + + // Subtract the squared test values from the target vector + // vR[k] <-- target[k] - (( test[k] * test[k] ) >> 14) + { nop ; xm.vlsub t3} + + // If vR[k] is negative, the test value was too large, so we don't want to update those a[k] + // for which vR[k] is negative. + + // vR[k] = a[k] + MAX( signum( vR[k] ), 0 ) * VEC_POW[k] + + { addi depth, depth, -1 ; xm.vsign } + { addi t3,sp, (STACK_VEC_POW)*4 ; xm.vpos } + + xm.vstrpv spare, mask_vec + {xm.vladd spare;nop} + + { li tmp, 1 ; xm.vlmul0 t3} + xm.vlmul1 t3 + { nop ; xm.vladd a} + + // Store the updated results in a[] + xm.vstrpv a, mask_vec + { nop ; xm.bt depth, .L_sqrt_loop_top } + .L_sqrt_loop_bot: + + // a[] now contains the results, but we haven't updated the headroom register because we've only + // been using VSTRPV. So, update the headroom register + // @todo Do we need to update the headroom register? Aren't we more or less guaranteed there's no + // headroom, because we got rid of the headroom of b[]? 
Should work out the math on this later. + + // We used mask_vec when initializing VEC_POW[], so we can use that here to avoid corrupting + // the headroom register with data that comes after a[]. x28 is already pointing at VEC_POW[]. + xm.vstrpv t3, mask_vec + { sub length, length, _32 ; xm.vldr t3} + + // If (length - 32) < 1 we're done. + { xm.slt tmp, length, tmp ; xm.vstr t3} + { add a, a, _32 ; nop }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + beqz tmp, .L_vect_loop_top +.L_vect_loop_bot: + +.L_finish: + + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s8,0 +{ li a0, 15 ; xm.vgetc t3} +{ xm.zexti t3, 5 ; nop} +{ sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_sum.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_sum.S new file mode 100644 index 00000000..3b63e845 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_sum.S @@ -0,0 +1,113 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ +#if defined(__VX4B__) + + + +/* + int32_t vect_s16_sum( + const int16_t b[], + const unsigned length); +*/ + + +#include "../asm_helper.h" + + +#define FUNCTION_NAME vect_s16_sum +#define NSTACKWORDS (24+8+4) + + +#define STACK_VEC_TMP (NSTACKWORDS-24-4) +#define STACK_VEC_TMP2 (NSTACKWORDS-8-2) + +#define b x10 +#define N x11 +#define tail x12 + + +.text +.p2align 2 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + li t3, 0x0100 + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + { addi s4,sp, (STACK_VEC_TMP2)*4 ; nop } + addi s5, s4, (-30) + + { slli tail, N, SIZEOF_LOG2_S16 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli tail, N, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + { xm.zexti tail, 5 ; xm.vclrdr } + { srli N, N, EPV_LOG2_S16 ; xm.brff tail, .L_tail_dealt_with }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri N, N, EPV_LOG2_S16 \nMessage: The shift amount is not 32", 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + la t3, vpu_vec_0x0001 + { addi s2,sp, (STACK_VEC_TMP)*4 ; xm.vldr t3} + { nop ; xm.vstd s2} + { xm.mkmsk tail, tail ; slli N, N, 3 } + xm.vstrpv s2, tail + sh2add s3, N, b + { nop ; xm.vldc s2} + { nop ; xm.vclrdr } + { srli N, N, 3 ; xm.vlmaccr0 s3} + xm.vlmaccr1 s3 + + {nop ; xm.vstd s4} + {nop ; xm.vldd s5} + {nop ; xm.vstr s4} + {nop ; xm.vldr s5} + + { li t3, 32 ; xm.vldc t3} + +.L_tail_dealt_with: + { nop ; xm.brff N, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + la t3, vpu_vec_0x0001 + { li t3, 32 ; 
xm.vldc t3} + +.L_loop_top: + { addi N, N, -1 ; xm.vlmaccr0 b} + xm.vlmaccr1 b + + {nop ; xm.vstd s4} + {nop ; xm.vldd s5} + {nop ; xm.vstr s4} + {nop ; xm.vldr s5} + + { add b, b, t3 ; xm.bt N, .L_loop_top } +.L_loop_bot: + +.L_finish: + + + + { addi a1,sp, (STACK_VEC_TMP)*4 ; nop /* adddr*/ } + + addi s4, a1, 32-2 + + { nop ; xm.vstd a1} + { nop ; lw a0, 0(s4)} + { slli a0, a0, 16 ; xm.vstr a1} + { nop ; lw a1, 0(s4)} + + xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 + { xm.zexti a1, 16 ; nop} + { or a0, a0, a1 ; xm.retsp (NSTACKWORDS)*4 } + +.L_fend: + + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_fend - FUNCTION_NAME + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_to_s32.S b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_to_s32.S new file mode 100644 index 00000000..0044623a --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_to_s32.S @@ -0,0 +1,111 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ +#if defined(__VX4B__) + + +/* +void vect_s16_to_vect_s32( + int32_t a[], + const int16_t b[], + const unsigned length); +*/ + +#include "../asm_helper.h" + +.text +.p2align 2 + +#define NSTACKWORDS (4) + +#define FUNCTION_NAME vect_s16_to_vect_s32 + +#define a x10 +#define b x11 +#define len x12 +#define _16 x13 +#define tail x18 +#define constsA x28 +#define constsB x28 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + li t3, 0x0200 + { xm.ldap t3, .L_vlmacc_consts_A ; xm.vsetc t3} + + xm.stdsp s3,s2,0 + { slli tail, len, SIZEOF_LOG2_S32 ; nop }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli tail, len, SIZEOF_LOG2_S32 \nMessage: The shift amount is not 32" */ + { srli len, len, EPV_LOG2_S32 ; xm.zexti tail, 5 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S32 \nMessage: The shift amount is not 32" */ + { xm.mkmsk tail, tail ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.bu .L_loop_top } + +.p2align 4 + +.L_vlmacc_consts_A: +.byte 0x7F, 0x00, 0x7F, 0x00, 0x7F, 0x00, 0x7F, 0x00, 0x7F, 0x00, 0x7F, 0x00, 0x7F, 0x00, 0x7F, 0x00 +.L_vlmacc_consts_B: +.byte 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01 + +/* + This function relies on some seriously horrifying deep magic. Try not to stare directly at it. + + We're converting 16-bit values to 32-bit values with the VPU in 8-bit mode. 
Because if we deal with a 16-bit value + as two separate 8-bit values, we end up getting 2 adjacent accumulators, which is 32 adjacent bits in vR. It's the + only way to expand values to a higher bit-depth inside the VPU. + + Function also relies on an understanding of the endianness of the system. A 16-bit value 0x1234 is stored as bytes + (in order) [0x34, 0x12]. To avoid changing the value represented, we need these two bytes to also be adjacent in + the output value. But, because these will end up in separate accumulators (the lower 16-bits of each being in vR + with the rest in vD), in order to ensure this, we need one to end up in the upper byte of the 16 bits and the other + to end up in the lower byte, which means our only option is to multiply by 2^8. + + Ultimately what we need in the first 4 bytes of vR (given the value above) is [0x00, 0x34, 0x12, 0x00] which when + written to memory and interpreted as an int32 will be 0x00123400. + + So, the 0x34 just gets MACCed by (0+0+1), leaving it in the lower bits. The 0x12 gets MACCed by (0x7F + 0x7F + 0x02 + = 0x100), pushing it into the high bits of the half word in vR. 
+ +*/ + +.L_loop_top: //All in 8-bit mode + { addi len, len, -1 ; xm.vclrdr } + { li _16, 16 ; xm.vldc b} + { add b, b, _16 ; xm.vlmacc0 t3} + xm.vlmacc1 t3 + { add t3, t3, _16 ; xm.vlmacc0 t3} + xm.vlmacc1 t3 + { sub t3, t3, _16 ; xm.vlmacc0 t3} + xm.vlmacc1 t3 + { add a, a, _16 ; xm.vstr a} + { add a, a, _16 ; xm.bt len, .L_loop_top } +.L_loop_bot: + + { li _16, 16 ; xm.brff tail, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.vclrdr } + { nop ; xm.vldc b} + { nop ; xm.vlmacc0 t3} + xm.vlmacc1 t3 + { add t3, t3, _16 ; xm.vlmacc0 t3} + xm.vlmacc1 t3 + { nop ; xm.vlmacc0 t3} + xm.vlmacc1 t3 + xm.vstrpv a, tail + +.L_finish: + xm.lddsp s3,s2,0 + xm.retsp (NSTACKWORDS)*4 /* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS \nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. 
*/ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/s32_to_chunk_s32.S b/lib_xcore_math/src/arch/vx4b/vect_s32/s32_to_chunk_s32.S new file mode 100644 index 00000000..f8a312ae --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/s32_to_chunk_s32.S @@ -0,0 +1,49 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* +void s32_to_chunk_s32( + int32_t a[8], + int32_t b); +*/ +// Writes the scalar b to every element of a[8]: each xm.stdi below names the register pair (b, b) with byte offsets 0, 8, 16 and 24 from a, which together span the 32 bytes of a[8] (assumes xm.stdi is a 64-bit pair store - TODO confirm against the ISA). + +#define NSTACKWORDS (0) + +#define FUNCTION_NAME s32_to_chunk_s32 + +.text +.p2align 4 + + +#define a x10 +#define b x11 + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in entsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdi b,b, 0(a) + xm.stdi b,b, 8(a) + xm.stdi b,b, 16(a) + xm.stdi b,b, 24(a) + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. 
*/ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_abs.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_abs.S new file mode 100644 index 00000000..5ab41616 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_abs.S @@ -0,0 +1,106 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +.text +.p2align 2 + +#define NSTACKWORDS (12) +#define STACK_TMP_VEC 0 + +#define a x10 +#define b x11 +#define len x12 +#define tail x13 + + +/* +headroom_t vect_s32_abs( + int32_t a[], + const int32_t b[], + const unsigned length); +*/ +// Entry stub: issues xm.vsetc with t3 = 0, sets tail = len << SIZEOF_LOG2_S32 and then applies xm.zexti tail, 5 (presumably keeping the low 5 bits, i.e. the bytes beyond the last whole 32-byte vector), sets len = len >> EPV_LOG2_S32 (the loop count expected in x12), then branches to the shared body .L_apply_op. +vect_s32_abs: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + { li t3, 0 ; slli tail, len, SIZEOF_LOG2_S32 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli tail, len, SIZEOF_LOG2_S32 \nMessage: The shift amount is not 32" */ + { srli len, len, EPV_LOG2_S32 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S32 \nMessage: The shift amount is not 32" */ + { xm.zexti tail, 5 ; xm.bu .L_apply_op } + +.L_func_end_s32: + + + + +#undef a +#undef b +#undef len + +/* + When branching here: + * a --> x10 + * b --> x11 + * loop_count --> x12 + * tail --> x13 + * VPU mode must already be set. 
+*/ + +#define a x10 +#define b x11 +#define loop_count x12 +#define tail x13 + +.type .L_apply_op,@function + +.L_apply_op: + + { xm.mkmsk tail, tail ; nop } + { mv t3, b ; xm.brff loop_count, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { li a1, 32 ; xm.bu .L_loop_top } // a1 is x11, the register b arrived in; b was copied to t3 on the previous line, so x11 is reused as the 32-byte step +.p2align 4 +.L_loop_top: + { addi loop_count, loop_count, -1 ; xm.vldr t3} + { nop ; xm.vsign } + { nop ; xm.vlmul0 t3} //TODO this is wrong for 16 bit + { add a, a, a1 ; xm.vstr a} + { add t3, t3, a1 ; xm.bt loop_count, .L_loop_top } +.L_loop_bot: +// Tail: tail now holds the xm.mkmsk result and is the mask operand of the xm.vstrpv partial stores below; the vector at sp[STACK_TMP_VEC] is used as scratch. + { nop ; xm.brff tail, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.vclrdr } + { nop ; xm.vldr t3} + { nop ; xm.vsign } + { addi t3,sp, (STACK_TMP_VEC)*4 ; xm.vlmul0 t3} //TODO this is wrong for 16 bit + { nop ; xm.vstd t3} + { nop ; xm.vpos } + xm.vstrpv t3, tail + { nop ; xm.vldr t3} + { nop ; xm.vstr t3} + xm.vstrpv a, tail + +.L_finish: // a0 is the return value, computed from the control word read back by xm.vgetc (presumably the headroom_t result - confirm the field layout against the ISA) + { li a0, 32 ; xm.vgetc t3} + { srli a1, t3, 8 ; nop } + { xm.zexti t3, 5 ; xm.shr a0, a0, a1 } + { addi t3, t3, 1 ; nop } + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + +.L_end_apply_op: +.size .L_apply_op, .L_end_apply_op - .L_apply_op + +.global vect_s32_abs +.type vect_s32_abs,@function +.set vect_s32_abs.nstackwords,NSTACKWORDS; .global vect_s32_abs.nstackwords /* Translation error on this line: unexpected token at position 41. */ +.set vect_s32_abs.maxcores,1; .global vect_s32_abs.maxcores /* Translation error on this line: unexpected token at position 28. */ +.set vect_s32_abs.maxtimers,0; .global vect_s32_abs.maxtimers /* Translation error on this line: unexpected token at position 29. */ +.set vect_s32_abs.maxchanends,0; .global vect_s32_abs.maxchanends /* Translation error on this line: unexpected token at position 31. 
*/ +.size vect_s32_abs, .L_func_end_s32 - vect_s32_abs + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_abs_sum.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_abs_sum.S new file mode 100644 index 00000000..49441ade --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_abs_sum.S @@ -0,0 +1,109 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +/* + +int64_t vect_s32_abs_sum( + const int32_t b[], + const unsigned length); +*/ + + +#include "../asm_helper.h" + +.text +.p2align 2 + +#define NSTACKVECS (2) +#define NSTACKWORDS (8 + 8*NSTACKVECS+4) + +#define FUNCTION_NAME vect_s32_abs_sum + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) +#define STACK_VEC_VR (NSTACKWORDS-16-4) + +#define b x10 // ![0x%08X] +#define N x11 // ![%d] +#define tail x12 // ![0x%X] +#define _32 x13 // ![%d] +#define tmp x18 // ![%d] +#define mask x19 // ![0x%X] + +// x18 and x19 (tmp, mask) are the registers the prologue spills as s2/s3 (xm.stdsp s3,s2,0) and the epilogue reloads (xm.lddsp s3,s2,0). + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.align 16; /* Translation error on this line: unexpected token at position 9. 
*/ + +FUNCTION_NAME: + + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + + { li t3, 0 ; slli tail, N, SIZEOF_LOG2_S32 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli tail, N, SIZEOF_LOG2_S32 \nMessage: The shift amount is not 32" */ + { addi t3,sp, (STACK_VEC_VR)*4 ; xm.vsetc t3} + { xm.zexti tail, 5 ; xm.vclrdr } + { srli N, N, EPV_LOG2_S32 ; xm.brff tail, .L_tail_dealt_with }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri N, N, EPV_LOG2_S32 \nMessage: The shift amount is not 32", 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + { addi tmp,sp, (STACK_VEC_TMP)*4 ; mv t3, b } + { xm.mkmsk mask, tail ; xm.vstd tmp} + { addi t3,sp, (STACK_VEC_VR)*4 ; xm.vldr t3} + { nop ; xm.vsign } + xm.vstrpv tmp, mask + { nop ; xm.vldc tmp} + { nop ; xm.vclrdr } + { add b, b, tail ; xm.vlmacc0 b} +.L_tail_dealt_with: + { li _32, 32 ; xm.brff N, .L_loop_bot_s32 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ +// One 32-byte vector of b[] per iteration (N = length >> EPV_LOG2_S32); b advances by _32 each pass. +.L_loop_top_s32: + { mv t3, b ; xm.vstr t3} + { addi N, N, -1 ; xm.vldr t3} + { addi t3,sp, (STACK_VEC_TMP)*4 ; xm.vsign } + { nop ; xm.vstr t3} + { addi t3,sp, (STACK_VEC_VR)*4 ; xm.vldc t3} + { nop ; xm.vldr t3} + { add b, b, _32 ; xm.vlmacc0 b} + { nop ; xm.bt N, .L_loop_top_s32 } +.L_loop_bot_s32: + +.L_finish_s32: + + { nop ; xm.vstr t3} +lui t3, %hi(vpu_vec_0x40000000) + addi t3,t3, %lo(vpu_vec_0x40000000) + { nop ; xm.vldc t3} +lui t3, %hi(vpu_vec_0x80000000) + addi t3,t3, %lo(vpu_vec_0x80000000) + { nop ; xm.vlmacc0 t3} +lui t3, %hi(vpu_vec_zero) + addi t3,t3, 
%lo(vpu_vec_zero) + { addi t3,sp, (STACK_VEC_VR)*4 ; xm.vldr t3} + { nop ; xm.vlmaccr0 t3} + { nop ; xm.vstd t3} + { nop ; xm.vlmaccr0 t3} + { nop ; xm.vstr t3} + { nop ; lw a1,0 ( t3)} + { addi a1, a1, 8 ; lw a0,4 ( t3)} // NOTE(review): this adds the constant 8 to the word loaded into a1; the sibling vect_s32_dot.S applies `xm.sext a1, 8` to its high word at the matching point - confirm this is not a mistranslated sign-extension + + xm.lddsp s3,s2,0 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + + +//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. */ +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. */ +.L_end: + .size FUNCTION_NAME, .L_end - FUNCTION_NAME + + + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_argmax.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_argmax.S new file mode 100644 index 00000000..74fee67f --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_argmax.S @@ -0,0 +1,154 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ +#if defined(__VX4B__) + + +/* + +unsigned vect_s32_argmax( + const int32_t b[], + const unsigned length); +*/ + + +#include "../asm_helper.h" + +.text +.p2align 2 + +#define NSTACKVECS (3) +#define NSTACKWORDS (8 + 8*NSTACKVECS+4) + +#define FUNCTION_NAME vect_s32_argmax + +#define STACK_VEC_MAX_DEX (NSTACKWORDS-8-4) +#define STACK_VEC_CUR_MAX (NSTACKWORDS-16-4) +#define STACK_VEC_CUR_DEX (NSTACKWORDS-24-4) + +#define STACK_N 6 + +#define b x10 // ![0x%08X] +#define N x11 // ![%d] +#define vec_8s x12 // ![0x%X] +#define tmp x13 // ![%d] +#define tmz x18 // ![%d] +#define cur_max x19 // ![0x%08X] +#define mask_0xF x20 // ![0x%04X] + + + +FUNCTION_NAME: + + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + +{ li t3, 0 ; sw N, (STACK_N)*4 (sp)} +{ srli N, N, 3 ; xm.vsetc t3} + +// cur_max[i] = -0x80000000 + la t3, vpu_vec_0x80000000 +{ addi t3,sp, (STACK_VEC_CUR_MAX)*4 ; xm.vldr t3} +{ xm.mkmski mask_0xF, 4 ; xm.vstr t3} + +// cur_dex[i] = i +{ addi tmp,sp, (STACK_VEC_CUR_DEX)*4 ; li t3, 7 } +.L_setup_cur_dex: +xm.stw t3, t3(tmp) + { addi t3, t3, -1 ; xm.bt t3, .L_setup_cur_dex } + +// max_dex[i] = -1 + la t3, vpu_vec_neg_1 +{ addi t3,sp, (STACK_VEC_MAX_DEX)*4 ; xm.vldr t3} +{ addi cur_max,sp, (STACK_VEC_CUR_MAX)*4 ; xm.vstr t3} + + la t3, vpu_vec_0x00000008 +{ mv vec_8s, t3 ; xm.vclrdr } +{ mv t3, b ; xm.brff N, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ +// Main loop: one 8-element vector of b[] per pass (N was set to length >> 3 above); at the end of each pass vec_8s is added to the cur_dex vector and stored back. +.L_loop_top: + { mv b, t3 ; xm.vldr t3} + { addi N, N, -1 ; xm.vlsub cur_max} + { addi t3,sp, 0 ; xm.vdepth1 } + xm.vstrpv t3, mask_0xF + { mv t3, b ; lw tmp, 0 (sp)} + { mv tmz, tmp ; nop } +xm.zip tmz, tmp, 0 + { mv tmz, tmp ; nop } +xm.zip tmz, tmp, 0 + { addi t3,sp, (STACK_VEC_CUR_DEX)*4 ; xm.vldr t3} + 
xm.vstrpv cur_max, tmp + { addi tmz,sp, (STACK_VEC_MAX_DEX)*4 ; xm.vldr t3} + xm.vstrpv tmz, tmp + { nop ; xm.vladd vec_8s} + { li t3, 32 ; xm.vstr t3} + { add t3, b, t3 ; xm.bt N, .L_loop_top } +.L_loop_bot: + +{ nop ; lw N, (STACK_N)*4 (sp)} +{ xm.zexti N, 3 ; nop } +{ xm.mkmsk N, N ; xm.brff N, .L_no_tail }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ +{ mv b, t3 ; xm.vldr t3} +{ addi t3,sp, 0 ; xm.vlsub cur_max} +{ nop ; xm.vdepth1 } + xm.vstrpv t3, mask_0xF +{ mv t3, b ; lw tmp, 0 (sp)} +{ and tmp, tmp, N ; and tmz, tmp, N } +xm.zip tmz, tmp, 0 +{ mv tmz, tmp ; nop } +xm.zip tmz, tmp, 0 +{ addi t3,sp, (STACK_VEC_CUR_DEX)*4 ; xm.vldr t3} + xm.vstrpv cur_max, tmp +{ addi tmz,sp, (STACK_VEC_MAX_DEX)*4 ; xm.vldr t3} + xm.vstrpv tmz, tmp + +.L_no_tail: + +#undef cur_max +#undef vec_8s +#undef mask_0xF + +#define cur_max x19 // ![%d] +#define max_dex x12 // ![0x%08X] +// Scalar pass over the 8 lanes: start from lane 7, then visit lanes 6..0; a lane replaces (cur_max, max_dex) when its value is larger, or equal with a smaller index (reading xm.bt / xm.brff as branch-if-nonzero / branch-if-zero). +{ addi t3,sp, (STACK_VEC_CUR_MAX)*4 ; li N, 7 } +{ addi s4,sp, (STACK_VEC_MAX_DEX)*4 ; xm.ldw cur_max,N ( t3)} +{ addi N, N, -1 ; xm.ldw max_dex,N ( s4)} +.L_loop2_top: + { nop ; xm.ldw a0,N ( t3)} + { xm.slt tmp, a0, cur_max ; xm.ldw s5,N ( s4)} + xm.eq tmz, a0, cur_max +{ nop ; xm.bt tmp, .L_less_than } + .L_greater_or_equal: + { xm.slt tmp, s5, max_dex ; xm.brff tmz, .L_greater }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + .L_equal: + { nop ; xm.brff tmp, .L_less_than }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + .L_greater: + { mv cur_max, a0 ; mv max_dex, s5 } + + .L_less_than: + { addi N, N, -1 ; xm.bt N, .L_loop2_top } + + xm.lddsp s5,s4,16 + xm.lddsp s3,s2,8 +{ mv a0, max_dex ; xm.retsp (NSTACKWORDS)*4 } + + +.L_end: + + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. 
*/ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_end - FUNCTION_NAME + + + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_argmin.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_argmin.S new file mode 100644 index 00000000..9e441893 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_argmin.S @@ -0,0 +1,161 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +/* + +unsigned vect_s32_argmin( + const int32_t b[], + const unsigned length); + +Returns, in a0, the index min_dex chosen by the final scalar pass over the 8 per-lane (value, index) candidates. +*/ + + +#include "../asm_helper.h" + +.text +.p2align 2 + +#define NSTACKVECS (3) +#define NSTACKWORDS (8 + 8*NSTACKVECS+4) + +#define FUNCTION_NAME vect_s32_argmin + +#define STACK_VEC_MAX_DEX (NSTACKWORDS-8-4) +#define STACK_VEC_CUR_MAX (NSTACKWORDS-16-4) +#define STACK_VEC_CUR_DEX (NSTACKWORDS-24-4) + +#define STACK_N 6 + +#define b x10 // ![0x%08X] +#define N x11 // ![%d] +#define vec_8s x12 // ![0x%X] +#define tmp x13 // ![%d] +#define tmz x18 // ![%d] +#define cur_min x19 // ![0x%08X] +#define mask_0xF x20 // ![0x%04X] +#define vec_ones x21 // ![0x%08X] + + + +FUNCTION_NAME: + + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + +{ li t3, 0 ; sw N, (STACK_N)*4 (sp)} +{ srli N, N, 3 ; xm.vsetc t3} + +// cur_min[i] = 0x7FFFFFFF (kept in the STACK_VEC_CUR_MAX slot) + la t3, 
vpu_vec_0x7FFFFFFF +{ addi t3,sp, (STACK_VEC_CUR_MAX)*4 ; xm.vldr t3} +{ xm.mkmski mask_0xF, 4 ; xm.vstr t3} + +// cur_dex[i] = i +{ addi tmp,sp, (STACK_VEC_CUR_DEX)*4 ; li t3, 7 } +.L_setup_cur_dex: +xm.stw t3, t3(tmp) + { addi t3, t3, -1 ; xm.bt t3, .L_setup_cur_dex } + +// min_dex[i] = -1 (kept in the STACK_VEC_MAX_DEX slot) +lui t3, %hi(vpu_vec_neg_1) + addi t3,t3, %lo(vpu_vec_neg_1) +{ addi t3,sp, (STACK_VEC_MAX_DEX)*4 ; xm.vldr t3} +{ addi cur_min,sp, (STACK_VEC_CUR_MAX)*4 ; xm.vstr t3} + + la t3, vpu_vec_0x00000008 +{ mv vec_8s, t3 ; xm.vclrdr } + la t3, vpu_vec_0x00000001 +{ mv vec_ones, t3 ; nop } +{ mv t3, b ; xm.brff N, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ +// Main loop: one 8-element vector of b[] per pass (N was set to length >> 3 above); at the end of each pass vec_8s is added to the cur_dex vector and stored back. +.L_loop_top: + { mv b, t3 ; xm.vldr t3} + { nop ; xm.vladd vec_ones} + { addi N, N, -1 ; xm.vlsub cur_min} + { addi t3,sp, 0 ; xm.vdepth1 } + xm.vstrpv t3, mask_0xF + { mv t3, b ; lw tmp, 0 (sp)} + { xm.not tmp, tmp ; xm.not tmz, tmp } +xm.zip tmz, tmp, 0 + { mv tmz, tmp ; nop } +xm.zip tmz, tmp, 0 + { addi t3,sp, (STACK_VEC_CUR_DEX)*4 ; xm.vldr t3} + xm.vstrpv cur_min, tmp + { addi tmz,sp, (STACK_VEC_MAX_DEX)*4 ; xm.vldr t3} + xm.vstrpv tmz, tmp + { nop ; xm.vladd vec_8s} + { li t3, 32 ; xm.vstr t3} + { add t3, b, t3 ; xm.bt N, .L_loop_top } +.L_loop_bot: + +{ nop ; lw N, (STACK_N)*4 (sp)} +{ xm.zexti N, 3 ; nop } +{ xm.mkmsk N, N ; xm.brff N, .L_no_tail }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ +{ mv b, t3 ; xm.vldr t3} +{ nop ; xm.vladd vec_ones} +{ addi t3,sp, 0 ; xm.vlsub cur_min} +{ nop ; xm.vdepth1 } + xm.vstrpv t3, mask_0xF +{ mv t3, b ; lw tmp, 0 (sp)} +{ xm.not tmp, tmp ; nop } +{ and tmp, tmp, N ; and tmz, tmp, N } +xm.zip tmz, tmp, 0 +{ mv tmz, tmp ; nop } +xm.zip tmz, tmp, 0 +{ addi t3,sp, (STACK_VEC_CUR_DEX)*4 ; xm.vldr t3} + xm.vstrpv cur_min, tmp +{ addi tmz,sp, (STACK_VEC_MAX_DEX)*4 ; xm.vldr t3} + xm.vstrpv tmz, tmp + +.L_no_tail: + +#undef cur_min +#undef vec_8s +#undef mask_0xF + +#define 
cur_min x19 // ![%d] +#define min_dex x12 // ![0x%08X] +// Scalar pass over the 8 lanes: start from lane 7, then visit lanes 6..0; a lane replaces (cur_min, min_dex) when its value is smaller, or equal with a smaller index (reading xm.bt / xm.brff as branch-if-nonzero / branch-if-zero). +{ addi t3,sp, (STACK_VEC_CUR_MAX)*4 ; li N, 7 } +{ addi s4,sp, (STACK_VEC_MAX_DEX)*4 ; xm.ldw cur_min,N ( t3)} +{ addi N, N, -1 ; xm.ldw min_dex,N ( s4)} +.L_loop2_top: + { nop ; xm.ldw a0,N ( t3)} + { xm.slt tmp, cur_min, a0 ; xm.ldw s5,N ( s4)} + xm.eq tmz, cur_min, a0 +{ nop ; xm.bt tmp, .L_greater_than } + .L_less_or_equal: + { xm.slt tmp, s5, min_dex ; xm.brff tmz, .L_less }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + .L_equal: + { nop ; xm.brff tmp, .L_greater_than }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + .L_less: + { mv cur_min, a0 ; mv min_dex, s5 } + + .L_greater_than: + { addi N, N, -1 ; xm.bt N, .L_loop2_top } + + xm.lddsp s5,s4,16 + xm.lddsp s3,s2,8 +{ mv a0, min_dex ; xm.retsp (NSTACKWORDS)*4 } + +.L_end: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_end - FUNCTION_NAME + + + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_clip.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_clip.S new file mode 100644 index 00000000..6c8ae9ec --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_clip.S @@ -0,0 +1,330 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ +#if defined(__VX4B__) + + +/* + +headroom_t vect_s32_clip( + int32_t a[], + const int32_t b[], + const unsigned length, + const int32_t lower_bound, + const int32_t upper_bound, + const int b_shr); +*/ + + +#include "../asm_helper.h" + +.text +.p2align 2 + + +#define NSTACKVECS (6) +#define NSTACKWORDS (8 + 8*(NSTACKVECS)+8) + +#define FUNCTION_NAME vect_s32_clip + +#define STACK_VEC(K) (NSTACKWORDS - (8*((K)+1))-8) + +#define a x10 +#define b x11 +#define N x12 +#define lower x13 +#define upper x18 +#define b_shr x19 +#define tail x20 +#define tmp1 x21 +#define tmp2 x22 +#define int_max x23 +#define int_min x24 + + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + li t3, 0x0 + xm.stdsp s7,s6,16 + { slli tail, N, SIZEOF_LOG2_S32 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli tail, N, SIZEOF_LOG2_S32 \nMessage: The shift amount is not 32" */ + { xm.zexti tail, 5 ; sw s8, 24 (sp)} + + { li tmp1, 31 ; srli N, N, EPV_LOG2_S32 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri N, N, EPV_LOG2_S32 \nMessage: The shift amount is not 32" */ + { xm.mkmsk int_max, tmp1 ; xm.vclrdr + } + { xm.addi int_min, int_max, 1 ; xm.mkmsk tail, tail } + + // If upper >= 0 and lower <= 0, we can do this more efficiently. + mv upper, a4 + { li tmp1, 0 ; nop} + mv b_shr, a5 + { xm.slt tmp2, upper, tmp1 ; nop } + bnez tmp2, .L_lower_nice // taken when upper < 0 (tmp2 = upper < tmp1, with tmp1 = 0) + { xm.slt tmp1, tmp1, lower ; nop } + bnez tmp1, .L_upper_nice // taken when lower > 0 (tmp1 = 0 < lower) + + + // Otherwise, we have the nice situation. 
+.L_nice: + + //In the nice situation, the upper bound is no more than 1 VLADD away from the positive saturation + // point of the VPU, and the lower bound is no more than 1 VLADD away from the negative saturation + // point of the VPU. + + { addi t3,sp, (STACK_VEC(0))*4 ; sub upper, int_max, upper } // upper = int_max - upper, broadcast into STACK_VEC(0) below + { nop ; sub lower, int_min, lower } // lower = int_min - lower, broadcast into STACK_VEC(1) below + + + xm.stdi upper,upper, 0(t3) + xm.stdi upper,upper, 8(t3) + xm.stdi upper,upper, 16(t3) + xm.stdi upper,upper, 24(t3) + + { nop ; xm.neg upper, upper } + { addi t3,sp, (STACK_VEC(1))*4 ; nop } + xm.stdi lower,lower, 0(t3) + xm.stdi lower,lower, 8(t3) + xm.stdi lower,lower, 16(t3) + xm.stdi lower,lower, 24(t3) + + { nop ; xm.neg lower, lower } + { nop ; addi t3,sp, (STACK_VEC(2))*4 } // STACK_VEC(2) gets the negated upper offset + + xm.stdi upper,upper, 0(t3) + xm.stdi upper,upper, 8(t3) + xm.stdi upper,upper, 16(t3) + xm.stdi upper,upper, 24(t3) + + { nop ; addi t3,sp, (STACK_VEC(3))*4 } // STACK_VEC(3) gets the negated lower offset + + xm.stdi lower,lower, 0(t3) + xm.stdi lower,lower, 8(t3) + xm.stdi lower,lower, 16(t3) + xm.stdi lower,lower, 24(t3) + +#define vec_upper upper +#define vec_lower lower +#define vec_nupper tmp1 +#define vec_nlower tmp2 +#define _32 int_min + + {addi vec_upper,sp, (STACK_VEC(0))*4 ; nop} + {addi vec_lower,sp, (STACK_VEC(1))*4 ; nop} + {addi vec_nupper,sp, (STACK_VEC(2))*4 ; nop} + {addi vec_nlower,sp, (STACK_VEC(3))*4 ; nop} + { li _32, 32 ; xm.brff N, .L_nice_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_nice_loop_top: + xm.vlashr b, b_shr + { add b, b, _32 ; xm.vladd vec_upper} + { addi N, N, -1 ; xm.vladd vec_nupper} + { nop ; xm.vladd vec_lower} + { nop ; xm.vladd vec_nlower} + { add a, a, _32 ; xm.vstr a} + { nop ; xm.bt N, .L_nice_loop_top } + .L_nice_loop_bot: + + beqz tail, .L_finish + + xm.vlashr b, b_shr + { nop ; xm.vladd vec_upper} + { nop ; xm.vladd vec_nupper} + { nop ; xm.vladd vec_lower} + { nop ; xm.vladd vec_nlower} + j .L_finishish + +/* + C logic: + (the sketch below is written with 16-bit types; this file is the int32_t variant) + void clip16(int16_t output[], int16_t input[], 
int16_t lower, int16_t upper, unsigned length, int input_shr) + { + if(upper >= 0 && lower <= 0){ + + int16_t up_thing = VPU_INT16_MAX - upper; + int16_t lo_thing = VPU_INT16_MIN - lower; + + // 7 instructions required + for(unsigned int i = 0; i < length; i++){ + + int16_t tmp = input[i] >> input_shr; + tmp = SATURATING_ADD(tmp, up_thing); + tmp = tmp - up_thing; + tmp = SATURATING_ADD(tmp, lo_thing); + tmp = tmp - lo_thing; + + output[i] = tmp; + } + } else { + + int16_t one, two, three; + + if(upper >= 0){ + one = VPU_INT16_MAX - upper; + two = VPU_INT16_MIN; + three = VPU_INT16_MIN - (lower - upper); + } else { + one = VPU_INT16_MIN - lower; + two = VPU_INT16_MAX; + three = VPU_INT16_MAX - (upper - lower); + } + + // 9 instructions required + for(unsigned int i = 0; i < length; i++){ + + int16_t tmp = input[i] >> input_shr; + tmp = SATURATING_ADD(tmp, one); + tmp = tmp - one; + tmp = tmp + two; + tmp = SATURATING_ADD(tmp, three); + tmp = tmp - three; + tmp = tmp - two; + + output[i] = tmp; + } + } + } + +*/ + + + +#undef vec_upper +#undef vec_lower +#undef vec_nupper +#undef vec_nlower +#undef _32 + +#define vec_one upper +#define vec_two lower +#define vec_three tmp1 + +#define vec_none tmp2 +#define vec_ntwo int_max +#define vec_nthree int_min + + // The nice thing about the not nice scenario is that at least one of the two bounds is + // guaranteed to be within one VLADD of the relevant saturation point. 
+ +.L_upper_nice: + + { sub vec_one, int_max, upper ; xm.neg vec_three, lower } + addi vec_three, vec_three, -1 + { addi vec_two, int_min, 1 ; xm.bu .L_not_nice_thing } + +.L_lower_nice: + { sub vec_one, int_min, lower ; xm.neg vec_three, upper } + { mv vec_two, int_max ; nop } + + +.L_not_nice_thing: + + { addi t3,sp, (STACK_VEC(0))*4 ; nop } + xm.stdi vec_one,vec_one, 0(t3) + xm.stdi vec_one,vec_one, 8(t3) + xm.stdi vec_one,vec_one, 16(t3) + xm.stdi vec_one,vec_one, 24(t3) + + { addi t3,sp, (STACK_VEC(1))*4 ; nop } + xm.stdi vec_two,vec_two, 0(t3) + xm.stdi vec_two,vec_two, 8(t3) + xm.stdi vec_two,vec_two, 16(t3) + xm.stdi vec_two,vec_two, 24(t3) + + + lui t3, 0x80000 // t3 = 0x80000000; a value equal to it is bumped by 1 first, since negating 0x80000000 in 32 bits gives 0x80000000 again + bne t3, vec_one, .skip1 + addi vec_one, vec_one, 1 +.skip1: + bne t3, vec_two, .skip2 + addi vec_two, vec_two, 1 +.skip2: +{ neg vec_one, vec_one ; nop } +{ nop;neg vec_two, vec_two } + + { addi t3,sp, (STACK_VEC(3))*4 ; nop } + xm.stdi vec_one,vec_one, 0(t3) + xm.stdi vec_one,vec_one, 8(t3) + xm.stdi vec_one,vec_one, 16(t3) + xm.stdi vec_one,vec_one, 24(t3) + + { addi t3,sp, (STACK_VEC(2))*4 ; nop } + xm.stdi vec_two,vec_two, 0(t3) + xm.stdi vec_two,vec_two, 8(t3) + xm.stdi vec_two,vec_two, 16(t3) + xm.stdi vec_two,vec_two, 24(t3) + + { xm.neg vec_two, vec_three ; nop } + { addi t3,sp, (STACK_VEC(4))*4 ; nop } + xm.stdi vec_three,vec_three, 0(t3) + xm.stdi vec_three,vec_three, 8(t3) + xm.stdi vec_three,vec_three, 16(t3) + xm.stdi vec_three,vec_three, 24(t3) + + { addi t3,sp, (STACK_VEC(5))*4 ; nop } + xm.stdi vec_two,vec_two, 0(t3) + xm.stdi vec_two,vec_two, 8(t3) + xm.stdi vec_two,vec_two, 16(t3) + xm.stdi vec_two,vec_two, 24(t3) + + {addi vec_one,sp, (STACK_VEC(0))*4 ; nop} + {addi vec_none,sp, (STACK_VEC(3))*4 ; nop} + {addi vec_two,sp, (STACK_VEC(1))*4 ; nop} + {addi vec_ntwo,sp, (STACK_VEC(2))*4 ; nop} + {addi vec_three,sp, (STACK_VEC(4))*4 ; nop} + {addi vec_nthree,sp, (STACK_VEC(5))*4 ; nop} + + { li t3, 32 ; xm.brff N, .L_not_nice_loop_bot }/* XAT Warning: 'Instruction xm.brff can only 
branch forwards; this branch may need revising' */ +.L_not_nice_loop_top: + xm.vlashr b, b_shr + { add b, b, t3 ; xm.vladd vec_one} + { addi N, N, -1 ; xm.vladd vec_none} + { nop ; xm.vladd vec_two} + { nop ; xm.vladd vec_three} + { nop ; xm.vladd vec_nthree} + { nop ; xm.vladd vec_ntwo} + { add a, a, t3 ; xm.vstr a} + { nop ; xm.bt N, .L_not_nice_loop_top } +.L_not_nice_loop_bot: + + { nop ; xm.brff tail, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + xm.vlashr b, b_shr + { nop ; xm.vladd vec_one} + { nop ; xm.vladd vec_none} + { nop ; xm.vladd vec_two} + { nop ; xm.vladd vec_three} + { nop ; xm.vladd vec_nthree} + { nop ; xm.vladd vec_ntwo} + + +.L_finishish: // shared tail for both paths: partial store of the last vector to a[] under the tail mask; tmp1 holds a stack-vector address in either path and is used as scratch + { nop ; xm.vstd tmp1} + xm.vstrpv a, tail + xm.vstrpv tmp1, tail + { nop ; xm.vldd tmp1} + { nop ; xm.vstd tmp1} + +.L_finish: + { li a0, 31 ; xm.vgetc t3} + { xm.zexti t3, 5 ; lw s8, 24 (sp)} + xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 + xm.lddsp s7,s6,16 + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + +.L_func_end: + +//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. */ +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. 
*/ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_dot.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_dot.S new file mode 100644 index 00000000..b1697f5f --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_dot.S @@ -0,0 +1,118 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +/* +int64_t vect_s32_dot( + const int32_t b[], + const int32_t c[], + const unsigned length, + const int b_shr, + const int c_shr); +*/ + +#include "../asm_helper.h" + +#define NSTACKWORDS (8 + 24+4) + +#define FUNCTION_NAME vect_s32_dot + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) +#define STACK_VEC_VR (NSTACKWORDS-16-4) +#define STACK_VEC_VD (NSTACKWORDS-24-4) + +#define b x10 +#define c x11 +#define N x12 +#define b_shr x13 +#define c_shr x18 +#define tail x19 +#define vec_vd x20 +#define vec_vr x21 +#define vec_tmp x22 +#define _32 x23 + + + +.text; .issue_mode dual /* Translation error on this line: unexpected token at position 5. 
*/ +.p2align 2 + + +FUNCTION_NAME: + + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + xm.stdsp s7,s6,16 + + { li t3, 0 ; nop } + { slli tail, N, SIZEOF_LOG2_S32 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli tail, N, SIZEOF_LOG2_S32 \nMessage: The shift amount is not 32" */ + { xm.zexti tail, 5 ; xm.vclrdr } + { srli N, N, EPV_LOG2_S32 ; addi vec_tmp,sp, (STACK_VEC_TMP)*4 }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri N, N, EPV_LOG2_S32 \nMessage: The shift amount is not 32" */ + mv c_shr, a4 // c_shr is the fifth argument of the prototype, so it arrives in a4 + +{ xm.ldawsp vec_vr, (STACK_VEC_VD + 1)*4 ; nop} // vec_vr is one word above vec_vd on purpose: the loop stores vD at vec_vd and vR 32 bytes higher, then reloads vR from vec_vr and vD from vec_vr - 32, so stored element 0 comes back as element 7 (presumably to undo the accumulator rotation of xm.vlmaccr0 - confirm) +{ xm.ldawsp vec_vd, (STACK_VEC_VD )*4 ; nop } + { li _32, 32 ; xm.brff N, .L_loop_bot_s32 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + +.L_loop_top_s32: + { add vec_vd, vec_vd, _32 ; xm.vstd vec_vd} + { sub vec_vd, vec_vd, _32 ; xm.vstr vec_vd} + xm.vlashr b, b_shr + { add b, b, _32 ; xm.vstr vec_tmp} + { nop ; xm.vldc vec_tmp} + xm.vlashr c, c_shr + { mv t3, vec_vr ; xm.vstr vec_tmp} + { sub vec_vr, vec_vr, _32 ; xm.vldr t3} + { add vec_vr, vec_vr, _32 ; xm.vldd vec_vr} + { addi N, N, -1 ; xm.vlmaccr0 vec_tmp} + { add c, c, _32 ; xm.bt N, .L_loop_top_s32 } +.L_loop_bot_s32: + { xm.mkmsk tail, tail ; xm.brff tail, .L_finish_s32 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { add vec_vd, vec_vd, _32 ; xm.vstd vec_vd} + { sub vec_vd, vec_vd, _32 ; xm.vstr vec_vd} + { nop ; xm.vclrdr } + xm.vlashr b, b_shr + { 
add b, b, _32 ; xm.vstd vec_tmp} + xm.vstrpv vec_tmp, tail + { nop ; xm.vldc vec_tmp} + xm.vlashr c, c_shr + { mv t3, vec_vr ; xm.vstr vec_tmp} + { sub vec_vr, vec_vr, _32 ; xm.vldr t3} + { nop ; xm.vldd vec_vr} + { nop ; xm.vlmaccr0 vec_tmp} + +.L_finish_s32: + + { addi t3,sp, (STACK_VEC_TMP)*4 ; nop } + // (vD:vR)[k] == ((int32_t)vD[k])*(2^32) + ((uint32_t)vR[k]) + { nop ; xm.vstd t3} + { nop ; lw a1,0 ( t3)} +{ xm.sext a1, 8 ; nop} + {nop; xm.vstr t3} + { nop ; lw a0,0 ( t3)} // result registers: a1 = vD[0] sign-extended from 8 bits, a0 = vR[0] + + xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 + xm.lddsp s7,s6,16 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_energy.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_energy.S new file mode 100644 index 00000000..1109e9a8 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_energy.S @@ -0,0 +1,111 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ +#if defined(__VX4B__) + + +/* +int64_t vect_s32_energy( + const int32_t b[], + const unsigned length, + const right_shift_t b_shr); +*/ + + +#include "../asm_helper.h" + +.text +.p2align 2 + +#define NSTACKVECS (2) /* number of 8-word vector buffers kept in the stack frame */ +#define NSTACKWORDS (8 + 8*NSTACKVECS+4) /* frame size in 32-bit words (28); the prologue passes NSTACKWORDS*4 to xm.entsp */ + + +#define FUNCTION_NAME vect_s32_energy + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) /* word offset 16 from sp: scratch vector buffer */ +#define STACK_VEC_VR (NSTACKWORDS-16-4) /* word offset 8 from sp: second vector buffer */ + +#define b x10 /* arg 0 (a0): input vector b[] */ +#define N x11 /* arg 1 (a1): length; shifted right by EPV_LOG2_S32 in the prologue */ +#define b_shr x12 /* arg 2 (a2): b_shr */ +#define vec_tmp x13 /* address of the STACK_VEC_TMP buffer */ +#define tail x18 /* s2 (saved below): low 5 bits of (length << SIZEOF_LOG2_S32), then the xm.mkmsk mask built from it */ + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.p2align 4 /* 16-byte alignment. Was ".align 16;": RISC-V-style assemblers take the .align operand as log2(bytes), which would request 64 KiB. */ + +FUNCTION_NAME: + + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + + { li t3, 0 ; addi vec_tmp,sp, (STACK_VEC_TMP)*4 } + { slli tail, N, SIZEOF_LOG2_S32 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli tail, N, SIZEOF_LOG2_S32 \nMessage: The shift amount is not 32" */ + { xm.zexti tail, 5 ; xm.vclrdr } + { srli N, N, EPV_LOG2_S32 ; xm.brff tail, .L_tail_dealt_with_s32 }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri N, N, EPV_LOG2_S32 \nMessage: The shift amount is not 32", 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + { nop ; slli N, N, 5 } + { add t3, b, N ; xm.vstd vec_tmp} + { xm.mkmsk tail, tail ; nop } + xm.vlashr t3, b_shr + xm.vstrpv vec_tmp, tail +#undef tail + + { nop ; xm.vldc vec_tmp} + { nop ; xm.vclrdr } + { srli N, N, 5 ; xm.vlmacc0 vec_tmp} + +.L_tail_dealt_with_s32: + { addi t3,sp, (STACK_VEC_VR)*4 ; xm.brff N, .L_loop_bot_s32 }/* Multiple XAT warnings: 'LDAWSP outside of known 
frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + +.L_loop_top_s32: + { li t3, 32 ; xm.vstr t3} + xm.vlashr b, b_shr + { add b, b, t3 ; xm.vstr vec_tmp} + { addi t3,sp, (STACK_VEC_VR)*4 ; xm.vldc vec_tmp} + { nop ; xm.vldr t3} + { addi N, N, -1 ; xm.vlmacc0 vec_tmp} + { nop ; xm.bt N, .L_loop_top_s32 } +.L_loop_bot_s32: + +.L_finish_s32: + + +lui t3, %hi(vpu_vec_0x40000000) + addi t3,t3, %lo(vpu_vec_0x40000000) + { addi a2,sp, (STACK_VEC_TMP)*4 ; xm.vldc t3} +lui t3, %hi(vpu_vec_0x80000000) + addi t3,t3, %lo(vpu_vec_0x80000000) + { nop ; xm.vstr a2} + { nop ; xm.vlmacc0 t3} +lui t3, %hi(vpu_vec_zero) + addi t3,t3, %lo(vpu_vec_zero) + { addi t3,sp, (STACK_VEC_TMP)*4 ; xm.vldr t3} + { nop ; xm.vlmaccr0 t3} + { nop ; xm.vstd t3} + { nop ; xm.vlmaccr0 t3} + { nop ; xm.vstr t3} + { nop ; lw a1,0 ( t3)} + { addi a1, a1, 8 ; lw a0,4 ( t3)} + + xm.lddsp s3,s2,0 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + + +//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. */ +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. 
*/ +.L_end: + .size FUNCTION_NAME, .L_end - FUNCTION_NAME + + + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_inverse.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_inverse.S new file mode 100644 index 00000000..0524478b --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_inverse.S @@ -0,0 +1,113 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* + +headroom_t vect_s32_inverse( + int32_t a[], + const int32_t b[], + const unsigned length, + const unsigned scale); + +*/ + + +#define NSTACKVECTS (1) +#define NSTACKWORDS (8+8*(NSTACKVECTS)+4) + +#define FUNCTION_NAME vect_s32_inverse + + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) + + +#define a x10 +#define b x11 +#define length x12 +#define scale x13 +#define div_hi x13 +#define div_lo x18 +#define v_mask x19 +#define _32 x20 +#define val1 x21 +#define val2 x22 +#define vec_tmp x23 + +.text +.p2align 2 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + xm.stdsp s7,s6,16 + +{ li t3, 0 ; sw s8, 24 (sp)} +{ slli length, length, 2 ; xm.vsetc t3} + +{ li _32, 32 ; nop } +{ sub val2, scale, _32 ; li val1, 1 } +{ xm.shl div_hi, val1, val2 ; xm.shl div_lo, val1, scale }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli div_hi, val1, val2 \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli div_lo, val1, scale \nMessage: The shift amount is not 32" */ +{ xm.vclrdr; 
nop} +{ addi vec_tmp,sp, (STACK_VEC_TMP)*4 ; xm.brff length, .L_loop_bot }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */ +{xm.vldr b; nop} +.p2align 4 +.L_loop_top: + // The masked out elements will + { xm.mkmsk v_mask, length ; xm.vstd vec_tmp } + xm.vlashr b, v_mask + { sub length, length, _32 ; xm.vsign } + + { nop ; xm.vlmul0 b } + xm.vstrpv vec_tmp, v_mask + xm.vlashr b, v_mask + { add b, b, _32 ; xm.vsign } + { mv val2, v_mask ; nop } + .L_div_loop_top: + + { srli val2, val2, 4 ; lw val1,0 ( vec_tmp)} + xm.ldivu val1, s8, div_hi, div_lo, val1 + { addi vec_tmp, vec_tmp, 4 ; sw val1,0 ( vec_tmp)} + { nop ; xm.bt val2, .L_div_loop_top } + .L_div_loop_bot: + { addi vec_tmp,sp, (STACK_VEC_TMP)*4 ; nop } + + + { li val1, 1 ; xm.vlmul0 vec_tmp} + xm.vstrpv a, v_mask + { xm.slt val1, length, val1 ; xm.vstr vec_tmp} // Headroom update + { add a, a, _32 ; nop } + beqz val1, .L_loop_top +.L_loop_bot: + +.L_finish: + xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 + xm.lddsp s7,s6,16 +{ li a0, 31 ; xm.vgetc t3} +{ xm.zexti t3, 5 ; lw s8, 24 (sp)} +{ sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. 
*/ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_macc.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_macc.S new file mode 100644 index 00000000..3b7aeec1 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_macc.S @@ -0,0 +1,119 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +// XMOS Public License: Version 1 + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* + +headroom_t vect_s32_macc( + int32_t acc[], + const int32_t b[], + const int32_t c[], + const unsigned len, + const int acc_shr, + const int b_shr, + const int c_shr); +*/ + + +#define NSTACKWORDS (8+8+4) + +#define FUNCTION_NAME vect_s32_macc + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) +#define STACK_BYTEMASK 7 + +#define acc x10 +#define b x11 +#define c x12 +#define len x13 +#define shr_b x18 +#define shr_c x19 +#define _32 x20 +#define tmp_vec x21 +#define shr_acc x22 +#define bytemask len + +.text +.p2align 2 + + + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + { li t3, 0 ; sw s6, 24 (sp)} + { slli t3, len, SIZEOF_LOG2_S32 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli t3, len, SIZEOF_LOG2_S32 \nMessage: The shift amount is not 32" */ + { xm.zexti t3, 5 ; srli len, len, EPV_LOG2_S32 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S32 \nMessage: The shift amount is not 32" */ + mv shr_acc, a4 + + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + { li _32, 32 ; xm.vclrdr } 
+ mv shr_c, a6 + { xm.mkmsk t3, t3 ; nop} + mv shr_b, a5 + { addi tmp_vec,sp, (STACK_VEC_TMP)*4 ; nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */ + { xm.mkmski t3, 32 ; sw t3, (STACK_BYTEMASK)*4 (sp)} + { nop ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.bu .L_loop_top } + +.p2align 4 +.L_loop_top: + xm.vlashr acc, shr_acc + xm.vstrpv acc, t3 + xm.vlashr b, shr_b + xm.vstrpv tmp_vec, t3 + xm.vlashr c, shr_c + { add b, b, _32 ; nop } + { add c, c, _32 ; xm.vlmul0 tmp_vec} + { nop ; xm.vladd acc} + { addi len, len, -1 ; xm.vstr acc} + { add acc, acc, _32 ; xm.bt len, .L_loop_top } +.L_loop_bot: + + { nop ; lw bytemask, (STACK_BYTEMASK)*4 (sp)} + { nop ; xm.brff bytemask, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + xm.vlashr acc, shr_acc + xm.vstrpv acc, bytemask + xm.vlashr b, shr_b + xm.vstrpv tmp_vec, t3 + xm.vlashr c, shr_c + { mv t3, tmp_vec ; xm.vlmul0 tmp_vec} + { nop ; xm.vladd acc} + { nop ; xm.vstd tmp_vec} + xm.vstrpv tmp_vec, bytemask + xm.vstrpv acc, bytemask + { nop ; xm.vldr t3} + { nop ; xm.vstr t3} + +.L_finish: + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + + + { li a0, 31 ; xm.vgetc t3} + { xm.zexti t3, 5 ; lw s6, 24 (sp)} + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. 
*/ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_max.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_max.S new file mode 100644 index 00000000..c3e82220 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_max.S @@ -0,0 +1,114 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +/* + +int32_t vect_s32_max( + const int32_t b[], + const unsigned length); + + +*/ + + +#include "../asm_helper.h" + +.text +.p2align 2 + +#define NSTACKVECS (2) /* number of 8-word vector buffers kept in the stack frame */ +#define NSTACKWORDS (8 + 8*NSTACKVECS+4) /* frame size in 32-bit words (28); the prologue passes NSTACKWORDS*4 to xm.entsp */ + +#define FUNCTION_NAME vect_s32_max + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) /* word offset 16 from sp: scratch vector buffer */ +#define STACK_VEC_CUR_MAX (NSTACKWORDS-16-4) /* word offset 8 from sp: buffer holding cur_max[] */ + +#define b x10 // ![0x%08X] +#define N x11 // ![%d] +#define tail x12 // ![0x%X] +#define tmp x13 // ![%d] +#define tmz x18 // ![%d] + + + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.p2align 4 /* 16-byte alignment. Was ".align 16;": RISC-V-style assemblers take the .align operand as log2(bytes), which would request 64 KiB. 
*/ + +FUNCTION_NAME: + + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + + { li t3, 0 ; slli tail, N, SIZEOF_LOG2_S32 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli tail, N, SIZEOF_LOG2_S32 \nMessage: The shift amount is not 32" */ + { srli N, N, EPV_LOG2_S32 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri N, N, EPV_LOG2_S32 \nMessage: The shift amount is not 32" */ + { xm.zexti tail, 5 ; slli tmp, N, 5 } +lui t3, %hi(vpu_vec_0x80000000) + addi t3,t3, %lo(vpu_vec_0x80000000) + { addi t3,sp, (STACK_VEC_CUR_MAX)*4 ; xm.vldr t3} + { add t3, b, tmp ; xm.vstr t3} + { xm.mkmsk tail, tail ; xm.vldr t3} + { addi t3,sp, (STACK_VEC_CUR_MAX)*4 ; nop } + xm.vstrpv t3, tail + + // Tail is fully accounted for in cur_max now. 
+ +#undef tail +#define cur_max x12 // ![0x%08X] + + { addi tmp,sp, (STACK_VEC_TMP)*4 ; mv cur_max, t3 } /* cur_max = address currently in t3 (the cur_max[] stack buffer); tmp = sp + STACK_VEC_TMP*4 */ + { nop ; xm.vclrdr } + { mv t3, b ; xm.brff N, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ /* NOTE(review): presumably skips the loop when N == 0 -- confirm xm.brff semantics */ + + .L_loop_top: + // cur_max[] saved in stack + + { mv b, t3 ; xm.vldr t3} // vR[i] = b[i] + { addi N, N, -1 ; xm.vlsub cur_max} // vR[i] = cur_max[i] - b[i] + { addi t3,sp, 0 ; xm.vdepth1 } // vR[0] = [bitmask -- 1 where vR[i] < 0] b[i] > cur_max[i] + { xm.mkmski tmp, 1 ; nop } /* tmp = mask built from immediate 1 */ + xm.vstrpv t3, tmp /* masked store to sp + 0 (t3 = sp + 0) */ + { mv t3, b ; lw tmp, 0 (sp)} /* tmp = word just written at sp + 0; t3 = b */ + { mv tmz, tmp ; xm.vldr t3} /* vR reloaded from the current b[] chunk */ +xm.zip tmz, tmp, 0 /* NOTE(review): the two xm.zip steps appear to widen the per-element bits in tmp into a per-byte store mask -- confirm */ + { mv tmz, tmp ; li t3, 32 } +xm.zip tmz, tmp, 0 + xm.vstrpv cur_max, tmp /* masked store of vR (the b[] chunk) into the cur_max[] buffer */ + { add t3, b, t3 ; xm.bt N, .L_loop_top } /* t3 = b + 32, the next chunk; N was decremented above */ +.L_loop_bot: + + + { addi t3,sp, (STACK_VEC_CUR_MAX)*4 ; lw cur_max, (STACK_VEC_CUR_MAX)*4 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */ /* cur_max is reused as a scalar: first word of the cur_max[] buffer */ + { li N, 7 ; addi t3, t3, 4 } /* 7 more words to scan, starting at the second word of the buffer */ + .L_loop2_top: + { addi N, N, -1 ; lw a0,0 ( t3)} /* a0 = next candidate word */ + { xm.slt tmp, a0, cur_max ; nop } /* tmp = (a0 < cur_max), going by the mnemonic */ + {xm.shli tmp, tmp, 1; nop} + { addi t3, t3, 4 ; xm.bru tmp } /* NOTE(review): looks like a computed skip of the next bundle when tmp != 0 -- confirm xm.bru offset units */ + { mv cur_max, a0 ; nop } /* candidate becomes the running maximum */ + { mv a0, cur_max ; xm.bt N, .L_loop2_top } /* a0 (return register) = running maximum */ + + xm.lddsp s5,s4,16 + xm.lddsp s3,s2,8 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + + +//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. */ +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. 
*/ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. */ +.L_end: + .size FUNCTION_NAME, .L_end - FUNCTION_NAME + + + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_merge_accs.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_merge_accs.S new file mode 100644 index 00000000..70f8e88d --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_merge_accs.S @@ -0,0 +1,104 @@ +// Copyright 2021-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +// XMOS Public License: Version 1 + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* + void vect_s32_merge_accs( + int32_t a[], + split_acc_s32_t b[], + const unsigned length); +*/ + + +#define NSTACKWORDS (20) + +#define FUNCTION_NAME vect_s32_merge_accs + +#define merged x10 +#define split x11 +#define len x12 +#define _32 x13 + +#define tmpR x18 +#define tmpD x19 + + +.text +.p2align 2 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,64 + + { li t3, 15 ; li _32, 32 } + { add len, len, t3 ; sub merged, merged, _32 } + { srli len, len, 4 ; addi t3,sp, 0 } + + .L_loop_top: + + { add split, split, _32 ; xm.vldd split} + { addi t3,sp, 32 ; xm.vstd t3} + { add split, split, _32 ; xm.vldd split} + { addi t3,sp, 0 ; xm.vstd t3} + { addi len, len, -1 ; lw tmpD, 0 (sp)} + { add merged, merged, _32 ; lw tmpR, 32 (sp)} +xm.unzip tmpD, tmpR, 4 + xm.stdi tmpR,tmpD, 0(merged) + { nop ; lw tmpD, 4 (sp)} + { nop ; lw tmpR, 36 (sp)} +xm.unzip tmpD, tmpR, 4 + xm.stdi tmpR,tmpD, 
8(merged) + { nop ; lw tmpD, 8 (sp)} + { nop ; lw tmpR, 40 (sp)} +xm.unzip tmpD, tmpR, 4 + xm.stdi tmpR,tmpD, 16(merged) + { nop ; lw tmpD, 12 (sp)} + { nop ; lw tmpR, 44 (sp)} +xm.unzip tmpD, tmpR, 4 + xm.stdi tmpR,tmpD, 24(merged) + { nop ; lw tmpD, 16 (sp)} + { nop ; lw tmpR, 48 (sp)} +xm.unzip tmpD, tmpR, 4 + xm.stdi tmpR,tmpD, 32(merged) + { nop ; lw tmpD, 20 (sp)} + { nop ; lw tmpR, 52 (sp)} +xm.unzip tmpD, tmpR, 4 + xm.stdi tmpR,tmpD, 40(merged) + { nop ; lw tmpD, 24 (sp)} + { nop ; lw tmpR, 56 (sp)} +xm.unzip tmpD, tmpR, 4 + xm.stdi tmpR,tmpD, 48(merged) + { nop ; lw tmpD, 28 (sp)} + { nop ; lw tmpR, 60 (sp)} +xm.unzip tmpD, tmpR, 4 + xm.stdi tmpR,tmpD, 56(merged) + { add merged, merged, _32 ;nop } + bnez len, .L_loop_top + + .L_finish: + xm.lddsp s3,s2,64 + { nop ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_min.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_min.S new file mode 100644 index 00000000..d788983d --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_min.S @@ -0,0 +1,114 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ +#if defined(__VX4B__) + + +/* + +int32_t vect_s32_min( + const int32_t b[], + const unsigned length); + + +*/ + + +#include "../asm_helper.h" + +.text +.p2align 2 + +#define NSTACKVECS (2) /* number of 8-word vector buffers kept in the stack frame */ +#define NSTACKWORDS (8 + 8*NSTACKVECS+4) /* frame size in 32-bit words (28); the prologue passes NSTACKWORDS*4 to xm.entsp */ + +#define FUNCTION_NAME vect_s32_min + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) /* word offset 16 from sp: scratch vector buffer */ +#define STACK_VEC_CUR_MIN (NSTACKWORDS-16-4) /* word offset 8 from sp: buffer holding cur_min[] */ + +#define b x10 // ![0x%08X] +#define N x11 // ![%d] +#define tail x12 // ![0x%X] +#define tmp x13 // ![%d] +#define tmz x18 // ![%d] + + + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.p2align 4 /* 16-byte alignment. Was ".align 16;": RISC-V-style assemblers take the .align operand as log2(bytes), which would request 64 KiB. */ + +FUNCTION_NAME: + + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + + { li t3, 0 ; slli tail, N, SIZEOF_LOG2_S32 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli tail, N, SIZEOF_LOG2_S32 \nMessage: The shift amount is not 32" */ + { srli N, N, EPV_LOG2_S32 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri N, N, EPV_LOG2_S32 \nMessage: The shift amount is not 32" */ + { xm.zexti tail, 5 ; slli tmp, N, 5 } + la t3, vpu_vec_0x7FFFFFFF /* t3 = address of the vpu_vec_0x7FFFFFFF constant (defined outside this file) */ + { addi t3,sp, (STACK_VEC_CUR_MIN)*4 ; xm.vldr t3} /* vR loaded from that constant; t3 = address of the cur_min[] buffer */ + { add t3, b, tmp ; xm.vstr t3} /* NOTE(review): presumably stores vR to seed cur_min[] -- confirm; t3 = b + 32*N, where the tail elements start */ + { xm.mkmsk tail, tail ; xm.vldr t3} /* vR loaded from the tail address; tail becomes a mask */ + { addi t3,sp, (STACK_VEC_CUR_MIN)*4 ; nop } + xm.vstrpv t3, tail /* masked store of vR into the cur_min[] buffer */ + + // Tail is fully accounted for in cur_min now. 
+ +#undef tail +#define cur_min x12 // ![0x%08X] + + { addi tmp,sp, (STACK_VEC_TMP)*4 ; mv cur_min, t3 } /* cur_min = address currently in t3 (the cur_min[] stack buffer); tmp = sp + STACK_VEC_TMP*4 */ + { nop ; xm.vclrdr } + { mv t3, b ; xm.brff N, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ /* NOTE(review): presumably skips the loop when N == 0 -- confirm xm.brff semantics */ + + .L_loop_top: + // cur_min[] saved in stack + + { mv b, t3 ; xm.vldr t3} // vR[i] = b[i] + { addi N, N, -1 ; xm.vlsub cur_min} // vR[i] = cur_min[i] - b[i] + { addi t3,sp, 0 ; xm.vdepth1 } // vR[0] = [bitmask -- 1 where vR[i] < 0] b[i] > cur_min[i] + { xm.mkmski tmp, 1 ; nop } /* tmp = mask built from immediate 1 */ + xm.vstrpv t3, tmp /* masked store to sp + 0 (t3 = sp + 0) */ + { mv t3, b ; lw tmp, 0 (sp)} /* tmp = word just written at sp + 0; t3 = b */ + { mv tmz, tmp ; xm.vldr t3} /* vR reloaded from the current b[] chunk */ +xm.zip tmz, tmp, 0 /* NOTE(review): the two xm.zip steps appear to widen the per-element bits in tmp into a per-byte store mask -- confirm */ + { mv tmz, tmp ; li t3, 32 } +xm.zip tmz, tmp, 0 + { xm.not tmp, tmp ; nop } /* invert the mask: per the bitmask comment above it marks b[i] > cur_min[i], so the store below targets the other lanes */ + xm.vstrpv cur_min, tmp /* masked store of vR (the b[] chunk) into the cur_min[] buffer */ + { add t3, b, t3 ; xm.bt N, .L_loop_top } /* t3 = b + 32, the next chunk; N was decremented above */ +.L_loop_bot: + + + { addi t3,sp, (STACK_VEC_CUR_MIN)*4 ; lw cur_min, (STACK_VEC_CUR_MIN)*4 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */ /* cur_min is reused as a scalar: first word of the cur_min[] buffer */ + { li N, 7 ; addi t3, t3, 4 } /* 7 more words to scan, starting at the second word of the buffer */ + .L_loop2_top: + { addi N, N, -1 ; lw a0,0 ( t3)} /* a0 = next candidate word */ + { xm.slt tmp, cur_min, a0 ; nop } /* tmp = (cur_min < a0), going by the mnemonic */ + {xm.shli tmp, tmp, 1; nop} + { addi t3, t3, 4 ; xm.bru tmp } /* NOTE(review): looks like a computed skip of the next bundle when tmp != 0 -- confirm xm.bru offset units */ + { mv cur_min, a0 ; nop } /* candidate becomes the running minimum */ + { mv a0, cur_min ; xm.bt N, .L_loop2_top } /* a0 (return register) = running minimum */ + + xm.lddsp s5,s4,16 + xm.lddsp s3,s2,8 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + + +//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. */ +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 42. 
*/ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. */ +.L_end: + .size FUNCTION_NAME, .L_end - FUNCTION_NAME + + + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_mul.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_mul.S new file mode 100644 index 00000000..f3342a09 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_mul.S @@ -0,0 +1,109 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* + +headroom_t vect_s32_mul( + int32_t a[], + const int32_t b[], + const int32_t c[], + const unsigned len, + const int b_shr, + const int c_shr); +*/ + + +#define NSTACKWORDS (8+8) + +#define FUNCTION_NAME vect_s32_mul + +#define STACK_VEC_TMP 0 +#define STACK_BYTEMASK 12 + +#define a x10 +#define b x11 +#define c x12 +#define len x13 +#define shr_b x18 +#define shr_c x19 +#define _32 x20 +#define tmp_vec x21 +#define bytemask len + +.text +.p2align 2 + + + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + { li t3, 0 ; nop } + { slli t3, len, SIZEOF_LOG2_S32 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli t3, len, SIZEOF_LOG2_S32 \nMessage: The shift amount is not 32" */ + { xm.zexti t3, 5 ; srli len, len, 
EPV_LOG2_S32 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S32 \nMessage: The shift amount is not 32" */ + + xm.stdsp s3,s2,32 + xm.stdsp s5,s4,40 + { li _32, 32 ; xm.vclrdr } + mv shr_c, a5 + { xm.mkmsk t3, t3 ; nop} + mv shr_b, a4 + { addi tmp_vec,sp, (STACK_VEC_TMP)*4 ; nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */ + { xm.mkmski t3, 32 ; sw t3, (STACK_BYTEMASK)*4 (sp)} + { nop ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.bu .L_loop_top } + +.p2align 4 +.L_loop_top: + xm.vlashr b, shr_b + xm.vstrpv tmp_vec, t3 + xm.vlashr c, shr_c + { add b, b, _32 ; nop } + { add c, c, _32 ; xm.vlmul0 tmp_vec} + { addi len, len, -1 ; xm.vstr a} + { add a, a, _32 ; xm.bt len, .L_loop_top } +.L_loop_bot: + + { nop ; lw bytemask, (STACK_BYTEMASK)*4 (sp)} + { nop ; xm.brff bytemask, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + xm.vlashr b, shr_b + xm.vstrpv tmp_vec, t3 + xm.vlashr c, shr_c + { mv t3, tmp_vec ; xm.vlmul0 tmp_vec} + { nop ; xm.vstd tmp_vec} + xm.vstrpv tmp_vec, bytemask + xm.vstrpv a, bytemask + { nop ; xm.vldr t3} + { nop ; xm.vstr t3} + +.L_finish: + xm.lddsp s3,s2,32 + xm.lddsp s5,s4,40 + + + { li a0, 31 ; xm.vgetc t3} + { xm.zexti t3, 5 ; nop } + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. 
*/ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_nmacc.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_nmacc.S new file mode 100644 index 00000000..8d1ac1e0 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_nmacc.S @@ -0,0 +1,119 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +// XMOS Public License: Version 1 + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* + +headroom_t vect_s32_nmacc( + int32_t acc[], + const int32_t b[], + const int32_t c[], + const unsigned len, + const int acc_shr, + const int b_shr, + const int c_shr); +*/ + + +#define NSTACKWORDS (8+8+4) + +#define FUNCTION_NAME vect_s32_nmacc + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) +#define STACK_BYTEMASK 7 + +#define acc x10 +#define b x11 +#define c x12 +#define len x13 +#define shr_b x18 +#define shr_c x19 +#define _32 x20 +#define tmp_vec x21 +#define shr_acc x22 +#define bytemask len + +.text +.p2align 2 + + + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + { li t3, 0 ; sw s6, 24 (sp)} + { slli t3, len, SIZEOF_LOG2_S32 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli t3, len, SIZEOF_LOG2_S32 \nMessage: The shift amount is not 32" */ + { xm.zexti t3, 5 ; srli len, len, EPV_LOG2_S32 }/* XAT Warning: 
"Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S32 \nMessage: The shift amount is not 32" */ + mv shr_acc,a4 + + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + { li _32, 32 ; xm.vclrdr } + mv shr_c, a6 + { xm.mkmsk t3, t3 ; nop} + mv shr_b, a5 + { addi tmp_vec,sp, (STACK_VEC_TMP)*4 ; nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */ + { xm.mkmski t3, 32 ; sw t3, (STACK_BYTEMASK)*4 (sp)} + { nop ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.bu .L_loop_top } + +.p2align 4 +.L_loop_top: + xm.vlashr acc, shr_acc + xm.vstrpv acc, t3 + xm.vlashr b, shr_b + xm.vstrpv tmp_vec, t3 + xm.vlashr c, shr_c + { add b, b, _32 ; nop } + { add c, c, _32 ; xm.vlmul0 tmp_vec} + { nop ; xm.vlsub acc} + { addi len, len, -1 ; xm.vstr acc} + { add acc, acc, _32 ; xm.bt len, .L_loop_top } +.L_loop_bot: + + { nop ; lw bytemask, (STACK_BYTEMASK)*4 (sp)} + { nop ; xm.brff bytemask, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + xm.vlashr acc, shr_acc + xm.vstrpv acc, bytemask + xm.vlashr b, shr_b + xm.vstrpv tmp_vec, t3 + xm.vlashr c, shr_c + { mv t3, tmp_vec ; xm.vlmul0 tmp_vec} + { nop ; xm.vlsub acc} + { nop ; xm.vstd tmp_vec} + xm.vstrpv tmp_vec, bytemask + xm.vstrpv acc, bytemask + { nop ; xm.vldr t3} + { nop ; xm.vstr t3} + +.L_finish: + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + + + { li a0, 31 ; xm.vgetc t3} + { xm.zexti t3, 5 ; lw s6, 24 (sp)} + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. 
*/ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_scale.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_scale.S new file mode 100644 index 00000000..9c511596 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_scale.S @@ -0,0 +1,109 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* Multiplies the shifted b[] vector by the scalar c and stores the result to a[], 32 bytes per main-loop pass. The scalar is replicated into the 8-word stack vector at STACK_VEC_TMP, that vector is shifted by c_shr (held temporarily in tmp_vec) and it is then the xm.vlmul0 operand in the loop. A trailing partial vector is written under the byte mask saved at STACK_BYTEMASK. +headroom_t vect_s32_scale( + int32_t a[], + const int32_t b[], + const unsigned len, + const int32_t c, + const right_shift_t b_shr, + const right_shift_t c_shr); +*/ + + +#define NSTACKWORDS (8+8) + +#define FUNCTION_NAME vect_s32_scale + +#define STACK_VEC_TMP 0 +#define STACK_BYTEMASK 12 + +#define a x10 +#define b x11 +#define len x12 +#define c x13 +#define shr_b x18 +#define _32 x19 +#define tmp_vec x20 +#define bytemask len + +.text +.p2align 2 + + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,32 + xm.stdsp s5,s4,40 + { li t3, 0 ; nop } + { slli t3, len, SIZEOF_LOG2_S32 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli t3, len, SIZEOF_LOG2_S32 \nMessage: The shift amount is not 32" */ + {
xm.zexti t3, 5 ; srli len, len, EPV_LOG2_S32 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S32 \nMessage: The shift amount is not 32" */ + { li _32, 32 ; xm.vclrdr } + mv shr_b, a4 + { xm.mkmsk t3, t3 ; nop} + { xm.mkmski t3, 32 ; sw t3, (STACK_BYTEMASK)*4 (sp)} + xm.stdsp c,c,((STACK_VEC_TMP/2)+0)*8 + xm.stdsp c,c,((STACK_VEC_TMP/2)+1)*8 + xm.stdsp c,c,((STACK_VEC_TMP/2)+2)*8 + xm.stdsp c,c,((STACK_VEC_TMP/2)+3)*8 + mv tmp_vec, a5 + { addi c,sp, (STACK_VEC_TMP)*4 ; nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */ + xm.vlashr c, tmp_vec + xm.vstrpv c, t3 + + { nop ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.bu .L_loop_top } + +.p2align 4 +.L_loop_top: + xm.vlashr b, shr_b + { add b, b, _32 ; xm.vlmul0 c} + { addi len, len, -1 ; xm.vstr a} + { add a, a, _32 ; xm.bt len, .L_loop_top } +.L_loop_bot: + + { nop ; lw bytemask, (STACK_BYTEMASK)*4 (sp)} + { nop ; xm.brff bytemask, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + xm.vlashr b, shr_b + { mv t3, c ; xm.vlmul0 c} + { nop ; xm.vstd t3} + xm.vstrpv t3, bytemask + xm.vstrpv a, bytemask + { nop ; xm.vldr t3} + { nop ; xm.vstr t3} + +.L_finish: + xm.lddsp s3,s2,32 + xm.lddsp s5,s4,40 + + { li a0, 32 ; xm.vgetc t3} + { xm.zexti t3, 5 ; srli a1, t3, 8 } + { xm.shr a0, a0, a1 ; addi t3, t3, 1 } + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + +.L_func_end: + + + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42.
*/ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_split_accs.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_split_accs.S new file mode 100644 index 00000000..71281fb8 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_split_accs.S @@ -0,0 +1,114 @@ +// Copyright 2021-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +// XMOS Public License: Version 1 + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* Each loop pass reads 64 bytes (16 int32 values) from b[] (register 'merged') and writes 64 bytes to a[] (register 'split'): eight xm.lddi / xm.zip pairs stage words into two 32-byte stack buffers at offsets 0 and 32 from sp, which are then moved to a[] with xm.vldd / xm.vstd. The loop count is ceil(length / 16). NOTE(review): the loop body runs before the count is tested, so length == 0 looks unsupported -- confirm with callers. + void vect_s32_split_accs( + split_acc_s32_t a[], + const int32_t b[], + const unsigned length); +*/ + + +#define NSTACKWORDS (20) + +#define FUNCTION_NAME vect_s32_split_accs + +#define split x10 +#define merged x11 +#define len x12 +#define _32 x13 + +#define tmpR x18 +#define tmpD x19 + + +.text +.p2align 2 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4 + xm.stdsp s3,s2,64 + + { li t3, 15 ; li _32, 32 } + { add len, len, t3 ; nop } + { srli len, len, 4 ; addi t3,sp, 0 } + + .L_loop_top: + + xm.lddi tmpD,tmpR, 0(merged) +xm.zip tmpR, tmpD, 4 + { nop ; sw tmpR, 0 (sp)} + { nop ; sw tmpD, 32 (sp)} + + xm.lddi tmpD,tmpR, 8(merged) +xm.zip tmpR, tmpD, 4 + { nop ; sw tmpR, 4 (sp)} + { nop ; sw tmpD, 36 (sp)} + + xm.lddi tmpD,tmpR, 16(merged) +xm.zip tmpR, tmpD, 4 + { nop ; sw tmpR, 8 (sp)} + { nop ; sw tmpD, 40 (sp)} + + xm.lddi tmpD,tmpR, 24(merged) +xm.zip tmpR, tmpD, 4 + { add merged, merged, _32 ; sw tmpR, 12 (sp)} + { nop ; sw tmpD, 44 (sp)} + + xm.lddi tmpD,tmpR, 0(merged) +xm.zip
tmpR, tmpD, 4 + { nop ; sw tmpR, 16 (sp)} + { nop ; sw tmpD, 48 (sp)} + + xm.lddi tmpD,tmpR, 8(merged) +xm.zip tmpR, tmpD, 4 + { nop ; sw tmpR, 20 (sp)} + { nop ; sw tmpD, 52 (sp)} + + xm.lddi tmpD,tmpR, 16(merged) +xm.zip tmpR, tmpD, 4 + { nop ; sw tmpR, 24 (sp)} + { nop ; sw tmpD, 56 (sp)} + + xm.lddi tmpD,tmpR, 24(merged) +xm.zip tmpR, tmpD, 4 + { nop ; sw tmpR, 28 (sp)} + { addi len, len, -1 ; sw tmpD, 60 (sp)} + + + { addi t3,sp, 32 ; xm.vldd t3} + { add split, split, _32 ; xm.vstd split} + { addi t3,sp, 0 ; xm.vldd t3} + { add split, split, _32 ; xm.vstd split} + + { add merged, merged, _32 ; nop } + bnez len, .L_loop_top + + .L_finish: + xm.lddsp s3,s2,64 + { nop ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_sqrt.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_sqrt.S new file mode 100644 index 00000000..e18d8670 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_sqrt.S @@ -0,0 +1,185 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+ +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* Bit-by-bit square root of the shifted b[] values, written to a[]. The outer loop (.L_vect_loop_top) handles up to 32 bytes of a[] per pass under a byte mask; the inner loop (.L_sqrt_loop_top) solves one result bit per pass for 'depth' bits, starting from 0x40000000. + +headroom_t vect_s32_sqrt( + int32_t a[], + const int32_t b[], + const unsigned length, + const right_shift_t b_shr, + const unsigned depth); + +*/ + + +#define NSTACKVECTS (3) +#define NSTACKWORDS (12+8*(NSTACKVECTS)+4) + +#define FUNCTION_NAME vect_s32_sqrt + +// Temporary vector needed because there's no instruction to do vR[] * vR[] +#define STACK_VEC_TMP (NSTACKWORDS-8-4) +// Holds the shifted values of b[] while we're solving it. +#define STACK_VEC_TARGET (NSTACKWORDS-16-4) +// Holds the power of 2 that is currently being worked on inside the inner loop. +// @todo If we had an instruction that set each vR[k] to the value of a register, this wouldn't be needed. +#define STACK_VEC_POW (NSTACKWORDS-24-4) + +#define STACK_DEPTH 0 + +#define a x10 +#define b x11 +#define length x12 +#define b_shr x13 + +#define depth x18 +#define mask_vec x19 +#define _32 x20 +#define _1 x21 +#define tmp x24 + +.text +.p2align 2 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,24 + sw a4, (STACK_DEPTH)*4 (sp) +// Set VPU mode to 32-bit +// (length << 2) is the length of the vector in bytes. +{ li t3, 0 ; sw s8, 4 (sp)} +{ slli length, length, 2 ; xm.vsetc t3} + +// Maximum supported depth is 31 (a larger depth is replaced by 31 in STACK_DEPTH below) +{ li tmp, 31 ; lw t3, (STACK_DEPTH)*4 (sp)} +{ xm.assert t3 ; nop } +{ xm.sltu t3, tmp, t3 ; li _1, 1 } +{ li _32, 32 ; xm.brff t3, .L_vect_loop_top }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; sw tmp, (STACK_DEPTH)*4 (sp)} + + +.L_vect_loop_top: + + // mask_vec is a byte mask for the elements of a[] that we're currently working on.
+ // using VSTRPV with mask_vec prevents us from corrupting the headroom register. + // depth is the number of MSBs that we're solving for + { xm.mkmsk mask_vec, length ; lw depth, (STACK_DEPTH)*4 (sp)} + + // First initialize the target vector using b[] + // (Doing this first allows this function to operate in-place on b[] if desired) + // @todo If we wanted to, we could do a VSIGN + VLMUL here to take an absolute value of each b[k], + // since this function will not work for any negative b[k]. + xm.vlashr b, b_shr + { addi t3,sp, (STACK_VEC_TARGET)*4 ; add b, b, _32 } + xm.vstrpv t3, mask_vec + + // Initialize the result (a[]) with 0's + { nop ; xm.vclrdr } + xm.vstrpv a, mask_vec + + // VEC_POW[] is the bit we're currently solving for. Initialize to the first non-sign bit. + // (The VSTD is to zero out the VEC_POW[] elements that are going to be masked out, because + // we're going to use VEC_POW[] later to update the headroom register) + la t3, vpu_vec_0x40000000 + { addi t3,sp, (STACK_VEC_POW)*4 ; xm.vldr t3} + { nop ; xm.vstd t3} + xm.vstrpv t3, mask_vec + + // This saves us a few cycles on the first iteration (because of loop alignment, we'd need a + // 'bu .L_sqrt_loop_top' here even if we didn't want to skip ahead). It's necessary because + // we don't want to right-shift VEC_POW[] on the first iteration (it's already 2^30), and we + // can't fix that by initializing VEC_POW[] to 0x80000000 above because that's negative and + // VLASHR is an arithmetic shift. + { addi t3,sp, (STACK_VEC_TARGET)*4 ; xm.bu .L_first_iter } + + // Inner loop. Iteratively solving for the square root bit-by-bit + // 12 instructions + 1 FNOP + .p2align 4 + .L_sqrt_loop_top: + + // Load the next power of 2 and store it back to VEC_POW[] + xm.vlashr t3, _1 + xm.vstrpv t3, mask_vec + + // Add the current power of 2 to each a[] to get the next value to be tested.
+ // test[k] <-- a[k] + VEC_POW + { addi t3,sp, (STACK_VEC_TMP)*4 ; xm.vladd a} + + + // vR[] contains the values we're testing. Store it and square it + // vR[k] <-- ( test[k] * test[k] ) >> 30 + xm.vstrpv t3, mask_vec + { addi t3,sp, (STACK_VEC_TARGET)*4 ; xm.vlmul0 t3} + + .L_first_iter: + + // Subtract the squared test values from the target vector + // vR[k] <-- target[k] - (( test[k] * test[k] ) >> 30) + { nop ; xm.vlsub t3} + + // If vR[k] is negative, the test value was too large, so we don't want to update those a[k] + // for which vR[k] is negative. + + // vR[k] = a[k] + MAX( signum( vR[k] ), 0 ) * VEC_POW[k] + + { addi depth, depth, -1 ; xm.vsign } + { addi t3,sp, (STACK_VEC_POW)*4 ; xm.vpos } + { nop ; xm.vlmul0 t3} + { nop ; xm.vladd a} + + // Store the updated results in a[] + xm.vstrpv a, mask_vec + { nop ; xm.bt depth, .L_sqrt_loop_top } + .L_sqrt_loop_bot: + + // a[] now contains the results, but we haven't updated the headroom register because we've only + // been using VSTRPV. So, update the headroom register + // @todo Do we need to update the headroom register? Aren't we more or less guaranteed there's no + // headroom, because we got rid of the headroom of b[]? Should work out the math on this later. + + // We used mask_vec when initializing VEC_POW[], so we can use that here to avoid corrupting + // the headroom register with data that comes after a[]. t3 (x28) is already pointing at VEC_POW[]. + xm.vstrpv t3, mask_vec + { sub length, length, _32 ; xm.vldr t3} + + // If (length - 32) < 1 we're done.
+ { xm.slt tmp, length, _1 ; xm.vstr t3} + { add a, a, _32 ; nop }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + beqz tmp, .L_vect_loop_top +.L_vect_loop_bot: + +.L_finish: + + xm.lddsp s3,s2,8 + xm.lddsp s5,s4,16 + xm.lddsp s7,s6,24 +{ li a0, 31 ; xm.vgetc t3} +{ xm.zexti t3, 5 ; lw s8, 4 (sp)} +{ sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_sum.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_sum.S new file mode 100644 index 00000000..381395b0 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_sum.S @@ -0,0 +1,98 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+ +#if defined(__VX4B__) + + + +/* Accumulates b[] with xm.vlmacc0, 32 bytes per pass of .L_loop_top, and returns the 64-bit result in a1/a0 after reducing the accumulators through the stack vector at STACK_VEC_TMP in .L_finish. When length is not a whole number of 32-byte vectors, a copy of vpu_vec_0x40000000 masked to the tail bytes is first built on the stack and loaded with xm.vldc, and the tail of b[] is accumulated against it before the main loop. + int64_t vect_s32_sum( + const int32_t b[], + const unsigned length); +*/ + + +#include "../asm_helper.h" + + +#define FUNCTION_NAME vect_s32_sum +#define NSTACKWORDS (16+4) + + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) + +#define b x10 +#define N x11 +#define tail x12 + + +.text +.p2align 2 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + + { li t3, 0 ; nop } + { slli tail, N, SIZEOF_LOG2_S32 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli tail, N, SIZEOF_LOG2_S32 \nMessage: The shift amount is not 32" */ + { xm.zexti tail, 5 ; xm.vclrdr } + { srli N, N, EPV_LOG2_S32 ; xm.brff tail, .L_tail_dealt_with }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri N, N, EPV_LOG2_S32 \nMessage: The shift amount is not 32", 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + la t3, vpu_vec_0x40000000 + { addi s2,sp, (STACK_VEC_TMP)*4 ; xm.vldr t3} + { nop ; xm.vstd s2} + { xm.mkmsk tail, tail ; slli N, N, 3 } + xm.vstrpv s2, tail + sh2add s3, N, b + { nop ; xm.vldc s2} + { nop ; xm.vclrdr } + { srli N, N, 3 ; xm.vlmacc0 s3} + { li t3, 32 ; xm.vldc t3} + +.L_tail_dealt_with: + la t3, vpu_vec_0x40000000 + { li t3, 32 ; xm.vldc t3} + { addi a2,sp, (STACK_VEC_TMP)*4 ; xm.brff N, .L_loop_bot }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + +.L_loop_top: + { addi N, N, -1 ; xm.vlmacc0 b} + { add b, b, t3 ; xm.bt N, .L_loop_top } +.L_loop_bot: +
+.L_finish: + + // Requires vC to be filled with 0x40000000, which it already should be. + + la t3, vpu_vec_0x80000000 + { nop ; xm.vstr a2} + { nop ; xm.vlmacc0 t3} + + la t3, vpu_vec_zero + { addi t3,sp, (STACK_VEC_TMP)*4 ; xm.vldr t3} + { nop ; xm.vlmaccr0 t3} + { nop ; xm.vstd t3} + { nop ; xm.vlmaccr0 t3} + { nop ; xm.vstr t3} + { nop ; lw a1,0 ( t3)} + { addi a1, a1, 8 ; lw a0,4 ( t3)} + + xm.lddsp s3,s2,0 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + +.L_fend: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_fend - FUNCTION_NAME + + + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_to_f32.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_to_f32.S new file mode 100644 index 00000000..cee1739c --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_to_f32.S @@ -0,0 +1,91 @@ +// Copyright 2022-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+ +#if defined(__VX4B__) + +.text + +/* Converts the int32 values of b[] to floats in a[] with xm.fmake, using b_exp plus 23 as the exponent operand and the result of a signed compare against zero as the sign operand (negative inputs are negated first). An odd trailing element is handled first; the rest are processed two at a time, from the highest pair index down to 0. + + void vect_s32_to_vect_f32( + float a[], + const int32_t b[], + const unsigned length, + const exponent_t b_exp); + +*/ + +#define NSTACKWORDS 8 +#define FUNC_NAME vect_s32_to_vect_f32 + +.globl FUNC_NAME +.type FUNC_NAME,@function + +#define a x10 +#define b x11 +#define len x12 +#define b_exp x13 + +#define _0 x18 +#define tmp1 x19 +#define tmp0 x20 + +.p2align 4 +FUNC_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + { li _0, 0 ; li s3, 23 } + { add b_exp, b_exp, s3 ; nop } + xm.stdsp s5,s4,8 + + // handle tail first + { srli t3, len, 1 ; xm.zexti len, 1 } + { mv len, t3 ; xm.brff len, .L_pre_loop }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + xm.ldd tmp0,tmp1, len(b) + { xm.slt t3, tmp0, _0 ; slli tmp1, len, 1 } + beqz t3, .L_posT + xm.neg tmp0, tmp0 + .L_posT: + xm.fmake tmp0, t3, b_exp, _0, tmp0 + xm.stw tmp0,tmp1( a)/* XAT Warning: "Falling back on assumption: the int < 12 for the integer value of the item at position 2 in the instruction's operands in stwi tmp0, a,tmp1\nMessage: The offset can be encoded in s2rus immediate" */ + + .L_pre_loop: + + { addi len, len, -1 ; xm.brff len, .L_loop_end }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_loop: + xm.ldd tmp0,tmp1, len(b) + { xm.slt t3, tmp1, _0 ; nop } + beqz t3, .L_pos1 + xm.neg tmp1, tmp1 + .L_pos1: + xm.fmake tmp1, t3, b_exp, _0, tmp1 + slt t3, tmp0, _0 + beqz t3, .L_pos0 + xm.neg tmp0, tmp0 + .L_pos0: + xm.fmake tmp0, t3, b_exp, _0, tmp0 + xm.std tmp0,tmp1, len(a) + { addi len, len, -1 ; xm.bt len, .L_loop } + .L_loop_end: + + xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on
assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + + + // RETURN_REG_HOLDER + .set FUNC_NAME.nstackwords,NSTACKWORDS + .globl FUNC_NAME.nstackwords + .set FUNC_NAME.maxcores,1 + .globl FUNC_NAME.maxcores + .set FUNC_NAME.maxtimers,0 + .globl FUNC_NAME.maxtimers + .set FUNC_NAME.maxchanends,0 + .globl FUNC_NAME.maxchanends +.Ltmp1: + .size FUNC_NAME, .Ltmp1-FUNC_NAME + + +#endif diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_to_s16.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_to_s16.S new file mode 100644 index 00000000..ecd35995 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_to_s16.S @@ -0,0 +1,77 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + + + +#if defined(__VX4B__) + + +/* Narrows the 32-bit elements of b[] to 16 bits in a[]. Each main-loop pass loads 32 bytes of b[] with xm.vlashr using the adjusted shift (b_shr minus 16), applies xm.vdepth16 and writes 16 bytes to a[]; a final partial group is written under the 'tail' byte mask. +void vect_s32_to_vect_s16( + int16_t a[], + const int32_t b[], + const unsigned length, + const int b_shr); +*/ + +#include "../asm_helper.h" + +.text +.p2align 2 + + +#define NSTACKWORDS (8) + +#define FUNCTION_NAME vect_s32_to_vect_s16 + +#define a x10 +#define b x11 +#define len x12 +#define b_shr x13 +#define tail x18 +#define _16 x19 + + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + { li t3, 0 ; li _16, 16 } + { sub b_shr, b_shr, _16 ; xm.vsetc t3} + { srli len, len, EPV_LOG2_S32 ; slli tail, len, SIZEOF_LOG2_S16 }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S32 \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32
for the integer value of the item at position 2 in the instruction's operands in shli tail, len, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + { nop ; xm.zexti tail, 4 } + { xm.mkmsk tail, tail ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { xm.mkmski t3, 16 ; xm.bu .L_loop_top } + +.p2align 4 +.L_loop_top: + xm.vlashr b, b_shr + { add b, b, _16 ; xm.vdepth16 } + { addi len, len, -1 ; add b, b, _16 } + xm.vstrpv a, t3 + { add a, a, _16 ; xm.bt len, .L_loop_top } +.L_loop_bot: + + { nop ; xm.brff tail, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.vclrdr } + xm.vlashr b, b_shr + { nop ; xm.vdepth16 } + xm.vstrpv a, tail + +.L_finish: + xm.lddsp s3,s2,0 + { nop ; xm.retsp (NSTACKWORDS)*4 } + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_unzip.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_unzip.S new file mode 100644 index 00000000..66132482 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_unzip.S @@ -0,0 +1,69 @@ +// Copyright 2021-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// XMOS Public License: Version 1 + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* De-interleaves c[]: for each index k from length-1 down to 0, the two words of c[k] are loaded with xm.ldd and stored to a[k] and b[k]. A length of 0 branches straight to .L_finish. The loop relies on both slots of a bundle seeing the pre-bundle value of len (presumably guaranteed by the dual-issue semantics -- confirm against the ISA). + void vect_s32_unzip( + int32_t a[], + int32_t b[], + const complex_s32_t c[], + const unsigned length); +*/ + + +#define NSTACKWORDS (4+4) + +#define FUNCTION_NAME vect_s32_unzip + + + +#define a x10 +#define b x11 +#define c x12 +#define len x13 + +#define tmpA x18 +#define tmpB x19 + +.text +.p2align 2 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + + { addi len, len, -1 ; xm.brff len, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_loop_top: + xm.ldd tmpA,tmpB, len(c) + xm.stw tmpA,len ( a) + xm.stw tmpB,len ( b) + { addi len, len, -1 ; xm.bt len, .L_loop_top } + + .L_finish: + xm.lddsp s3,s2,0 + { nop ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32.
*/ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_zip.S b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_zip.S new file mode 100644 index 00000000..7f33d758 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s32/vect_s32_zip.S @@ -0,0 +1,143 @@ +// Copyright 2021-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +// XMOS Public License: Version 1 + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* Interleaves the shifted b[] and c[] values into the complex output a[]. Each main-loop pass consumes 32 bytes of b[] and of c[]: the shifted vectors are staged at STACK_VEC_B and STACK_VEC_C and re-read with xm.vlmaccr0 at descending word offsets to build two 32-byte output vectors (64 bytes of a[]). Remaining elements (tail byte count saved at offset 28 from sp) are copied one pair at a time in .L_tail_loop_top. + void vect_s32_zip( + complex_s32_t a[], + const int32_t b[], + const int32_t c[], + const unsigned length, + const right_shift_t b_shr, + const right_shift_t c_shr); +*/ + + +#define NSTACKWORDS (8+2*8+4) + +#define FUNCTION_NAME vect_s32_zip + +#define STACK_VEC_C (NSTACKWORDS-8-4) +#define STACK_VEC_B (NSTACKWORDS-16-4) + + +#define a x10 +#define b x11 +#define c x12 +#define len x13 +#define b_shr x18 +#define c_shr x19 + +#define vec_B x20 +#define vec_C x21 +#define _28 x22 +#define _32 x23 + + +.text +.p2align 2 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + xm.stdsp s7,s6,16 + { li t3, 0 ; sw s8, 24 (sp)} + { slli t3, len, SIZEOF_LOG2_S32 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli t3, len, SIZEOF_LOG2_S32 \nMessage: The shift amount is not 32" */ + { xm.zexti t3, 5 ; srli len, len, EPV_LOG2_S32 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S32 \nMessage: The shift amount is not 32" */ + { addi vec_B,sp,
(STACK_VEC_B)*4 ; sw t3, 28 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'STWSP outside of known frame - offset may need correction' */ + { li t3, 2 ; xm.vclrdr } +{ xm.bitrev t3, t3 ; nop} +xm.vstd vec_B + { addi vec_C,sp, (STACK_VEC_C)*4 ; sw t3,0 ( vec_B)} + { li _32, 32 ; xm.vldc vec_B} + mv b_shr, a4 + { li _28, 28 ; nop} + + mv c_shr,a5 + { nop ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ +// { nop ; xm.bu .L_loop_top } + + .p2align 4 + .L_loop_top: + xm.vlashr b, b_shr + { add b, b, _32 ; xm.vstr vec_B} + xm.vlashr c, c_shr + { add a, a, _32 ; addi len, len, -1 } + + { add vec_C, vec_C, _28 ; xm.vstr vec_C} + { add vec_B, vec_B, _28 ; xm.vclrdr } + { addi vec_C, vec_C, -4 ; xm.vlmaccr0 vec_C} + { addi vec_B, vec_B, -4 ; xm.vlmaccr0 vec_B} + + { addi vec_C, vec_C, -4 ; xm.vlmaccr0 vec_C} + { addi vec_B, vec_B, -4 ; xm.vlmaccr0 vec_B} + { addi vec_C, vec_C, -4 ; xm.vlmaccr0 vec_C} + { addi vec_B, vec_B, -4 ; xm.vlmaccr0 vec_B} + //FNOP + { addi vec_C, vec_C, -4 ; xm.vlmaccr0 vec_C} + { addi vec_B, vec_B, -4 ; xm.vlmaccr0 vec_B} + { sub t3, a, _32 ; xm.vstr a} + { add a, a, _32 ; xm.vclrdr } + + { addi vec_C, vec_C, -4 ; xm.vlmaccr0 vec_C} + { addi vec_B, vec_B, -4 ; xm.vlmaccr0 vec_B} + { addi vec_C, vec_C, -4 ; xm.vlmaccr0 vec_C} + { addi vec_B, vec_B, -4 ; xm.vlmaccr0 vec_B} + //FNOP + { addi vec_C, vec_C, -4 ; xm.vlmaccr0 vec_C} + { addi vec_B, vec_B, -4 ; xm.vlmaccr0 vec_B} + { addi vec_C, vec_C, -4 ; xm.vlmaccr0 vec_C} + { addi vec_B, vec_B, -4 ; xm.vlmaccr0 vec_B} + + { add c, c, _32 ; nop } + { addi vec_B,sp, (STACK_VEC_B)*4 ; xm.vstr t3} + { addi vec_C,sp, (STACK_VEC_C)*4 ; xm.bt len, .L_loop_top } + .L_loop_bot: + + { nop ; lw len, 28 (sp)} + { srli len, len, SIZEOF_LOG2_S32 ; xm.brff len, .L_finish }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the
instruction's operands in shri len, len, SIZEOF_LOG2_S32 \nMessage: The shift amount is not 32", 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + xm.vlashr b, b_shr + { nop ; xm.vstr vec_B} + xm.vlashr c, c_shr + { addi len, len, -1 ; xm.vstr vec_C} + +#define tmpB x22 +#define tmpC x23 + .L_tail_loop_top: + { nop ; xm.ldw tmpB,len ( vec_B)} + { nop ; xm.ldw tmpC,len ( vec_C)} + xm.std tmpB,tmpC, len(a) + { addi len, len, -1 ; xm.bt len, .L_tail_loop_top } + .L_tail_loop_bot: + + .L_finish: + xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 + xm.lddsp s7,s6,16 + { nop ; lw s8, 24 (sp)} + { nop ; xm.retsp (NSTACKWORDS)*4 } + + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_s8/vect_s8_is_negative.S b/lib_xcore_math/src/arch/vx4b/vect_s8/vect_s8_is_negative.S new file mode 100644 index 00000000..bdf06501 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_s8/vect_s8_is_negative.S @@ -0,0 +1,81 @@ +// Copyright 2021-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+ +#if defined(__VX4B__) + + +/* Derives a[] from the sign of each b[] element, 32 bytes per main-loop pass, with the VPU configured by xm.vsetc 0x200 (presumably 8-bit mode -- confirm). Each vector is loaded with xm.vldr, then passed through xm.vsign, xm.vlsub against vpu_vec_0xC1 and xm.vpos before being stored. A final partial vector is stored with xm.vstrpv under the 'tail' byte mask. The exact output encoding depends on the xm.v* semantics and on vpu_vec_0xC1, neither of which is visible here. +void vect_s8_is_negative( + int8_t a[], + const int8_t b[], + const unsigned len); +*/ + + +#include "../asm_helper.h" + +#define NSTACKWORDS (8) + +#define FUNCTION_NAME vect_s8_is_negative + +#define a x10 +#define b x11 +#define len x12 +#define _32 x13 +#define vec_0xC1 x18 +#define tail x19 + +.text +.p2align 2 + + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + li t3, 0x200 + xm.stdsp s3,s2,8 + { mv t3, len ; xm.vsetc t3} + { xm.zexti t3, 5 ; srli len, len, EPV_LOG2_S8 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S8 \nMessage: The shift amount is not 32" */ + { li _32, 32 ; xm.mkmsk tail, t3 } +lui t3, %hi(vpu_vec_0xC1) + addi t3,t3, %lo(vpu_vec_0xC1) + { mv vec_0xC1, t3 ; mv t3, b } + { nop ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_loop_top: + { add t3, t3, _32 ; xm.vldr t3} + { nop ; xm.vsign } + { nop ; xm.vlsub vec_0xC1} + { nop ; xm.vpos } + { addi len, len, -1 ; xm.vstr a} + { add a, a, _32 ; xm.bt len, .L_loop_top } + .L_loop_bot: + + { nop ; xm.brff tail, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.vldr t3} + { nop ; xm.vsign } + { nop ; xm.vlsub vec_0xC1} + { nop ; xm.vpos } + xm.vstrpv a, tail + + +.L_finish: + xm.lddsp s3,s2,8 + { nop ; xm.retsp (NSTACKWORDS)*4 } + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42.
*/ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_add.S b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_add.S new file mode 100644 index 00000000..1dee782c --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_add.S @@ -0,0 +1,156 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + + + +.text +.p2align 2 + + +#define NSTACKWORDS (32) +#define STACK_VEC_TMP (NSTACKWORDS-8-1) +#define STACK_BYTEMASK 8 + +#define a x10 +#define b x11 +#define c x12 +#define len x13 + +#define shr_b x18 +#define shr_c x19 +#define _32 x20 +#define tmp_vec x21 +#define bytemask len + + + + + +/* +headroom_t vect_s16_add( + int16_t a[], + const int16_t b[], + const int16_t c[], + const unsigned len, + const int b_shr, + const int c_shr); +*/ +vect_s16_add: +FNAME_S16: + xm.entsp (NSTACKWORDS)*4 + li t3, 0x100 + { slli t3, len, SIZEOF_LOG2_S16 ; xm.vsetc t3} + { xm.zexti t3, 5 ; srli len, len, EPV_LOG2_S16 } + { nop ; xm.bu .L_apply_op } +.L_func_end_s16: + + + + +/* +headroom_t vect_s32_add( + int32_t a[], + const int32_t b[], + const int32_t c[], + const unsigned len, + const int b_shr, + const int c_shr); +*/ +vect_s32_add: +FNAME_S32: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 
bit unsigned immediate" */ + { li t3, 0 ; nop } + { slli t3, len, SIZEOF_LOG2_S32 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli t3, len, SIZEOF_LOG2_S32 \nMessage: The shift amount is not 32" */ + { xm.zexti t3, 5 ; srli len, len, EPV_LOG2_S32 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S32 \nMessage: The shift amount is not 32" */ + { nop ; xm.bu .L_apply_op } +.L_func_end_s32: + + + + + + +/* + Code shared by all functions above +*/ +.type .L_apply_op,@function +.L_apply_op: + + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + { li _32, 32 ; xm.vclrdr } + mv shr_c, a5 + mv shr_b, a4 + { xm.mkmsk t3, t3 ; nop } + { addi tmp_vec,sp, (NSTACKWORDS-8-1)*4 ; nop} + { xm.mkmski t3, 32 ; sw t3, (STACK_BYTEMASK)*4 (sp)} + { nop ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.bu .L_loop_top } + +.p2align 4 +.L_loop_top: + xm.vlashr b, shr_b + xm.vstrpv tmp_vec, t3 + xm.vlashr c, shr_c + { add b, b, _32 ; nop } + { add c, c, _32 ; xm.vladd tmp_vec} + { addi len, len, -1 ; xm.vstr a} + { add a, a, _32 ; xm.bt len, .L_loop_top } +.L_loop_bot: + + { nop ; lw bytemask, (STACK_BYTEMASK)*4 (sp)} + { nop ; xm.brff bytemask, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + xm.vlashr b, shr_b + xm.vstrpv tmp_vec, t3 + xm.vlashr c, shr_c + { mv t3, tmp_vec ; xm.vladd tmp_vec} + { nop ; xm.vstd tmp_vec} + xm.vstrpv tmp_vec, bytemask + xm.vstrpv a, bytemask + { nop ; xm.vldr t3} + { nop ; xm.vstr t3} + +.L_finish: + xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 + + // Should work for both 16 and 32 bit modes + { li a0, 32 ; xm.vgetc t3} + { xm.zexti t3, 5 ; srli a1, t3, 8 } + { xm.shr a0, a0, a1 ; addi t3, t3, 1 } + { sub a0, 
a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + +.L_end_apply_op: + .size .L_apply_op, .L_end_apply_op - .L_apply_op + + + +.global vect_s16_add +.type vect_s16_add,@function +.set vect_s16_add.nstackwords,NSTACKWORDS; .global vect_s16_add.nstackwords /* Translation error on this line: unexpected token at position 41. */ +.set vect_s16_add.maxcores,1; .global vect_s16_add.maxcores /* Translation error on this line: unexpected token at position 28. */ +.set vect_s16_add.maxtimers,0; .global vect_s16_add.maxtimers /* Translation error on this line: unexpected token at position 29. */ +.set vect_s16_add.maxchanends,0; .global vect_s16_add.maxchanends /* Translation error on this line: unexpected token at position 31. */ +.size vect_s16_add, .L_func_end_s16 - vect_s16_add + +.global vect_s32_add +.type vect_s32_add,@function +.set vect_s32_add.nstackwords,NSTACKWORDS; .global vect_s32_add.nstackwords /* Translation error on this line: unexpected token at position 41. */ +.set vect_s32_add.maxcores,1; .global vect_s32_add.maxcores /* Translation error on this line: unexpected token at position 28. */ +.set vect_s32_add.maxtimers,0; .global vect_s32_add.maxtimers /* Translation error on this line: unexpected token at position 29. */ +.set vect_s32_add.maxchanends,0; .global vect_s32_add.maxchanends /* Translation error on this line: unexpected token at position 31. */ +.size vect_s32_add, .L_func_end_s32 - vect_s32_add + + + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_headroom.S b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_headroom.S new file mode 100644 index 00000000..3aeadfa4 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_headroom.S @@ -0,0 +1,134 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ +#if defined(__VX4B__) + + + +/* +headroom_t vect_s16_headroom( + const int16_t* v, + const unsigned length); +*/ /* Streams v[] through the VPU in 32-byte steps (load, then store to a stack scratch vector) and derives the result from the control word read back with xm.vgetc. */ + +#include "../asm_helper.h" + +.text +.p2align 2 + +#define NSTACKWORDS 12 + +#define STACK_TMP_VEC 0 + +#define arg_v x10 +#define arg_len x11 + + +#define FUNCTION_NAME vect_s16_headroom + +FUNCTION_NAME: + { li t3, 32 ; xm.entsp (NSTACKWORDS)*4 } + { slli t3, t3, 3 ; xm.vclrdr } /* t3 = 32 << 3 = 0x100, the xm.vsetc value for this 16-bit routine */ + { slli a2, arg_len, 1 ; xm.vsetc t3} /* a2 = length in bytes */ + { srli arg_len, arg_len, 4 ; mv t3, arg_v } /* arg_len = length / 16 (loop iterations); t3 = read pointer */ + + { addi a0,sp, (STACK_TMP_VEC)*4 ; nop } /* a0 now points at the stack scratch vector (arg_v was already copied to t3) */ + { li a3, 32 ; nop } + { xm.zexti a2, 5 ; xm.brff arg_len, .L_loop_bot_s16 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.bu .L_loop_top_s16 } + + .p2align 4 + .L_loop_top_s16: + { add t3, t3, a3 ; xm.vldr t3} + { addi arg_len, arg_len, -1 ; xm.vstr a0} /* the stored data lands in the scratch vector and is never read back inside this loop */ + { nop ; xm.bt arg_len, .L_loop_top_s16 } + +.L_loop_bot_s16: + { nop ; xm.brff a2, .L_finish16 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { xm.mkmsk a2, a2 ; xm.vstd a0} + { mv t3, a0 ; xm.vldr t3} + xm.vstrpv a0, a2 /* tail: only the bytes selected by the a2 mask overwrite the scratch vector */ + { nop ; xm.vldr t3} + { nop ; xm.vstr t3} /* t3 == a0 here: the scratch vector is reloaded and stored again with a plain xm.vstr */ + +.L_finish16: + { li a0, 15 ; xm.vgetc t3} + { xm.zexti t3, 5 ; nop } + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } /* returns 15 - (zexti-reduced xm.vgetc value) */ + + +.L_func_end_s16: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. 
*/ +.size FUNCTION_NAME, .L_func_end_s16 - FUNCTION_NAME + + +#undef FUNCTION_NAME + + + + + + + +/* +headroom_t vect_s32_headroom( + const int32_t* v, + const unsigned length); +*/ + +#define FUNCTION_NAME vect_s32_headroom + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + + { li t3, 0 ; slli a2, arg_len, 2 } /* t3 = 0 is the xm.vsetc value for this 32-bit routine; a2 = length in bytes */ + { srli arg_len, arg_len, 3 ; xm.vsetc t3} /* arg_len = length / 8 (loop iterations) */ + { addi a0,sp, (STACK_TMP_VEC)*4 ; mv t3, arg_v } /* NOTE(review): a0 (= arg_v, x10) is written by addi and read by mv in the same bundle - relies on mv seeing the incoming a0; confirm bundle semantics */ + { li a3, 32 ; xm.vclrdr } + { xm.zexti a2, 5 ; xm.brff arg_len, .L_loop_bot_s32 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.bu .L_loop_top_s32 } + + .p2align 4 + .L_loop_top_s32: + { add t3, t3, a3 ; xm.vldr t3} + { addi arg_len, arg_len, -1 ; xm.vstr a0} + { nop ; xm.bt arg_len, .L_loop_top_s32 } + + .L_loop_bot_s32: + { nop ; xm.brff a2, .L_finish32 }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { xm.mkmsk a2, a2 ; xm.vstd a0} + { mv t3, a0 ; xm.vldr t3} + xm.vstrpv a0, a2 + { nop ; xm.vldr t3} + { nop ; xm.vstr t3} + + .L_finish32: + { li a0, 31 ; xm.vgetc t3} + { xm.zexti t3, 5 ; nop } + {sub a0, a0, t3; nop} /* returns 31 - (zexti-reduced xm.vgetc value) */ + + { nop ; xm.retsp (NSTACKWORDS)*4 } + +.L_func_end_s32: + +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. 
*/ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end_s32 - FUNCTION_NAME + +#undef FUNCTION_NAME + + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_rect.S b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_rect.S new file mode 100644 index 00000000..16e5b664 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_rect.S @@ -0,0 +1,138 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + + +#include "../asm_helper.h" + +.text +.p2align 2 + +#define NSTACKWORDS (8+4) + +#define STACK_TMP_VEC (NSTACKWORDS-8-4) /* evaluates to 0: the scratch vector starts at sp */ + +#define a x10 +#define b x11 +#define len x12 +#define tail x13 + + + + +/* +headroom_t vect_s16_rect( + int16_t a[], + const int16_t b[], + const unsigned length); +*/ + +vect_s16_rect: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + li t3, 0x0100 + { slli tail, len, SIZEOF_LOG2_S16 ; srli len, len, EPV_LOG2_S16 }/* Multiple XAT warnings: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli tail, len, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32", "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S16 \nMessage: The shift amount is not 32" */ /* NOTE(review): len is read by slli and written by srli in one bundle - relies on both slots reading the incoming len; confirm */ + { xm.zexti tail, 5 ; xm.vsetc t3} + { nop ; xm.bu .L_apply_op } + +.L_func_end_s16: + + + + + +/* +headroom_t vect_s32_rect( + int32_t a[], + const int32_t b[], + const unsigned length); +*/ + +vect_s32_rect: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: 
the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + { li t3, 0 ; slli tail, len, SIZEOF_LOG2_S32 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli tail, len, SIZEOF_LOG2_S32 \nMessage: The shift amount is not 32" */ + { srli len, len, EPV_LOG2_S32 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S32 \nMessage: The shift amount is not 32" */ + { xm.zexti tail, 5 ; xm.bu .L_apply_op } + +.L_func_end_s32: + + + + +#undef a +#undef b +#undef len + +/* + When branching here: + * a --> x10 + * b --> x11 + * loop_count --> x12 + * tail --> x13 + * VPU mode must already be set. +*/ + +#define a x10 +#define b x11 +#define loop_count x12 +#define tail x13 + +.type .L_apply_op,@function + +.L_apply_op: + + { xm.mkmsk tail, tail ; nop } /* tail byte count -> mask used by the xm.vstrpv stores below */ + { mv t3, b ; xm.brff loop_count, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { li a1, 32 ; xm.bu .L_loop_top } /* a1 is x11 (b): reused as the 32-byte stride now that b has been copied to t3 */ +.p2align 4 +.L_loop_top: + { add t3, t3, a1 ; xm.vldr t3} + { addi loop_count, loop_count, -1 ; xm.vpos } + { add a, a, a1 ; xm.vstr a} + { nop ; xm.bt loop_count, .L_loop_top } +.L_loop_bot: + + { nop ; xm.brff tail, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.vclrdr } + { addi t3,sp, (STACK_TMP_VEC)*4 ; xm.vldr t3} /* NOTE(review): t3 is both the load address and the addi destination in this bundle - presumably the load uses the old t3 (input pointer) and t3 then points at the stack scratch vector; confirm */ + { nop ; xm.vstd t3} + { nop ; xm.vpos } + xm.vstrpv t3, tail + { nop ; xm.vldr t3} + { nop ; xm.vstr t3} + xm.vstrpv a, tail /* only the tail bytes are written to a[] */ + +.L_finish: + { li a0, 32 ; xm.vgetc t3} /* return value: a0 = (32 >> (ctrl >> 8)) - ((ctrl & 0x1F) + 1) with ctrl = xm.vgetc result - assumes zexti keeps the low 5 bits; TODO confirm */ + { srli a1, t3, 8 ; nop } + { xm.zexti t3, 5 ; xm.shr a0, a0, a1 } + { addi t3, t3, 1 ; nop } + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + +.L_end_apply_op: 
+.size .L_apply_op, .L_end_apply_op - .L_apply_op + + + + + +.global vect_s16_rect +.type vect_s16_rect,@function +.set vect_s16_rect.nstackwords,NSTACKWORDS; .global vect_s16_rect.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set vect_s16_rect.maxcores,1; .global vect_s16_rect.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set vect_s16_rect.maxtimers,0; .global vect_s16_rect.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set vect_s16_rect.maxchanends,0; .global vect_s16_rect.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size vect_s16_rect, .L_func_end_s16 - vect_s16_rect + +.global vect_s32_rect +.type vect_s32_rect,@function +.set vect_s32_rect.nstackwords,NSTACKWORDS; .global vect_s32_rect.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set vect_s32_rect.maxcores,1; .global vect_s32_rect.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set vect_s32_rect.maxtimers,0; .global vect_s32_rect.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set vect_s32_rect.maxchanends,0; .global vect_s32_rect.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size vect_s32_rect, .L_func_end_s32 - vect_s32_rect + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_add_scalar.S b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_add_scalar.S new file mode 100644 index 00000000..a1989a7a --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_add_scalar.S @@ -0,0 +1,112 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +/* + +unsigned vect_sXX_add_scalar( + int32_t a[], + const int32_t b[], + const unsigned length_bytes, + const int32_t c, + const int32_t d, + const right_shift_t b_shr, + const unsigned mode_bits); + +*/ /* Builds a 32-byte stack vector from the (c, d) pair, then for every 32 bytes of b[]: xm.vlashr by b_shr, xm.vladd of that vector, store to a[]. The length is in bytes and the VPU mode comes from mode_bits. */ + + +#define NSTACKVECTS (1) +#define NSTACKWORDS (8+8*(NSTACKVECTS)) + +#define FUNCTION_NAME vect_sXX_add_scalar + + +#define STACK_VEC_TEMP (NSTACKWORDS-12) + +#define a x10 +#define b x11 +#define len x12 +#define c x13 +#define b_shr x18 +#define _32 x19 +#define tail x20 + + +.text +.p2align 2 + +FUNCTION_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 /* x18-x21 are saved here and reloaded at .L_finish */ + + mv t3, a4 /* a4 carries d, the 5th argument */ + { li _32, 32 ; nop } + //{ li _32, 32 ; lw t3, (STACK_D)*4 (sp)} + xm.stdsp c,t3,((STACK_VEC_TEMP/2)+0)*8 + xm.stdsp c,t3,((STACK_VEC_TEMP/2)+1)*8 + xm.stdsp c,t3,((STACK_VEC_TEMP/2)+2)*8 + xm.stdsp c,t3,((STACK_VEC_TEMP/2)+3)*8 /* four 8-byte stores of the (c, t3) pair fill the 32 bytes starting at word STACK_VEC_TEMP */ + +#undef c // no longer needed +#define vec_tmp x13 + + mv t3, a6 /* a6 carries mode_bits, the 7th argument; passed to xm.vsetc below */ + { addi vec_tmp,sp, (STACK_VEC_TEMP)*4 ; nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */ + //{ addi vec_tmp,sp, (STACK_VEC_TEMP)*4 ; lw t3, (STACK_MODE_BITS)*4 (sp)}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */ + { mv tail, len ; xm.vsetc t3} + mv b_shr, a5 /* a5 carries b_shr, the 6th argument */ + { srli len, len, 5 ; nop} /* len = length_bytes / 32 = number of full vectors */ + //{ srli len, len, 5 ; lw b_shr, (STACK_B_SHR)*4 (sp)} + { xm.zexti tail, 5 ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + + .L_loop_top: + xm.vlashr b, b_shr + { addi len, len, -1 ; xm.vladd vec_tmp} + { add b, b, _32 ; xm.vstr a} + { 
add a, a, _32 ; xm.bt len, .L_loop_top } + .L_loop_bot: + + { xm.mkmsk tail, tail ; xm.brff tail, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ /* NOTE(review): tail is tested (brff) and converted to a mask (mkmsk) in one bundle - presumably the branch tests the byte count; confirm */ + { nop ; xm.vclrdr } + xm.vlashr b, b_shr + { nop ; xm.vladd vec_tmp} + { nop ; xm.vstd vec_tmp} + xm.vstrpv a, tail /* only the tail bytes are written to a[] */ + + // These three are because the headroom mask doesn't get updated by VSTRPV + xm.vstrpv vec_tmp, tail + { nop ; xm.vldd vec_tmp} + { nop ; xm.vstd vec_tmp} + + +.L_finish: + xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 + + { nop ; xm.vgetc t3} + { xm.zexti t3, 5 ; nop } + { mv a0, t3 ; xm.retsp (NSTACKWORDS)*4 } /* returns the zexti-reduced xm.vgetc value unmodified (no "width - x" conversion here) */ + +.L_func_end: + + +.global FUNCTION_NAME +.type FUNCTION_NAME,@function +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.size FUNCTION_NAME, .L_func_end - FUNCTION_NAME + + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_max_elementwise.S b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_max_elementwise.S new file mode 100644 index 00000000..55e62654 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_max_elementwise.S @@ -0,0 +1,192 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +#define NSTACKWORDS (8+8+4) + +#define len x13 + +.text +.p2align 4 + + +/* +headroom_t vect_s32_max_elementwise( + int32_t a[], + const int32_t b[], + const int32_t c[], + const unsigned len, + const right_shift_t b_shr, + const right_shift_t c_shr); +*/ +#define FUNC_NAME vect_s32_max_elementwise +FUNC_NAME: + { li t3, 0 ; xm.entsp (NSTACKWORDS)*4 } + { slli t3, len, SIZEOF_LOG2_S32 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli t3, len, SIZEOF_LOG2_S32 \nMessage: The shift amount is not 32" */ + { xm.zexti t3, 5 ; srli len, len, EPV_LOG2_S32 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S32 \nMessage: The shift amount is not 32" */ + call vect_sXX_max_elementwise + { li t3, 31 ; nop } + { sub a0, t3, a0 ; xm.retsp (NSTACKWORDS)*4 } +.L_end_s32: + +.global FUNC_NAME +.type FUNC_NAME,@function +.set FUNC_NAME.nstackwords,NSTACKWORDS; .global FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 38. */ +.set FUNC_NAME.maxcores,1; .global FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 25. */ +.set FUNC_NAME.maxtimers,0; .global FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 26. */ +.set FUNC_NAME.maxchanends,0; .global FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 28. 
*/ +.size FUNC_NAME, .L_end_s32 - FUNC_NAME +#undef FUNC_NAME + + + +/* +headroom_t vect_s16_max_elementwise( + int16_t a[], + const int16_t b[], + const int16_t c[], + const unsigned len, + const right_shift_t b_shr, + const right_shift_t c_shr); +*/ +#define FUNC_NAME vect_s16_max_elementwise +FUNC_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + li t3, 0x0100 + { slli t3, len, SIZEOF_LOG2_S16 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli t3, len, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + { xm.zexti t3, 5 ; srli len, len, EPV_LOG2_S16 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S16 \nMessage: The shift amount is not 32" */ + call vect_sXX_max_elementwise + { li t3, 15 ; nop } + { sub a0, t3, a0 ; xm.retsp (NSTACKWORDS)*4 } +.L_end_s16: + +.global FUNC_NAME +.type FUNC_NAME,@function +.set FUNC_NAME.nstackwords,NSTACKWORDS; .global FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 38. */ +.set FUNC_NAME.maxcores,1; .global FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 25. */ +.set FUNC_NAME.maxtimers,0; .global FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 26. */ +.set FUNC_NAME.maxchanends,0; .global FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 28. 
*/ +.size FUNC_NAME, .L_end_s16 - FUNC_NAME +#undef FUNC_NAME + + + +/* +headroom_t vect_s8_max_elementwise( + int8_t a[], + const int8_t b[], + const int8_t c[], + const unsigned len, + const right_shift_t b_shr, + const right_shift_t c_shr); +*/ +#define FUNC_NAME vect_s8_max_elementwise +FUNC_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + li t3, 0x0200 + { mv t3, len ; xm.vsetc t3} + { xm.zexti t3, 5 ; srli len, len, EPV_LOG2_S8 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S8 \nMessage: The shift amount is not 32" */ + call vect_sXX_max_elementwise + { li t3, 7 ; nop } + { sub a0, t3, a0 ; xm.retsp (NSTACKWORDS)*4 } +.L_end_s8: + +.global FUNC_NAME +.type FUNC_NAME,@function +.set FUNC_NAME.nstackwords,NSTACKWORDS; .global FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 38. */ +.set FUNC_NAME.maxcores,1; .global FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 25. */ +.set FUNC_NAME.maxtimers,0; .global FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 26. */ +.set FUNC_NAME.maxchanends,0; .global FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 28. */ +.size FUNC_NAME, .L_end_s8 - FUNC_NAME +#undef FUNC_NAME + + + +#undef len + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) +#define STACK_BYTEMASK 6 + +#define a x10 +#define b x11 +#define c x12 +#define len x13 +#define shr_b x18 +#define shr_c x19 +#define _32 x20 +#define tmp_vec x21 +#define bytemask len + + + +/** + * WARNING: This does _NOT_ use the standard ABI. It assumes x28 will contain + * the length of the tail in bytes. 
+ */ + +vect_sXX_max_elementwise: + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + + { li _32, 32 ; xm.vclrdr } + mv shr_c, a5 + { xm.mkmsk t3, t3 ; nop} + mv shr_b, a4 + { addi tmp_vec,sp, (STACK_VEC_TMP)*4 ; nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */ + { xm.mkmski t3, 32 ; sw t3, (STACK_BYTEMASK)*4 (sp)} + { nop ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.bu .L_loop_top } + + // Deal with main vector body +.p2align 4 +.L_loop_top: + // Here we need to assume shr_b and shr_c have been chosen to guarantee 1 + // bit of headroom in each so that c[k] - b[k] can't saturate. That means + // this should be perfectly accurate if there's already at least 1 bit of + // headroom in each input. + xm.vlashr c, shr_c + xm.vstrpv tmp_vec, t3 + xm.vlashr b, shr_b + xm.vstrpv a, t3 + { addi len, len, -1 ; xm.vlsub tmp_vec} + { add c, c, _32 ; xm.vpos } + { add b, b, _32 ; xm.vladd a} + { add a, a, _32 ; xm.vstr a} + { nop ; xm.bt len, .L_loop_top } +.L_loop_bot: + + lw bytemask, (STACK_BYTEMASK)*4(sp)/* Multiple XAT warnings: 'LDWSP has unknown offset - this may need correction', "Falling back on assumption: the int < 64 for the integer value of the item at position 1 in the instruction's operands in ldwsp bytemask, STACK_BYTEMASK\nMessage: The offset can be encoded in sru6 immediate" */ + beqz bytemask, .L_finish + xm.vlashr c, shr_c + xm.vstrpv tmp_vec, bytemask + xm.vlashr b, shr_b + xm.vstrpv a, bytemask + xm.vlsub tmp_vec + xm.vpos + mv t3, tmp_vec + xm.vladd a + xm.vstd tmp_vec + xm.vstrpv tmp_vec, bytemask + xm.vldr t3 + xm.vstr tmp_vec + xm.vstrpv a, bytemask + +.L_finish: + xm.lddsp s3,s2,8/* XAT Warning: "Not correcting LDDSP offset because it's not in the local frame range" */ + xm.lddsp s5,s4,16/* XAT Warning: "Not correcting LDDSP offset because it's not in the local frame 
range" */ + xm.vgetc t3 + xm.zexti t3, 5 + mv a0, t3 + ret + +.L_end_sXX: + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_min_elementwise.S b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_min_elementwise.S new file mode 100644 index 00000000..6e8467de --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sXX_min_elementwise.S @@ -0,0 +1,193 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +#include "../asm_helper.h" + +#define NSTACKWORDS (8+8+4) + +#define len x13 + +.text +.p2align 4 + + +/* +headroom_t vect_s32_min_elementwise( + int32_t a[], + const int32_t b[], + const int32_t c[], + const unsigned len, + const right_shift_t b_shr, + const right_shift_t c_shr); +*/ +#define FUNC_NAME vect_s32_min_elementwise +FUNC_NAME: + { li t3, 0 ; xm.entsp (NSTACKWORDS)*4 } + { slli t3, len, SIZEOF_LOG2_S32 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli t3, len, SIZEOF_LOG2_S32 \nMessage: The shift amount is not 32" */ + { xm.zexti t3, 5 ; srli len, len, EPV_LOG2_S32 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S32 \nMessage: The shift amount is not 32" */ + call vect_sXX_min_elementwise + { li t3, 31 ; nop } + { sub a0, t3, a0 ; xm.retsp (NSTACKWORDS)*4 } +.L_end_s32: + +.global FUNC_NAME +.type FUNC_NAME,@function +.set FUNC_NAME.nstackwords,NSTACKWORDS; .global FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 38. */ +.set FUNC_NAME.maxcores,1; .global FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 25. 
*/ +.set FUNC_NAME.maxtimers,0; .global FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 26. */ +.set FUNC_NAME.maxchanends,0; .global FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 28. */ +.size FUNC_NAME, .L_end_s32 - FUNC_NAME +#undef FUNC_NAME + + + +/* +headroom_t vect_s16_min_elementwise( + int16_t a[], + const int16_t b[], + const int16_t c[], + const unsigned len, + const right_shift_t b_shr, + const right_shift_t c_shr); +*/ +#define FUNC_NAME vect_s16_min_elementwise +FUNC_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + li t3, 0x0100 + { slli t3, len, SIZEOF_LOG2_S16 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli t3, len, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + { xm.zexti t3, 5 ; srli len, len, EPV_LOG2_S16 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S16 \nMessage: The shift amount is not 32" */ + call vect_sXX_min_elementwise + { li t3, 15 ; nop } + { sub a0, t3, a0 ; xm.retsp (NSTACKWORDS)*4 } +.L_end_s16: + +.global FUNC_NAME +.type FUNC_NAME,@function +.set FUNC_NAME.nstackwords,NSTACKWORDS; .global FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 38. */ +.set FUNC_NAME.maxcores,1; .global FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 25. */ +.set FUNC_NAME.maxtimers,0; .global FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 26. 
*/ +.set FUNC_NAME.maxchanends,0; .global FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 28. */ +.size FUNC_NAME, .L_end_s16 - FUNC_NAME +#undef FUNC_NAME + + + +/* +headroom_t vect_s8_min_elementwise( + int8_t a[], + const int8_t b[], + const int8_t c[], + const unsigned len, + const right_shift_t b_shr, + const right_shift_t c_shr); +*/ +#define FUNC_NAME vect_s8_min_elementwise +FUNC_NAME: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + li t3, 0x0200 + { mv t3, len ; xm.vsetc t3} + { xm.zexti t3, 5 ; srli len, len, EPV_LOG2_S8 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S8 \nMessage: The shift amount is not 32" */ + call vect_sXX_min_elementwise + { li t3, 7 ; nop } + { sub a0, t3, a0 ; xm.retsp (NSTACKWORDS)*4 } +.L_end_s8: + +.global FUNC_NAME +.type FUNC_NAME,@function +.set FUNC_NAME.nstackwords,NSTACKWORDS; .global FUNC_NAME.nstackwords /* Translation error on this line: unexpected token at position 38. */ +.set FUNC_NAME.maxcores,1; .global FUNC_NAME.maxcores /* Translation error on this line: unexpected token at position 25. */ +.set FUNC_NAME.maxtimers,0; .global FUNC_NAME.maxtimers /* Translation error on this line: unexpected token at position 26. */ +.set FUNC_NAME.maxchanends,0; .global FUNC_NAME.maxchanends /* Translation error on this line: unexpected token at position 28. 
*/ +.size FUNC_NAME, .L_end_s8 - FUNC_NAME +#undef FUNC_NAME + + + +#undef len + +#define STACK_VEC_TMP (NSTACKWORDS-8-4) +#define STACK_BYTEMASK 6 + +#define a x10 +#define b x11 +#define c x12 +#define len x13 +#define shr_b x18 +#define shr_c x19 +#define _32 x20 +#define tmp_vec x21 +#define bytemask len + + + +/** + * WARNING: This does _NOT_ use the standard ABI. It assumes x28 will contain + * the length of the tail in bytes. On entry a, b and c are the pointers in x10-x12, len (x13) is the main-loop iteration count, and the shift amounts are read from a4/a5. The value left in a0 is the xm.vgetc result after xm.zexti 5; the callers subtract it from a constant to produce their return value. + */ + +vect_sXX_min_elementwise: + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 /* save the registers reused below as shr_b, shr_c, _32 and tmp_vec (x18-x21, assuming the usual s2-s5 numbering); reloaded at .L_finish */ + + { li _32, 32 ; xm.vclrdr } + mv shr_c, a5 + { xm.mkmsk t3, t3 ; nop} /* convert the tail byte count into a mask (presumably one bit per byte, TODO confirm) */ + mv shr_b, a4 + { addi tmp_vec,sp, (STACK_VEC_TMP)*4 ; nop}/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */ + { xm.mkmski t3, 32 ; sw t3, (STACK_BYTEMASK)*4 (sp)} /* NOTE(review): the store is expected to see the tail mask while t3 is reloaded with a 32-bit mask for the main loop; relies on bundle read-before-write semantics, confirm */ + { nop ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.bu .L_loop_top } + + // Deal with main vector body +.p2align 4 +.L_loop_top: + // Here we need to assume shr_b and shr_c have been chosen to guarantee 1 + // bit of headroom in each so that c[k] - b[k] can't saturate. That means + // this should be perfectly accurate if there's already at least 1 bit of + // headroom in each input. 
+ xm.vlashr c, shr_c + xm.vstrpv tmp_vec, t3 + xm.vlashr b, shr_b + xm.vstrpv a, t3 + { addi len, len, -1 ; xm.vlsub tmp_vec} + { add c, c, _32 ; xm.vpos } + { add b, b, _32 ; xm.vlsub tmp_vec} + { add a, a, _32 ; xm.vstr a} + { nop ; xm.bt len, .L_loop_top } +.L_loop_bot: + + lw bytemask, (STACK_BYTEMASK)*4(sp)/* Multiple XAT warnings: 'LDWSP has unknown offset - this may need correction', "Falling back on assumption: the int < 64 for the integer value of the item at position 1 in the instruction's operands in ldwsp bytemask, STACK_BYTEMASK\nMessage: The offset can be encoded in sru6 immediate" */ + beqz bytemask, .L_finish /* empty tail mask: no partial vector left to process */ + xm.vlashr c, shr_c + xm.vstrpv tmp_vec, bytemask + xm.vlashr b, shr_b + xm.vstrpv a, bytemask + xm.vlsub tmp_vec + xm.vpos + mv t3, tmp_vec + xm.vlsub tmp_vec + xm.vstd tmp_vec + xm.vstrpv tmp_vec, bytemask + xm.vldr t3 + xm.vstr tmp_vec + xm.vstrpv a, bytemask /* masked store: only the bytes selected by bytemask are written to a */ + +.L_finish: + xm.lddsp s3,s2,8/* XAT Warning: "Not correcting LDDSP offset because it's not in the local frame range" */ + xm.lddsp s5,s4,16/* XAT Warning: "Not correcting LDDSP offset because it's not in the local frame range" */ + xm.vgetc t3 + xm.zexti t3, 5 + mv a0, t3 /* return the xm.vgetc value after xm.zexti 5; the entry points subtract it from 15 or 7 */ + ret + + +.L_end_sXX: + + +#endif //defined(__VX4B__) + + + diff --git a/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_set.S b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_set.S new file mode 100644 index 00000000..9f2b5f38 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_set.S @@ -0,0 +1,133 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ +#if defined(__VX4B__) + + +#include "../asm_helper.h" + + +.text +.p2align 2 + +#define NSTACKWORDS 32 + +#define STACK_TMP_VEC (NSTACKWORDS-16) +#define STACK_TMP_VEC_DBL ((STACK_TMP_VEC)/2) + +#define data x10 +#define value x11 +#define length x12 + + +/* Packs the 16-bit value into both halves of a 32-bit word (held in t3 and value), derives the loop count and tail byte count from length, then jumps to .L_set_bytes. +void vect_s16_set( + int16_t data[], + const int16_t value, + const unsigned length); +*/ +vect_s16_set: + xm.entsp (NSTACKWORDS)*4 + { slli t3, value, 16 ; slli a3, length, SIZEOF_LOG2_S16 } + { xm.zexti value, 16 ; xm.zexti a3, 5 } + { or t3, t3, value ; srli length, length, EPV_LOG2_S16 } + { mv value, t3 ; xm.bu .L_set_bytes } +.L_size_end_vect_s16_set: + .size vect_s16_set, .L_size_end_vect_s16_set - vect_s16_set + + +/* Copies value into t3, derives the loop count and tail byte count from length, then jumps to .L_set_bytes. +void vect_s32_set( + int32_t data[], + const int32_t value, + const unsigned length); +*/ +vect_s32_set: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + { mv t3, value ; slli a3, length, SIZEOF_LOG2_S32 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli a3, length, SIZEOF_LOG2_S32 \nMessage: The shift amount is not 32" */ + { xm.zexti a3, 5 ; srli length, length, EPV_LOG2_S32 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri length, length, EPV_LOG2_S32 \nMessage: The shift amount is not 32" */ + { nop ; xm.bu .L_set_bytes } +.L_size_end_vect_s32_set: + .size vect_s32_set, .L_size_end_vect_s32_set - vect_s32_set + +#undef value +#undef length +#define real x11 +#define imag x12 +#define length x13 + +/* Copies imag into t3 (real is already in x11), leaves the loop count in a2 and the tail byte count in a3, then jumps to .L_set_bytes. +void vect_complex_s32_set( + complex_s32_t data[], + const int32_t real_part, + const int32_t imag_part, + const unsigned length); +*/ +vect_complex_s32_set: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: 
"Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + { mv t3, imag ; slli a3, length, SIZEOF_LOG2_C32 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli a3, length, SIZEOF_LOG2_C32 \nMessage: The shift amount is not 32" */ + { srli a2, length, 5 ; xm.zexti a3, 5 } /* length is x13 here, the same register as a3 (assuming the usual a0-a7 numbering), so this shifts the byte count written by the previous bundle rather than the original element count; the srli is expected to read a3 before xm.zexti rewrites it (bundle read-before-write, confirm) */ + { nop ; xm.bu .L_set_bytes } +.L_size_end_vect_complex_s32_set: + .size vect_complex_s32_set, .L_size_end_vect_complex_s32_set - vect_complex_s32_set + + + + +#undef real +#undef imag +#undef length +#define value x11 +#define length x12 + +/* + Code shared by all functions above. Expects value (x11) and t3 to hold the fill pattern, length (x12) the loop count, and a3 the tail byte count; the pattern is written four times to a stack buffer, loaded with xm.vldr, then stored to data. +*/ +.type .L_set_bytes,@function +.L_set_bytes: + xm.stdsp value,t3,(STACK_TMP_VEC_DBL+0)*8 + xm.stdsp value,t3,(STACK_TMP_VEC_DBL+1)*8 + xm.stdsp value,t3,(STACK_TMP_VEC_DBL+2)*8 + xm.stdsp value,t3,(STACK_TMP_VEC_DBL+3)*8 + { nop ; addi t3,sp, (STACK_TMP_VEC)*4 } + { xm.mkmsk t3, a3 ; xm.vldr t3} /* xm.vldr is expected to use the buffer address while t3 is replaced by the tail mask; relies on bundle read-before-write, confirm */ + { li a3, 32 ; xm.brff length, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.bu .L_loop_top } +.p2align 3 + .L_loop_top: + { addi length, length, -1 ; xm.vstr data} + { add data, data, a3 ; xm.bt length, .L_loop_top } +.L_loop_bot: + xm.vstrpv data, t3 /* masked store for the tail, using the mask built from a3 */ + xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ + +.L_end_set_bytes: + .size .L_set_bytes, .L_end_set_bytes - .L_set_bytes + + + + + +.globl vect_s16_set +.type vect_s16_set,@function +.set vect_s16_set.nstackwords,NSTACKWORDS; .global vect_s16_set.nstackwords; /* Translation error on this line: unexpected token at position 41. 
*/ +.set vect_s16_set.maxcores,1; .global vect_s16_set.maxcores; /* Translation error on this line: unexpected token at position 28. */ +.set vect_s16_set.maxtimers,0; .global vect_s16_set.maxtimers; /* Translation error on this line: unexpected token at position 29. */ +.set vect_s16_set.maxchanends,0; .global vect_s16_set.maxchanends; /* Translation error on this line: unexpected token at position 31. */ + +.globl vect_s32_set +.type vect_s32_set,@function +.set vect_s32_set.nstackwords,NSTACKWORDS; .global vect_s32_set.nstackwords; /* Translation error on this line: unexpected token at position 41. */ +.set vect_s32_set.maxcores,1; .global vect_s32_set.maxcores; /* Translation error on this line: unexpected token at position 28. */ +.set vect_s32_set.maxtimers,0; .global vect_s32_set.maxtimers; /* Translation error on this line: unexpected token at position 29. */ +.set vect_s32_set.maxchanends,0; .global vect_s32_set.maxchanends; /* Translation error on this line: unexpected token at position 31. */ + +.globl vect_complex_s32_set +.type vect_complex_s32_set,@function +.set vect_complex_s32_set.nstackwords,NSTACKWORDS; .global vect_complex_s32_set.nstackwords; /* Translation error on this line: unexpected token at position 49. */ +.set vect_complex_s32_set.maxcores,1; .global vect_complex_s32_set.maxcores; /* Translation error on this line: unexpected token at position 36. */ +.set vect_complex_s32_set.maxtimers,0; .global vect_complex_s32_set.maxtimers; /* Translation error on this line: unexpected token at position 37. */ +.set vect_complex_s32_set.maxchanends,0; .global vect_complex_s32_set.maxchanends; /* Translation error on this line: unexpected token at position 39. 
*/ + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_shl.S b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_shl.S new file mode 100644 index 00000000..2f9b013c --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_shl.S @@ -0,0 +1,169 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + + +/* Both entry points set the xm.vsetc control word, save s2, compute the loop count and tail byte count, then jump to the shared .L_apply_op code. +headroom_t vect_s16_shl( + int16_t a[], + const int16_t b[], + const unsigned length, + const int shl); + +headroom_t vect_s32_shl( + int32_t a[], + const int32_t b[], + const unsigned length, + const int shl); +*/ + +#include "../asm_helper.h" + + + +#define NSTACKWORDS (8+2+2+4) + +#define FUNCTION_NAME shl_vect +#define FNAME_S16 CAT(FUNCTION_NAME, _s16) +#define FNAME_S32 CAT(FUNCTION_NAME, _s32) + +#define STACK_TMP_VEC (NSTACKWORDS-10) + +#define a x10 +#define b x11 +#define len x12 +#define b_shl x13 + + +.text +.p2align 2 + + + + +vect_s16_shl: + + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + li t3, 0x0100 /* control word for xm.vsetc; presumably selects 16-bit elements (TODO confirm) */ + { slli s2, len, SIZEOF_LOG2_S16 ; sw s2, 4 (sp)}/* Multiple XAT warnings: 'STWSP outside of known frame - offset may need correction', "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli s2, len, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + { srli len, len, EPV_LOG2_S16 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S16 \nMessage: The shift amount is not 32" */ + { xm.zexti s2, 5 ; xm.bu .L_apply_op } + + +.L_size_end_vect_s16_shl: + .size vect_s16_shl, .L_size_end_vect_s16_shl - vect_s16_shl + + + + + + + 
+vect_s32_shl: + + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + { li t3, 0 ; sw s2, 4 (sp)} /* t3 = 0 is the control word for xm.vsetc (presumably 32-bit elements, TODO confirm); s2 is saved because it carries the tail count into .L_apply_op */ + { slli s2, len, SIZEOF_LOG2_S32 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli s2, len, SIZEOF_LOG2_S32 \nMessage: The shift amount is not 32" */ + { srli len, len, EPV_LOG2_S32 ; xm.zexti s2, 5 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S32 \nMessage: The shift amount is not 32" */ + { nop ; xm.bu .L_apply_op } + + +.L_size_end_vect_s32_shl: + .size vect_s32_shl, .L_size_end_vect_s32_shl - vect_s32_shl + +#undef a +#undef b +#undef len +#undef b_shl + + + + + + +/* + When branching here: + * a --> x10 + * b --> x11 + * loop_count --> x12 + * shl --> x13 + * tail --> x18 (byte count of the last partial vector; the entry point stored the previous s2 at offset 4 from sp) + * VPU mode must already be set. +*/ + +#define a x10 +#define b x11 +#define loop_count x12 +#define b_shl x13 +#define b_shr b_shl +#define tail x18 + + +.type .L_apply_op,@function; /* Translation error on this line: unexpected token at position 27. 
*/ + +.L_apply_op: + + { xm.neg b_shr, b_shl ; xm.zexti s2, 5 } /* the shift is applied with xm.vlashr, so the left-shift amount is negated in place (b_shr aliases b_shl) */ + { xm.mkmsk tail, tail ; xm.brff loop_count, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { li t3, 32 ; xm.bu .L_loop_top } + { nop ; xm.bu .L_loop_top_inplace } /* NOTE(review): this bundle follows an unconditional xm.bu and has no label, so it and .L_loop_top_inplace appear unreachable; confirm whether an in-place (a == b) test is missing */ + + .p2align 4 + .L_loop_top_inplace: + xm.vlashr b, b_shr + { addi loop_count, loop_count, -1 ; xm.vstr b} + { add b, b, t3 ; xm.bt loop_count, .L_loop_top_inplace } + { mv a, b ; xm.bu .L_loop_bot } + + .p2align 4 + .L_loop_top: + xm.vlashr b, b_shr + { add b, b, t3 ; xm.vstr a} + { addi loop_count, loop_count, -1 ; nop } + { add a, a, t3 ; xm.bt loop_count, .L_loop_top } + +.L_loop_bot: + + { addi t3,sp, (STACK_TMP_VEC)*4 ; xm.brff tail, .L_finish }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.vclrdr } + xm.vlashr b, b_shr + { nop ; xm.vstd t3} + xm.vstrpv t3, tail + { li a2, 32 ; xm.vldr t3} + { slli a2, a2, 3 ; xm.vstr t3} /* NOTE(review): a2 ends up as 256 but is not read again in the visible code; confirm whether it is still needed */ + xm.vstrpv a, tail /* masked store: only the tail bytes are written to a */ + +.L_finish: + { li a0, 32 ; xm.vgetc t3} + { srli a1, t3, 8 ; lw s2, 4 (sp)} + { xm.zexti t3, 5 ; xm.shr a0, a0, a1 } + { addi t3, t3, 1 ; nop } + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } /* a0 = (32 >> (ctrl >> 8)) - (low5(ctrl) + 1), where ctrl is the xm.vgetc value; assumes xm.zexti 5 keeps the low five bits */ + + +.L_end_apply_op: + .size .L_apply_op, .L_end_apply_op - .L_apply_op + + + + + + + +.globl vect_s16_shl +.type vect_s16_shl,@function +.set vect_s16_shl.nstackwords,NSTACKWORDS; .global vect_s16_shl.nstackwords /* Translation error on this line: unexpected token at position 41. */ +.set vect_s16_shl.maxcores,1; .global vect_s16_shl.maxcores /* Translation error on this line: unexpected token at position 28. */ +.set vect_s16_shl.maxtimers,0; .global vect_s16_shl.maxtimers /* Translation error on this line: unexpected token at position 29. */ +.set vect_s16_shl.maxchanends,0; .global vect_s16_shl.maxchanends /* Translation error on this line: unexpected token at position 31. 
*/ + +.globl vect_s32_shl +.type vect_s32_shl,@function +.set vect_s32_shl.nstackwords,NSTACKWORDS; .global vect_s32_shl.nstackwords /* Translation error on this line: unexpected token at position 41. */ +.set vect_s32_shl.maxcores,1; .global vect_s32_shl.maxcores /* Translation error on this line: unexpected token at position 28. */ +.set vect_s32_shl.maxtimers,0; .global vect_s32_shl.maxtimers /* Translation error on this line: unexpected token at position 29. */ +.set vect_s32_shl.maxchanends,0; .global vect_s32_shl.maxchanends /* Translation error on this line: unexpected token at position 31. */ + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sub.S b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sub.S new file mode 100644 index 00000000..da356ec8 --- /dev/null +++ b/lib_xcore_math/src/arch/vx4b/vect_sXX/vect_sub.S @@ -0,0 +1,151 @@ +// Copyright 2020-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ +#if defined(__VX4B__) + + + +#include "../asm_helper.h" + + +.text +.p2align 2 + + +#define NSTACKWORDS (32) + + +#define STACK_VEC_TMP (NSTACKWORDS-8-1) +#define STACK_BYTEMASK 8 + + +#define a x10 +#define b x11 +#define c x12 +#define len x13 +#define shr_b x18 +#define shr_c x19 +#define _32 x20 +#define tmp_vec x21 +#define bytemask len + + + + +/* 16-bit entry point: loads 0x100 as the xm.vsetc control word, leaves the tail byte count in t3 and the loop count in len, then jumps to .L_apply_op. +headroom_t vect_s16_sub( + int16_t a[], + const int16_t b[], + const int16_t c[], + const unsigned len, + const int b_shr, + const int c_shr); +*/ +vect_s16_sub: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + li t3, 0x100 + { slli t3, len, SIZEOF_LOG2_S16 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli t3, len, SIZEOF_LOG2_S16 \nMessage: The shift amount is not 32" */ + { xm.zexti t3, 5 ; srli len, len, EPV_LOG2_S16 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S16 \nMessage: The shift amount is not 32" */ + { nop ; xm.bu .L_apply_op } +.L_func_end_s16: + + + +/* 32-bit entry point: loads 0 as the xm.vsetc control word, leaves the tail byte count in t3 and the loop count in len, then jumps to .L_apply_op. +headroom_t vect_s32_sub( + int32_t a[], + const int32_t b[], + const int32_t c[], + const unsigned len, + const int b_shr, + const int c_shr); +*/ +vect_s32_sub: + xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + { li t3, 0 ; nop } + { slli t3, len, SIZEOF_LOG2_S32 ; xm.vsetc t3}/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shli t3, len, 
SIZEOF_LOG2_S32 \nMessage: The shift amount is not 32" */ + { xm.zexti t3, 5 ; srli len, len, EPV_LOG2_S32 }/* XAT Warning: "Falling back on assumption: the int != 32 for the integer value of the item at position 2 in the instruction's operands in shri len, len, EPV_LOG2_S32 \nMessage: The shift amount is not 32" */ + { nop ; xm.bu .L_apply_op } +.L_func_end_s32: + + + + + + +/* + Code shared by all functions above. Expects t3 = tail byte count, len (x13) = loop count and the shift amounts in a4/a5; each loop pass advances a, b and c by 32 bytes. +*/ +.type .L_apply_op,@function +.L_apply_op: + + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 /* save the registers reused below as shr_b, shr_c, _32 and tmp_vec (x18-x21, assuming the usual s2-s5 numbering); reloaded at .L_finish */ + { li _32, 32 ; xm.vclrdr } + mv shr_c, a5 + mv shr_b, a4 + { xm.mkmsk t3, t3 ; nop } /* convert the tail byte count into a mask (presumably one bit per byte, TODO confirm) */ + { addi tmp_vec,sp, (NSTACKWORDS-8-1)*4 ; nop }/* Multiple XAT warnings: 'LDAWSP outside of known frame - offset may need correction', 'LDWSP outside of known frame - offset may need correction' */ /* the offset equals (STACK_VEC_TMP)*4, spelled out literally */ + { xm.mkmski t3, 32 ; sw t3, (STACK_BYTEMASK)*4 (sp)} /* NOTE(review): the store is expected to see the tail mask while t3 is reloaded with a 32-bit mask for the main loop; relies on bundle read-before-write semantics, confirm */ + { nop ; xm.brff len, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; xm.bu .L_loop_top } + +.p2align 4 +.L_loop_top: + xm.vlashr b, shr_b + xm.vstrpv tmp_vec, t3 + xm.vlashr c, shr_c + { add b, b, _32 ; nop } + { add c, c, _32 ; xm.vlsub tmp_vec} + { addi len, len, -1 ; xm.vstr a} + { add a, a, _32 ; xm.bt len, .L_loop_top } +.L_loop_bot: + + { nop ; lw bytemask, (STACK_BYTEMASK)*4 (sp)} + { nop ; xm.brff bytemask, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + xm.vlashr b, shr_b + xm.vstrpv tmp_vec, t3 + xm.vlashr c, shr_c + { mv t3, tmp_vec ; xm.vlsub tmp_vec} + { nop ; xm.vstd tmp_vec} + xm.vstrpv tmp_vec, bytemask + xm.vstrpv a, bytemask /* masked store: only the bytes selected by bytemask are written to a */ + { nop ; xm.vldr t3} + { nop ; xm.vstr t3} + +.L_finish: + xm.lddsp s3,s2,0 + xm.lddsp s5,s4,8 /* restore the registers saved on entry */ + + // Should work for both 16 and 32 bit modes + { li a0, 32 ; xm.vgetc t3} + { xm.zexti t3, 5 ; srli a1, t3, 8 } + { xm.shr a0, a0, a1 ; addi t3, t3, 1 } + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + +.L_end_apply_op: +.size .L_apply_op, .L_end_apply_op - 
.L_apply_op + + + + +.global vect_s16_sub +.type vect_s16_sub,@function +.set vect_s16_sub.nstackwords,NSTACKWORDS; .global vect_s16_sub.nstackwords /* Translation error on this line: unexpected token at position 41. */ +.set vect_s16_sub.maxcores,1; .global vect_s16_sub.maxcores /* Translation error on this line: unexpected token at position 28. */ +.set vect_s16_sub.maxtimers,0; .global vect_s16_sub.maxtimers /* Translation error on this line: unexpected token at position 29. */ +.set vect_s16_sub.maxchanends,0; .global vect_s16_sub.maxchanends /* Translation error on this line: unexpected token at position 31. */ +.size vect_s16_sub, .L_func_end_s16 - vect_s16_sub + +.global vect_s32_sub +.type vect_s32_sub,@function +.set vect_s32_sub.nstackwords,NSTACKWORDS; .global vect_s32_sub.nstackwords /* Translation error on this line: unexpected token at position 41. */ +.set vect_s32_sub.maxcores,1; .global vect_s32_sub.maxcores /* Translation error on this line: unexpected token at position 28. */ +.set vect_s32_sub.maxtimers,0; .global vect_s32_sub.maxtimers /* Translation error on this line: unexpected token at position 29. */ +.set vect_s32_sub.maxchanends,0; .global vect_s32_sub.maxchanends /* Translation error on this line: unexpected token at position 31. */ +.size vect_s32_sub, .L_func_end_s32 - vect_s32_sub + + + + +#endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/xs3/asm_helper.h b/lib_xcore_math/src/arch/xs3/asm_helper.h index 02e41768..43d5bfc6 100644 --- a/lib_xcore_math/src/arch/xs3/asm_helper.h +++ b/lib_xcore_math/src/arch/xs3/asm_helper.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#ifndef ASM_HELPER_H_ diff --git a/lib_xcore_math/src/arch/xs3/chunk_s16/chunk_s16_accumulate.S b/lib_xcore_math/src/arch/xs3/chunk_s16/chunk_s16_accumulate.S index 09bd0a06..f6da547d 100644 --- a/lib_xcore_math/src/arch/xs3/chunk_s16/chunk_s16_accumulate.S +++ b/lib_xcore_math/src/arch/xs3/chunk_s16/chunk_s16_accumulate.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_dot.S b/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_dot.S index efad32aa..30d3df59 100644 --- a/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_dot.S +++ b/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_dot.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_log.S b/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_log.S index 094164bc..f73cb86c 100644 --- a/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_log.S +++ b/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_log.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_power_series.S b/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_power_series.S index 64b382a0..0fe8002b 100644 --- a/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_power_series.S +++ b/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_power_series.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_power_series_v2.S b/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_power_series_v2.S index 2bb0c0e9..9ef5c7d7 100644 --- a/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_power_series_v2.S +++ b/lib_xcore_math/src/arch/xs3/chunk_s32/chunk_s32_power_series_v2.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/dct12_s32.S b/lib_xcore_math/src/arch/xs3/dct/s32/dct12_s32.S index 25e83438..b2ced0e6 100644 --- a/lib_xcore_math/src/arch/xs3/dct/s32/dct12_s32.S +++ b/lib_xcore_math/src/arch/xs3/dct/s32/dct12_s32.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/dct16_s32.S b/lib_xcore_math/src/arch/xs3/dct/s32/dct16_s32.S index a9fc575c..adb15b7d 100644 --- a/lib_xcore_math/src/arch/xs3/dct/s32/dct16_s32.S +++ b/lib_xcore_math/src/arch/xs3/dct/s32/dct16_s32.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/dct24_s32.S b/lib_xcore_math/src/arch/xs3/dct/s32/dct24_s32.S index eb0dcf07..d08b2465 100644 --- a/lib_xcore_math/src/arch/xs3/dct/s32/dct24_s32.S +++ b/lib_xcore_math/src/arch/xs3/dct/s32/dct24_s32.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/dct6_s32.S b/lib_xcore_math/src/arch/xs3/dct/s32/dct6_s32.S index 2e4abdf0..546306c6 100644 --- a/lib_xcore_math/src/arch/xs3/dct/s32/dct6_s32.S +++ b/lib_xcore_math/src/arch/xs3/dct/s32/dct6_s32.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/dct8_s32.S b/lib_xcore_math/src/arch/xs3/dct/s32/dct8_s32.S index 534555b6..c08bf7f1 100644 --- a/lib_xcore_math/src/arch/xs3/dct/s32/dct8_s32.S +++ b/lib_xcore_math/src/arch/xs3/dct/s32/dct8_s32.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/dct_adsb_s32.S b/lib_xcore_math/src/arch/xs3/dct/s32/dct_adsb_s32.S index 94bced07..2853a02b 100644 --- a/lib_xcore_math/src/arch/xs3/dct/s32/dct_adsb_s32.S +++ b/lib_xcore_math/src/arch/xs3/dct/s32/dct_adsb_s32.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/dct_deconvolve_s32.S b/lib_xcore_math/src/arch/xs3/dct/s32/dct_deconvolve_s32.S index d4e1deb2..ffc63444 100644 --- a/lib_xcore_math/src/arch/xs3/dct/s32/dct_deconvolve_s32.S +++ b/lib_xcore_math/src/arch/xs3/dct/s32/dct_deconvolve_s32.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/idct6_s32.S b/lib_xcore_math/src/arch/xs3/dct/s32/idct6_s32.S index bcc2c426..de947ea7 100644 --- a/lib_xcore_math/src/arch/xs3/dct/s32/idct6_s32.S +++ b/lib_xcore_math/src/arch/xs3/dct/s32/idct6_s32.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/idct8_s32.S b/lib_xcore_math/src/arch/xs3/dct/s32/idct8_s32.S index a865b14f..d609ce7b 100644 --- a/lib_xcore_math/src/arch/xs3/dct/s32/idct8_s32.S +++ b/lib_xcore_math/src/arch/xs3/dct/s32/idct8_s32.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/idct_adsb.S b/lib_xcore_math/src/arch/xs3/dct/s32/idct_adsb.S index ba927a8e..b79242c0 100644 --- a/lib_xcore_math/src/arch/xs3/dct/s32/idct_adsb.S +++ b/lib_xcore_math/src/arch/xs3/dct/s32/idct_adsb.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/idct_convolve.S b/lib_xcore_math/src/arch/xs3/dct/s32/idct_convolve.S index eb688940..d721a528 100644 --- a/lib_xcore_math/src/arch/xs3/dct/s32/idct_convolve.S +++ b/lib_xcore_math/src/arch/xs3/dct/s32/idct_convolve.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/dct/s32/idct_scale.S b/lib_xcore_math/src/arch/xs3/dct/s32/idct_scale.S index a1ebcff0..c05efeb5 100644 --- a/lib_xcore_math/src/arch/xs3/dct/s32/idct_scale.S +++ b/lib_xcore_math/src/arch/xs3/dct/s32/idct_scale.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/dct/s8/dct8x8_stageA.S b/lib_xcore_math/src/arch/xs3/dct/s8/dct8x8_stageA.S index 029c8119..37379c0a 100644 --- a/lib_xcore_math/src/arch/xs3/dct/s8/dct8x8_stageA.S +++ b/lib_xcore_math/src/arch/xs3/dct/s8/dct8x8_stageA.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/dct/s8/dct8x8_stageB.S b/lib_xcore_math/src/arch/xs3/dct/s8/dct8x8_stageB.S index a6fcc330..f92cca7a 100644 --- a/lib_xcore_math/src/arch/xs3/dct/s8/dct8x8_stageB.S +++ b/lib_xcore_math/src/arch/xs3/dct/s8/dct8x8_stageB.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/dct/vect_s32_flip.S b/lib_xcore_math/src/arch/xs3/dct/vect_s32_flip.S index 0f2dd69d..81a610eb 100644 --- a/lib_xcore_math/src/arch/xs3/dct/vect_s32_flip.S +++ b/lib_xcore_math/src/arch/xs3/dct/vect_s32_flip.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/fft/dif_fft.S b/lib_xcore_math/src/arch/xs3/fft/dif_fft.S index 6c0b3c70..8f2f465c 100644 --- a/lib_xcore_math/src/arch/xs3/fft/dif_fft.S +++ b/lib_xcore_math/src/arch/xs3/fft/dif_fft.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/fft/dit_fft.S b/lib_xcore_math/src/arch/xs3/fft/dit_fft.S index 6e78d42a..7eb48e8e 100644 --- a/lib_xcore_math/src/arch/xs3/fft/dit_fft.S +++ b/lib_xcore_math/src/arch/xs3/fft/dit_fft.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/fft/fft_hr_lut.S b/lib_xcore_math/src/arch/xs3/fft/fft_hr_lut.S index 635f4b19..de8557ef 100644 --- a/lib_xcore_math/src/arch/xs3/fft/fft_hr_lut.S +++ b/lib_xcore_math/src/arch/xs3/fft/fft_hr_lut.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/fft/fft_index_bit_reversal.S b/lib_xcore_math/src/arch/xs3/fft/fft_index_bit_reversal.S index e5415e71..10f4fb1b 100644 --- a/lib_xcore_math/src/arch/xs3/fft/fft_index_bit_reversal.S +++ b/lib_xcore_math/src/arch/xs3/fft/fft_index_bit_reversal.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/fft/fft_mono_adjust.S b/lib_xcore_math/src/arch/xs3/fft/fft_mono_adjust.S index ab530a64..ece1b693 100644 --- a/lib_xcore_math/src/arch/xs3/fft/fft_mono_adjust.S +++ b/lib_xcore_math/src/arch/xs3/fft/fft_mono_adjust.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/fft/fft_spectra_merge.S b/lib_xcore_math/src/arch/xs3/fft/fft_spectra_merge.S index 03789067..32b89228 100644 --- a/lib_xcore_math/src/arch/xs3/fft/fft_spectra_merge.S +++ b/lib_xcore_math/src/arch/xs3/fft/fft_spectra_merge.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/fft/fft_spectra_split.S b/lib_xcore_math/src/arch/xs3/fft/fft_spectra_split.S index c7e334c0..2498ba0a 100644 --- a/lib_xcore_math/src/arch/xs3/fft/fft_spectra_split.S +++ b/lib_xcore_math/src/arch/xs3/fft/fft_spectra_split.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/fft/tail_reverse_complex_s32.S b/lib_xcore_math/src/arch/xs3/fft/tail_reverse_complex_s32.S index 2137c68e..0085c2f0 100644 --- a/lib_xcore_math/src/arch/xs3/fft/tail_reverse_complex_s32.S +++ b/lib_xcore_math/src/arch/xs3/fft/tail_reverse_complex_s32.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/filter/filter_biquad_s32.S b/lib_xcore_math/src/arch/xs3/filter/filter_biquad_s32.S index 46d6d01a..451f4933 100644 --- a/lib_xcore_math/src/arch/xs3/filter/filter_biquad_s32.S +++ b/lib_xcore_math/src/arch/xs3/filter/filter_biquad_s32.S @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/lib_xcore_math/src/arch/xs3/filter/filter_biquad_sat_s32.S b/lib_xcore_math/src/arch/xs3/filter/filter_biquad_sat_s32.S index fdebdc76..9baec71d 100644 --- a/lib_xcore_math/src/arch/xs3/filter/filter_biquad_sat_s32.S +++ b/lib_xcore_math/src/arch/xs3/filter/filter_biquad_sat_s32.S @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/lib_xcore_math/src/arch/xs3/filter/filter_fir_s16.S b/lib_xcore_math/src/arch/xs3/filter/filter_fir_s16.S index fe18d3d6..c964c376 100644 --- a/lib_xcore_math/src/arch/xs3/filter/filter_fir_s16.S +++ b/lib_xcore_math/src/arch/xs3/filter/filter_fir_s16.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/lib_xcore_math/src/arch/xs3/filter/filter_fir_s32.S b/lib_xcore_math/src/arch/xs3/filter/filter_fir_s32.S index 4fe9615b..15df9e4c 100644 --- a/lib_xcore_math/src/arch/xs3/filter/filter_fir_s32.S +++ b/lib_xcore_math/src/arch/xs3/filter/filter_fir_s32.S @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
diff --git a/lib_xcore_math/src/arch/xs3/filter/push_sample_down_s16.S b/lib_xcore_math/src/arch/xs3/filter/push_sample_down_s16.S index 17dd839d..bab4e4f3 100644 --- a/lib_xcore_math/src/arch/xs3/filter/push_sample_down_s16.S +++ b/lib_xcore_math/src/arch/xs3/filter/push_sample_down_s16.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/lib_xcore_math/src/arch/xs3/filter/push_sample_up_s16.S b/lib_xcore_math/src/arch/xs3/filter/push_sample_up_s16.S index 1085e9d4..71b71427 100644 --- a/lib_xcore_math/src/arch/xs3/filter/push_sample_up_s16.S +++ b/lib_xcore_math/src/arch/xs3/filter/push_sample_up_s16.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/lib_xcore_math/src/arch/xs3/filter/vect_s32_convolve_valid.S b/lib_xcore_math/src/arch/xs3/filter/vect_s32_convolve_valid.S index 246301e1..b47111db 100644 --- a/lib_xcore_math/src/arch/xs3/filter/vect_s32_convolve_valid.S +++ b/lib_xcore_math/src/arch/xs3/filter/vect_s32_convolve_valid.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/matrix/mat_mul_s8_x_s8_yield_s32.S b/lib_xcore_math/src/arch/xs3/matrix/mat_mul_s8_x_s8_yield_s32.S index 92574f50..2c61d51d 100644 --- a/lib_xcore_math/src/arch/xs3/matrix/mat_mul_s8_x_s8_yield_s32.S +++ b/lib_xcore_math/src/arch/xs3/matrix/mat_mul_s8_x_s8_yield_s32.S @@ -1,4 +1,4 @@ -// Copyright 2021-2022 XMOS LIMITED. +// Copyright 2021-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
diff --git a/lib_xcore_math/src/arch/xs3/misc/chunk_float_s32_log.S b/lib_xcore_math/src/arch/xs3/misc/chunk_float_s32_log.S index 8b6044f4..9398e73c 100644 --- a/lib_xcore_math/src/arch/xs3/misc/chunk_float_s32_log.S +++ b/lib_xcore_math/src/arch/xs3/misc/chunk_float_s32_log.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/misc/util.S b/lib_xcore_math/src/arch/xs3/misc/util.S index 63ed682c..9d0dd9bd 100644 --- a/lib_xcore_math/src/arch/xs3/misc/util.S +++ b/lib_xcore_math/src/arch/xs3/misc/util.S @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/misc/vect_copy.S b/lib_xcore_math/src/arch/xs3/misc/vect_copy.S index a503d6d4..93baf3b7 100644 --- a/lib_xcore_math/src/arch/xs3/misc/vect_copy.S +++ b/lib_xcore_math/src/arch/xs3/misc/vect_copy.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/misc/vect_float_s32_ln_prepare.S b/lib_xcore_math/src/arch/xs3/misc/vect_float_s32_ln_prepare.S index 64b700bb..f0679962 100644 --- a/lib_xcore_math/src/arch/xs3/misc/vect_float_s32_ln_prepare.S +++ b/lib_xcore_math/src/arch/xs3/misc/vect_float_s32_ln_prepare.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/misc/xs3_memcpy.S b/lib_xcore_math/src/arch/xs3/misc/xs3_memcpy.S index b0b511e1..6a5432c1 100644 --- a/lib_xcore_math/src/arch/xs3/misc/xs3_memcpy.S +++ b/lib_xcore_math/src/arch/xs3/misc/xs3_memcpy.S @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/scalar/f32_log2.S b/lib_xcore_math/src/arch/xs3/scalar/f32_log2.S index 35748b68..a9d88120 100644 --- a/lib_xcore_math/src/arch/xs3/scalar/f32_log2.S +++ b/lib_xcore_math/src/arch/xs3/scalar/f32_log2.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/scalar/f32_norm.S b/lib_xcore_math/src/arch/xs3/scalar/f32_norm.S index 6ebe81d9..9d0ace55 100644 --- a/lib_xcore_math/src/arch/xs3/scalar/f32_norm.S +++ b/lib_xcore_math/src/arch/xs3/scalar/f32_norm.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/scalar/f32_power_series.S b/lib_xcore_math/src/arch/xs3/scalar/f32_power_series.S index 156fca0a..d916a7a7 100644 --- a/lib_xcore_math/src/arch/xs3/scalar/f32_power_series.S +++ b/lib_xcore_math/src/arch/xs3/scalar/f32_power_series.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/scalar/f32_sin.S b/lib_xcore_math/src/arch/xs3/scalar/f32_sin.S index 3f15a749..c011540a 100644 --- a/lib_xcore_math/src/arch/xs3/scalar/f32_sin.S +++ b/lib_xcore_math/src/arch/xs3/scalar/f32_sin.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/scalar/float_s32_exp.S b/lib_xcore_math/src/arch/xs3/scalar/float_s32_exp.S index 3f709bf1..c62ed64e 100644 --- a/lib_xcore_math/src/arch/xs3/scalar/float_s32_exp.S +++ b/lib_xcore_math/src/arch/xs3/scalar/float_s32_exp.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/scalar/q24_logistic_fast.S b/lib_xcore_math/src/arch/xs3/scalar/q24_logistic_fast.S index 156d1f10..7aeb92b2 100644 --- a/lib_xcore_math/src/arch/xs3/scalar/q24_logistic_fast.S +++ b/lib_xcore_math/src/arch/xs3/scalar/q24_logistic_fast.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/scalar/q30_exp_small.S b/lib_xcore_math/src/arch/xs3/scalar/q30_exp_small.S index 599a6e8b..6c01c82c 100644 --- a/lib_xcore_math/src/arch/xs3/scalar/q30_exp_small.S +++ b/lib_xcore_math/src/arch/xs3/scalar/q30_exp_small.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/scalar/q30_odd_powers.S b/lib_xcore_math/src/arch/xs3/scalar/q30_odd_powers.S index 8ddb44ed..051513d7 100644 --- a/lib_xcore_math/src/arch/xs3/scalar/q30_odd_powers.S +++ b/lib_xcore_math/src/arch/xs3/scalar/q30_odd_powers.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/scalar/q30_powers.S b/lib_xcore_math/src/arch/xs3/scalar/q30_powers.S index b565539f..9de099a0 100644 --- a/lib_xcore_math/src/arch/xs3/scalar/q30_powers.S +++ b/lib_xcore_math/src/arch/xs3/scalar/q30_powers.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/scalar/radians_to_sbrads.S b/lib_xcore_math/src/arch/xs3/scalar/radians_to_sbrads.S index 78cd7c3f..d12c378e 100644 --- a/lib_xcore_math/src/arch/xs3/scalar/radians_to_sbrads.S +++ b/lib_xcore_math/src/arch/xs3/scalar/radians_to_sbrads.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/scalar/sbrad_sin.S b/lib_xcore_math/src/arch/xs3/scalar/sbrad_sin.S index 1df51572..ef4d73d2 100644 --- a/lib_xcore_math/src/arch/xs3/scalar/sbrad_sin.S +++ b/lib_xcore_math/src/arch/xs3/scalar/sbrad_sin.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/scalar/sbrad_tan.S b/lib_xcore_math/src/arch/xs3/scalar/sbrad_tan.S index 0bd61f5d..85b8ee2c 100644 --- a/lib_xcore_math/src/arch/xs3/scalar/sbrad_tan.S +++ b/lib_xcore_math/src/arch/xs3/scalar/sbrad_tan.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s16.S b/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s16.S index 5b830399..943bbe43 100644 --- a/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s16.S +++ b/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s16.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s32.S b/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s32.S index 3f7bf89f..be5bdb19 100644 --- a/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s32.S +++ b/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s32.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s8.S b/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s8.S index c689a974..f06692e1 100644 --- a/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s8.S +++ b/lib_xcore_math/src/arch/xs3/scalar/scalar_op_s8.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/scalar/sqrt_s32.S b/lib_xcore_math/src/arch/xs3/scalar/sqrt_s32.S index b370e901..5597568d 100644 --- a/lib_xcore_math/src/arch/xs3/scalar/sqrt_s32.S +++ b/lib_xcore_math/src/arch/xs3/scalar/sqrt_s32.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_complex_scale.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_complex_scale.S index b06014d2..f7a422f7 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_complex_scale.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_complex_scale.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conj_macc.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conj_macc.S index 05393044..8ca467fb 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conj_macc.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conj_macc.S @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. // XMOS Public License: Version 1 diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conj_nmacc.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conj_nmacc.S index 4509b2c6..6aa1447e 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conj_nmacc.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conj_nmacc.S @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. 
+// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. // XMOS Public License: Version 1 diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conjugate_mul.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conjugate_mul.S index 770c804d..91a8a58a 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conjugate_mul.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_conjugate_mul.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_macc.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_macc.S index db1fae4a..4731a8a2 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_macc.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_macc.S @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. // XMOS Public License: Version 1 diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_mag.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_mag.S index ab273f4d..0efd80d2 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_mag.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_mag.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_mul.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_mul.S index f04d616a..03de0c0e 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_mul.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_mul.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_nmacc.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_nmacc.S index e92155e8..0ae21976 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_nmacc.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_nmacc.S @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. // XMOS Public License: Version 1 diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_real_mul.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_real_mul.S index 6d716f40..f86e6741 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_real_mul.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_real_mul.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_squared_mag.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_squared_mag.S index 5347f775..9825ab06 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_squared_mag.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_squared_mag.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_sum.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_sum.S index 017dd28c..5f54a445 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_sum.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_sum.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_to_complex_s32.S b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_to_complex_s32.S index 09bdab64..8fc9d480 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_to_complex_s32.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s16/vect_complex_s16_to_complex_s32.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_complex_scale.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_complex_scale.S index 34cb459a..355ac0b1 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_complex_scale.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_complex_scale.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conj_macc.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conj_macc.S index 631dac76..8ec1da4d 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conj_macc.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conj_macc.S @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. // XMOS Public License: Version 1 diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conj_nmacc.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conj_nmacc.S index 420edaa9..dec7733b 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conj_nmacc.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conj_nmacc.S @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
// XMOS Public License: Version 1 diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conjugate.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conjugate.S index 32889824..c75a6251 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conjugate.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conjugate.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conjugate_mul.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conjugate_mul.S index 256c1e66..98b2d2d6 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conjugate_mul.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_conjugate_mul.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_macc.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_macc.S index fbd84c11..8736659f 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_macc.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_macc.S @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
// XMOS Public License: Version 1 diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_mag.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_mag.S index f711133a..4d8c7a01 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_mag.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_mag.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_mul.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_mul.S index 1e8d3b79..bff841b8 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_mul.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_mul.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_nmacc.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_nmacc.S index f8508227..7c544727 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_nmacc.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_nmacc.S @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
// XMOS Public License: Version 1 diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_real_mul.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_real_mul.S index 9cf62f18..8f15bd3d 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_real_mul.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_real_mul.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_squared_mag.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_squared_mag.S index fa69691c..c9826a8b 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_squared_mag.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_squared_mag.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_sum.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_sum.S index 8e72e020..8d8c8c63 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_sum.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_sum.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_to_complex_s16.S b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_to_complex_s16.S index c8efa1ed..4b4feea1 100644 --- a/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_to_complex_s16.S +++ b/lib_xcore_math/src/arch/xs3/vect_complex_s32/vect_complex_s32_to_complex_s16.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_conj_macc.S b/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_conj_macc.S index 2c94046d..42b8e433 100644 --- a/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_conj_macc.S +++ b/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_conj_macc.S @@ -1,4 +1,4 @@ -// Copyright 2022-2023 XMOS LIMITED. +// Copyright 2022-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_conj_mul.S b/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_conj_mul.S index c31a504e..a0edc432 100644 --- a/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_conj_mul.S +++ b/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_conj_mul.S @@ -1,4 +1,4 @@ -// Copyright 2022-2023 XMOS LIMITED. +// Copyright 2022-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_macc.S b/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_macc.S index 1ffcd8b2..085d0c2f 100644 --- a/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_macc.S +++ b/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_macc.S @@ -1,4 +1,4 @@ -// Copyright 2022-2023 XMOS LIMITED. +// Copyright 2022-2026 XMOS LIMITED. 
// This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_mul.S b/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_mul.S index 1159b8cf..cef37d6c 100644 --- a/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_mul.S +++ b/lib_xcore_math/src/arch/xs3/vect_f32/vect_complex_f32_mul.S @@ -1,4 +1,4 @@ -// Copyright 2022-2023 XMOS LIMITED. +// Copyright 2022-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_add.S b/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_add.S index ab7600c0..df4444ad 100644 --- a/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_add.S +++ b/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_add.S @@ -1,4 +1,4 @@ -// Copyright 2022-2023 XMOS LIMITED. +// Copyright 2022-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_dot.S b/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_dot.S index a4ec16b1..e731ae0a 100644 --- a/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_dot.S +++ b/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_dot.S @@ -1,4 +1,4 @@ -// Copyright 2022-2023 XMOS LIMITED. +// Copyright 2022-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_max_exponent.S b/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_max_exponent.S index 14d63e55..cd0449e7 100644 --- a/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_max_exponent.S +++ b/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_max_exponent.S @@ -1,4 +1,4 @@ -// Copyright 2022-2023 XMOS LIMITED. +// Copyright 2022-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_to_s32.S b/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_to_s32.S index 77a8875e..1137dee0 100644 --- a/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_to_s32.S +++ b/lib_xcore_math/src/arch/xs3/vect_f32/vect_f32_to_s32.S @@ -1,4 +1,4 @@ -// Copyright 2022-2023 XMOS LIMITED. +// Copyright 2022-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_abs_sum.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_abs_sum.S index 47ee29a8..c6e6427e 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_abs_sum.S +++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_abs_sum.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_argmax.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_argmax.S index dc38a5fb..dfb1ca0e 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_argmax.S +++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_argmax.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_argmin.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_argmin.S index 76522edf..a02ca9a6 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_argmin.S +++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_argmin.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_clip.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_clip.S index c4089989..b4a31ea9 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_clip.S +++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_clip.S @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_dot.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_dot.S index 0e057b2b..026a5acb 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_dot.S +++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_dot.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_energy.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_energy.S index 85ca35f4..b7965417 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_energy.S +++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_energy.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_extract_high_byte.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_extract_high_byte.S index ade68b04..3300b280 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_extract_high_byte.S +++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_extract_high_byte.S @@ -1,4 +1,4 @@ -// Copyright 2021-2022 XMOS LIMITED. +// Copyright 2021-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_extract_low_byte.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_extract_low_byte.S index 39c6f53f..b5363b29 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_extract_low_byte.S +++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_extract_low_byte.S @@ -1,4 +1,4 @@ -// Copyright 2021-2022 XMOS LIMITED. +// Copyright 2021-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_inverse.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_inverse.S index 24f3583d..9496cbfc 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_inverse.S +++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_inverse.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_macc.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_macc.S index d65c5fc3..7fb8c9f0 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_macc.S +++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_macc.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. // XMOS Public License: Version 1 diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_max.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_max.S index 6202bd04..4fdfd54e 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_max.S +++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_max.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_min.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_min.S index 001ccc6b..965f5dd1 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_min.S +++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_min.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_mul.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_mul.S index 898c58d2..c9c9c972 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_mul.S +++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_mul.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_nmacc.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_nmacc.S index d47eb187..6cf1fda2 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_nmacc.S +++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_nmacc.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. // XMOS Public License: Version 1 diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_scale.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_scale.S index 2cfe099f..510e3dc5 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_scale.S +++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_scale.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_sqrt.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_sqrt.S index f79fe82c..1d1993f6 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_sqrt.S +++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_sqrt.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_sum.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_sum.S index f3cd0881..7a99d001 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_sum.S +++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_sum.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_to_s32.S b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_to_s32.S index 50aeaec1..be5b7c40 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_to_s32.S +++ b/lib_xcore_math/src/arch/xs3/vect_s16/vect_s16_to_s32.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/s32_to_chunk_s32.S b/lib_xcore_math/src/arch/xs3/vect_s32/s32_to_chunk_s32.S index 23615dd3..9521c203 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s32/s32_to_chunk_s32.S +++ b/lib_xcore_math/src/arch/xs3/vect_s32/s32_to_chunk_s32.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_abs_sum.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_abs_sum.S index a83b903d..578b82f3 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_abs_sum.S +++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_abs_sum.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_argmax.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_argmax.S index ee0b02dd..e505f711 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_argmax.S +++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_argmax.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_argmin.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_argmin.S index b766c200..92592d8c 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_argmin.S +++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_argmin.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_clip.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_clip.S index d1198108..12611520 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_clip.S +++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_clip.S @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_dot.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_dot.S index 6e11ebb6..0156a1fe 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_dot.S +++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_dot.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_energy.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_energy.S index ef1cb34b..287ed4ac 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_energy.S +++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_energy.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_inverse.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_inverse.S index 3d5973a0..418b1bdf 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_inverse.S +++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_inverse.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_macc.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_macc.S index 376b0f30..cb885d89 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_macc.S +++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_macc.S @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
// XMOS Public License: Version 1 diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_max.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_max.S index d3355f13..a25fec67 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_max.S +++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_max.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_merge_accs.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_merge_accs.S index 54f2f0bc..e4128c73 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_merge_accs.S +++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_merge_accs.S @@ -1,4 +1,4 @@ -// Copyright 2021-2022 XMOS LIMITED. +// Copyright 2021-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. // XMOS Public License: Version 1 diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_min.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_min.S index ee95926d..c58d9018 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_min.S +++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_min.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_mul.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_mul.S index b3b9f094..fdeaca07 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_mul.S +++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_mul.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_nmacc.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_nmacc.S index 1ff4e1eb..0f3acce6 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_nmacc.S +++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_nmacc.S @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. // XMOS Public License: Version 1 diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_scale.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_scale.S index 43bd896e..e74e3b30 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_scale.S +++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_scale.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_split_accs.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_split_accs.S index 2702e90a..a41e8562 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_split_accs.S +++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_split_accs.S @@ -1,4 +1,4 @@ -// Copyright 2021-2022 XMOS LIMITED. +// Copyright 2021-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. // XMOS Public License: Version 1 diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_sqrt.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_sqrt.S index 5b1f7a68..8e31813b 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_sqrt.S +++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_sqrt.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_sum.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_sum.S index c34c41d6..2d0d9177 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_sum.S +++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_sum.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_to_f32.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_to_f32.S index 37a2ccbd..07950b3a 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_to_f32.S +++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_to_f32.S @@ -1,4 +1,4 @@ -// Copyright 2022-2023 XMOS LIMITED. +// Copyright 2022-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_to_s16.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_to_s16.S index 4da673b4..f3fb3d22 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_to_s16.S +++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_to_s16.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_unzip.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_unzip.S index 7b090876..2d647c62 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_unzip.S +++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_unzip.S @@ -1,4 +1,4 @@ -// Copyright 2021-2022 XMOS LIMITED. +// Copyright 2021-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
// XMOS Public License: Version 1 diff --git a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_zip.S b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_zip.S index dfb71846..a6770972 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_zip.S +++ b/lib_xcore_math/src/arch/xs3/vect_s32/vect_s32_zip.S @@ -1,4 +1,4 @@ -// Copyright 2021-2022 XMOS LIMITED. +// Copyright 2021-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. // XMOS Public License: Version 1 diff --git a/lib_xcore_math/src/arch/xs3/vect_s8/vect_s8_is_negative.S b/lib_xcore_math/src/arch/xs3/vect_s8/vect_s8_is_negative.S index b1d7cf5f..b3469266 100644 --- a/lib_xcore_math/src/arch/xs3/vect_s8/vect_s8_is_negative.S +++ b/lib_xcore_math/src/arch/xs3/vect_s8/vect_s8_is_negative.S @@ -1,4 +1,4 @@ -// Copyright 2021-2022 XMOS LIMITED. +// Copyright 2021-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_abs.S b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_abs.S index e5c8b891..ecf313aa 100644 --- a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_abs.S +++ b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_abs.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_add.S b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_add.S index b842f50d..765a07ba 100644 --- a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_add.S +++ b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_add.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_headroom.S b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_headroom.S index c6cd4d8c..a3504601 100644 --- a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_headroom.S +++ b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_headroom.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_rect.S b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_rect.S index b68dafff..d848fb72 100644 --- a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_rect.S +++ b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_rect.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_add_scalar.S b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_add_scalar.S index 6e808486..8be41f8c 100644 --- a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_add_scalar.S +++ b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_add_scalar.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_max_elementwise.S b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_max_elementwise.S index ff40923b..85155ba6 100644 --- a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_max_elementwise.S +++ b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_max_elementwise.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_min_elementwise.S b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_min_elementwise.S index 4fb2e8f6..27a8de50 100644 --- a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_min_elementwise.S +++ b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sXX_min_elementwise.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_set.S b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_set.S index f7375317..38971029 100644 --- a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_set.S +++ b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_set.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_shl.S b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_shl.S index fa9741a7..03899b71 100644 --- a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_shl.S +++ b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_shl.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #if defined(__XS3A__) diff --git a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sub.S b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sub.S index 37cdb6bf..8081b6a9 100644 --- a/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sub.S +++ b/lib_xcore_math/src/arch/xs3/vect_sXX/vect_sub.S @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__XS3A__) diff --git a/lib_xcore_math/src/bfp/bfp_alloc.c b/lib_xcore_math/src/bfp/bfp_alloc.c index 1271b3f3..ca9e2d84 100644 --- a/lib_xcore_math/src/bfp/bfp_alloc.c +++ b/lib_xcore_math/src/bfp/bfp_alloc.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/lib_xcore_math/src/bfp/bfp_complex_s16.c b/lib_xcore_math/src/bfp/bfp_complex_s16.c index b094254b..17844279 100644 --- a/lib_xcore_math/src/bfp/bfp_complex_s16.c +++ b/lib_xcore_math/src/bfp/bfp_complex_s16.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/lib_xcore_math/src/bfp/bfp_complex_s32.c b/lib_xcore_math/src/bfp/bfp_complex_s32.c index 00f3f652..c41f8c1c 100644 --- a/lib_xcore_math/src/bfp/bfp_complex_s32.c +++ b/lib_xcore_math/src/bfp/bfp_complex_s32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/bfp/bfp_init.c b/lib_xcore_math/src/bfp/bfp_init.c index 72ec2557..c4fd17f8 100644 --- a/lib_xcore_math/src/bfp/bfp_init.c +++ b/lib_xcore_math/src/bfp/bfp_init.c @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/lib_xcore_math/src/bfp/bfp_s16.c b/lib_xcore_math/src/bfp/bfp_s16.c index bdc8ddc9..ba41ea49 100644 --- a/lib_xcore_math/src/bfp/bfp_s16.c +++ b/lib_xcore_math/src/bfp/bfp_s16.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
diff --git a/lib_xcore_math/src/bfp/bfp_s32.c b/lib_xcore_math/src/bfp/bfp_s32.c index 5887f7fd..83cc4854 100644 --- a/lib_xcore_math/src/bfp/bfp_s32.c +++ b/lib_xcore_math/src/bfp/bfp_s32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2025 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/lib_xcore_math/src/bfp/misc/gradient_constraint.c b/lib_xcore_math/src/bfp/misc/gradient_constraint.c index 676bb361..054e9a19 100644 --- a/lib_xcore_math/src/bfp/misc/gradient_constraint.c +++ b/lib_xcore_math/src/bfp/misc/gradient_constraint.c @@ -1,4 +1,4 @@ -// Copyright 2021-2023 XMOS LIMITED. +// Copyright 2021-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/lib_xcore_math/src/dct/dct8x8.c b/lib_xcore_math/src/dct/dct8x8.c index 2afec4d2..51fb5b81 100644 --- a/lib_xcore_math/src/dct/dct8x8.c +++ b/lib_xcore_math/src/dct/dct8x8.c @@ -1,4 +1,4 @@ -// Copyright 2022-2024 XMOS LIMITED. +// Copyright 2022-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/dct/dct_forward.c b/lib_xcore_math/src/dct/dct_forward.c index ca43f930..fbc6fdc5 100644 --- a/lib_xcore_math/src/dct/dct_forward.c +++ b/lib_xcore_math/src/dct/dct_forward.c @@ -1,4 +1,4 @@ -// Copyright 2022-2024 XMOS LIMITED. +// Copyright 2022-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/dct/dct_inverse.c b/lib_xcore_math/src/dct/dct_inverse.c index 4c3e47df..02978e8f 100644 --- a/lib_xcore_math/src/dct/dct_inverse.c +++ b/lib_xcore_math/src/dct/dct_inverse.c @@ -1,4 +1,4 @@ -// Copyright 2022-2024 XMOS LIMITED. +// Copyright 2022-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/lib_xcore_math/src/etc/xmath_fft_lut/xmath_fft_lut.c b/lib_xcore_math/src/etc/xmath_fft_lut/xmath_fft_lut.c index 448c78ab..3d20ab1f 100644 --- a/lib_xcore_math/src/etc/xmath_fft_lut/xmath_fft_lut.c +++ b/lib_xcore_math/src/etc/xmath_fft_lut/xmath_fft_lut.c @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include "xmath_fft_lut.h" diff --git a/lib_xcore_math/src/etc/xmath_fft_lut/xmath_fft_lut.h b/lib_xcore_math/src/etc/xmath_fft_lut/xmath_fft_lut.h index 02c2e224..372c22d1 100644 --- a/lib_xcore_math/src/etc/xmath_fft_lut/xmath_fft_lut.h +++ b/lib_xcore_math/src/etc/xmath_fft_lut/xmath_fft_lut.h @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/src/fft/fft_bfp.c b/lib_xcore_math/src/fft/fft_bfp.c index 1ffc2400..2bde08a7 100644 --- a/lib_xcore_math/src/fft/fft_bfp.c +++ b/lib_xcore_math/src/fft/fft_bfp.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/fft/fft_f32.c b/lib_xcore_math/src/fft/fft_f32.c index 67444026..86669294 100644 --- a/lib_xcore_math/src/fft/fft_f32.c +++ b/lib_xcore_math/src/fft/fft_f32.c @@ -1,4 +1,4 @@ -// Copyright 2022 XMOS LIMITED. +// Copyright 2022-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/filter/filters.c b/lib_xcore_math/src/filter/filters.c index deceac66..9b71728b 100644 --- a/lib_xcore_math/src/filter/filters.c +++ b/lib_xcore_math/src/filter/filters.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. 
// This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/lib_xcore_math/src/scalar/scalar_f32.c b/lib_xcore_math/src/scalar/scalar_f32.c index 985978e9..36f6b5b1 100644 --- a/lib_xcore_math/src/scalar/scalar_f32.c +++ b/lib_xcore_math/src/scalar/scalar_f32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/scalar/scalar_float_complex_sXX.c b/lib_xcore_math/src/scalar/scalar_float_complex_sXX.c index aaf99313..e9fc9b33 100644 --- a/lib_xcore_math/src/scalar/scalar_float_complex_sXX.c +++ b/lib_xcore_math/src/scalar/scalar_float_complex_sXX.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/scalar/scalar_float_s32.c b/lib_xcore_math/src/scalar/scalar_float_s32.c index f2f18184..0047a0c5 100644 --- a/lib_xcore_math/src/scalar/scalar_float_s32.c +++ b/lib_xcore_math/src/scalar/scalar_float_s32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/scalar/scalar_float_s64.c b/lib_xcore_math/src/scalar/scalar_float_s64.c index 120687be..e5b027be 100644 --- a/lib_xcore_math/src/scalar/scalar_float_s64.c +++ b/lib_xcore_math/src/scalar/scalar_float_s64.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/lib_xcore_math/src/scalar/scalar_ops.c b/lib_xcore_math/src/scalar/scalar_ops.c index fdbfa473..75e44483 100644 --- a/lib_xcore_math/src/scalar/scalar_ops.c +++ b/lib_xcore_math/src/scalar/scalar_ops.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/scalar/scalar_qXX.c b/lib_xcore_math/src/scalar/scalar_qXX.c index f6c7bcd5..b34758eb 100644 --- a/lib_xcore_math/src/scalar/scalar_qXX.c +++ b/lib_xcore_math/src/scalar/scalar_qXX.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/vect/chunk_s32.c b/lib_xcore_math/src/vect/chunk_s32.c index c49a1a76..9365630a 100644 --- a/lib_xcore_math/src/vect/chunk_s32.c +++ b/lib_xcore_math/src/vect/chunk_s32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/vect/complex_prepare.c b/lib_xcore_math/src/vect/complex_prepare.c index ce049270..9c8b8121 100644 --- a/lib_xcore_math/src/vect/complex_prepare.c +++ b/lib_xcore_math/src/vect/complex_prepare.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/vect/convolve.c b/lib_xcore_math/src/vect/convolve.c index 08451648..ff7080c1 100644 --- a/lib_xcore_math/src/vect/convolve.c +++ b/lib_xcore_math/src/vect/convolve.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
diff --git a/lib_xcore_math/src/vect/mat_mul.c b/lib_xcore_math/src/vect/mat_mul.c index 83b3d2cf..07a7efa2 100644 --- a/lib_xcore_math/src/vect/mat_mul.c +++ b/lib_xcore_math/src/vect/mat_mul.c @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/vect/prepare.c b/lib_xcore_math/src/vect/prepare.c index c31a14eb..2aec5656 100644 --- a/lib_xcore_math/src/vect/prepare.c +++ b/lib_xcore_math/src/vect/prepare.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/vect/vect_complex_mag_rot_tables.c b/lib_xcore_math/src/vect/vect_complex_mag_rot_tables.c index 59d003ab..8a70e0c2 100644 --- a/lib_xcore_math/src/vect/vect_complex_mag_rot_tables.c +++ b/lib_xcore_math/src/vect/vect_complex_mag_rot_tables.c @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/vect/vect_complex_s16.c b/lib_xcore_math/src/vect/vect_complex_s16.c index 010f26d8..65b35a64 100644 --- a/lib_xcore_math/src/vect/vect_complex_s16.c +++ b/lib_xcore_math/src/vect/vect_complex_s16.c @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/vect/vect_complex_s32.c b/lib_xcore_math/src/vect/vect_complex_s32.c index 70aa655b..6d4fe35b 100644 --- a/lib_xcore_math/src/vect/vect_complex_s32.c +++ b/lib_xcore_math/src/vect/vect_complex_s32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. 
// This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/vect/vect_f32.c b/lib_xcore_math/src/vect/vect_f32.c index 91517e9c..9abe6bf9 100644 --- a/lib_xcore_math/src/vect/vect_f32.c +++ b/lib_xcore_math/src/vect/vect_f32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/vect/vect_float_s32.c b/lib_xcore_math/src/vect/vect_float_s32.c index 9389e012..544e9b8a 100644 --- a/lib_xcore_math/src/vect/vect_float_s32.c +++ b/lib_xcore_math/src/vect/vect_float_s32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/vect/vect_s16.c b/lib_xcore_math/src/vect/vect_s16.c index d48ca756..179f7abe 100644 --- a/lib_xcore_math/src/vect/vect_s16.c +++ b/lib_xcore_math/src/vect/vect_s16.c @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/vect/vect_s32.c b/lib_xcore_math/src/vect/vect_s32.c index 9b6a232d..99c41f6e 100644 --- a/lib_xcore_math/src/vect/vect_s32.c +++ b/lib_xcore_math/src/vect/vect_s32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/lib_xcore_math/src/vect/vpu_const_vects.c b/lib_xcore_math/src/vect/vpu_const_vects.c index 56d20aa7..852fd652 100644 --- a/lib_xcore_math/src/vect/vpu_const_vects.c +++ b/lib_xcore_math/src/vect/vpu_const_vects.c @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. 
// This Software is subject to the terms of the XMOS Public Licence: Version 1. #include "vpu_const_vects.h" diff --git a/lib_xcore_math/src/vect/vpu_const_vects.h b/lib_xcore_math/src/vect/vpu_const_vects.h index 54808415..f1c57e46 100644 --- a/lib_xcore_math/src/vect/vpu_const_vects.h +++ b/lib_xcore_math/src/vect/vpu_const_vects.h @@ -1,4 +1,4 @@ -// Copyright 2020-2022 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/lib_xcore_math/src/vect/vpu_helper.h b/lib_xcore_math/src/vect/vpu_helper.h index cf2df0a1..ff9aee8b 100644 --- a/lib_xcore_math/src/vect/vpu_helper.h +++ b/lib_xcore_math/src/vect/vpu_helper.h @@ -1,4 +1,4 @@ -// Copyright 2020-2023 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/tests/Makefile b/tests/Makefile new file mode 100644 index 00000000..a807c36e --- /dev/null +++ b/tests/Makefile @@ -0,0 +1,210 @@ +# Configurable variables (override on the make command line) +# Prefer system clang on macOS to avoid picking up XMOS toolchain clang +# UNAME_S := $(shell uname -s) +# ifeq ($(UNAME_S),Darwin) +# CC := /usr/bin/clang +# else +CC := clang +# endif +ROOT_DIR := $(shell pwd)/.. 
+ARCH ?= vx4b + +SRC_BFP_COMMON := \ + $(wildcard $(ROOT_DIR)/tests/bfp_tests/src/misc/*.c) \ + $(wildcard $(ROOT_DIR)/tests/bfp_tests/src/*.c) \ + $(wildcard $(ROOT_DIR)/tests/bfp_tests/src/bfp/*/*/*.c) \ + $(wildcard $(ROOT_DIR)/tests/bfp_tests/src/bfp/*/*.c) + +# Source files (common + arch-specific) +SRC_DCT_COMMON := \ + $(wildcard $(ROOT_DIR)/tests/dct_tests/src/lib_dsp/*.c) \ + $(wildcard $(ROOT_DIR)/tests/dct_tests/src/*.c) + +SRC_FFT_COMMON := \ + $(wildcard $(ROOT_DIR)/tests/fft_tests/src/*.c) + +SRC_FILTER_COMMON := \ + $(wildcard $(ROOT_DIR)/tests/dct_tests/src/filter/*.c) \ + $(wildcard $(ROOT_DIR)/tests/dct_tests/src/*.c) + +SRC_FILTER_COMMON := \ + $(wildcard $(ROOT_DIR)/tests/filter_tests/src/filter/*.c) \ + $(wildcard $(ROOT_DIR)/tests/filter_tests/src/*.c) + +SRC_SCALAR_COMMON := \ + $(wildcard $(ROOT_DIR)/tests/scalar_tests/src/basic/*.c) \ + $(wildcard $(ROOT_DIR)/tests/scalar_tests/src/float/*.c) \ + $(wildcard $(ROOT_DIR)/tests/scalar_tests/src/util/*.c) \ + $(wildcard $(ROOT_DIR)/tests/scalar_tests/src/*.c) + +SRC_VECT_COMMON := \ + $(wildcard $(ROOT_DIR)/tests/vect_tests/src/matrix/*.c) \ + $(wildcard $(ROOT_DIR)/tests/vect_tests/src/vect/*.c) \ + $(wildcard $(ROOT_DIR)/tests/vect_tests/src/vect/complex/*.c) \ + $(wildcard $(ROOT_DIR)/tests/vect_tests/src/vect/float/*.c) \ + $(wildcard $(ROOT_DIR)/tests/vect_tests/src/vect/stat/*.c) \ + $(wildcard $(ROOT_DIR)/tests/vect_tests/src/*.c) + +SRC_ARCH_vx4b := \ + $(wildcard $(ROOT_DIR)/../lib_unity/lib_unity/Unity/extras/*/src/*.c) \ + $(wildcard $(ROOT_DIR)/../lib_unity/lib_unity/Unity/src/*.c) \ + $(wildcard $(ROOT_DIR)/tests/shared/*/*.c) \ + $(wildcard $(ROOT_DIR)/lib_xcore_math/src/vect/*.c) \ + $(wildcard $(ROOT_DIR)/lib_xcore_math/src/scalar/*.c) \ + $(wildcard $(ROOT_DIR)/lib_xcore_math/src/filter/*.c) \ + $(wildcard $(ROOT_DIR)/lib_xcore_math/src/etc/xmath_fft_lut/*.c) \ + $(wildcard $(ROOT_DIR)/lib_xcore_math/src/fft/*.c) \ + $(wildcard $(ROOT_DIR)/lib_xcore_math/src/dct/*.c) \ + 
$(wildcard $(ROOT_DIR)/lib_xcore_math/src/bfp/*.c) \ + $(wildcard $(ROOT_DIR)/lib_xcore_math/src/bfp/*/*.c) \ + $(wildcard $(ROOT_DIR)/lib_xcore_math/src/arch/vx4b/**/*.S) \ + $(wildcard $(ROOT_DIR)/lib_xcore_math/src/arch/vx4b/*/*/*.S) \ + $(wildcard $(ROOT_DIR)/lib_xcore_math/src/arch/vx4b/*.c) \ + $(ROOT_DIR)/lib_xcore_math/src/arch/vx4b/vect_s16/vect_s16_dot.c \ + $(ROOT_DIR)/lib_xcore_math/src/arch/vx4b/chunk_s16/chunk_s16_accumulate.c \ + $(ROOT_DIR)/lib_xcore_math/src/arch/vx4b/scalar/float_s32.c \ + $(ROOT_DIR)/lib_xcore_math/src/arch/ref/vpu_scalar_ops.c + + +# Exclude problematic assembly file from the arch source list +# (keeps rest of the wildcard-based list intact but removes this one file) +SRC_ARCH_vx4b := $(filter-out $(ROOT_DIR)/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s16.S,$(SRC_ARCH_vx4b)) +SRC_ARCH_vx4b := $(filter-out $(ROOT_DIR)/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s32.S,$(SRC_ARCH_vx4b)) +SRC_ARCH_vx4b := $(filter-out $(ROOT_DIR)/lib_xcore_math/src/arch/vx4b/scalar/scalar_op_s8.S,$(SRC_ARCH_vx4b)) + +# SRC_ARCH_vx4b += $(ROOT_DIR)/lib_xcore_math/lib_xcore_math/src/arch/ref/vpu_scalar_ops.c + +# Default include paths (adjust as required) +COMMON_INCLUDES := -I lib_nn/api \ + -I $(ROOT_DIR)/../lib_unity/lib_unity/Unity/extras/fixture/src \ + -I $(ROOT_DIR)/../lib_unity/lib_unity/Unity/src \ + -I $(ROOT_DIR)/../lib_unity/lib_unity/Unity/extras/memory/src \ + -I $(ROOT_DIR)/lib_xcore_math/api\ + -I $(ROOT_DIR)/tests/shared/pseudo_rand\ + -I $(ROOT_DIR)/tests/shared/testing\ + -I $(ROOT_DIR)/tests/shared/floating_fft\ + -I $(ROOT_DIR)/lib_xcore_math/src/etc/xmath_fft_lut \ + -I $(ROOT_DIR)/lib_xcore_math/src/vect + +# Arch-specific flags +ARCH_FLAGS_vx4b := -mcpu=xmos-vx4b -D__VX4B__ -DSMOKE_TEST + +# XMOS toolchain (used for assembling/linking vx4 targets) +XCC := /Applications/XMOS_XTC_0.2.0/riscv-toolchain/bin/clang + +# By default use CC for C compile. 
When building vx4/vx4b with the XMOS +# toolchain available, use the XMOS clang for C compile, assembling .S +# files and linking the final binaries so all object files target the +# same architecture (prevents mixed-format objects). +ASM_CC := $(CC) +LINK_CC := $(CC) +ifeq ($(findstring vx4,$(ARCH)),vx4) +ifneq ($(wildcard $(XCC)),) +CC := $(XCC) +ASM_CC := $(XCC) +LINK_CC := $(XCC) +endif +endif + +# Common compile flags +COMMON_FLAGS := -Os + +# Enable section-level GC: compile into individual sections and ask the linker +# to drop unused sections. These flags are safe for host builds but are +# essential for cross-built vx4 binaries to reduce final size. +COMMON_FLAGS += -ffunction-sections -fdata-sections -DUNITY_SUPPORT_64=1 -DUNITY_INCLUDE_DOUBLE + +# Linker flags (pass to the linker via the compiler driver) +LDFLAGS := -Wl,--gc-sections + +CFLAGS := $(COMMON_FLAGS) $(ARCH_FLAGS_$(ARCH)) $(COMMON_INCLUDES) -I $(ROOT_DIR)/tests/bfp_tests/src + +# Build directories +OBJDIR := $(ROOT_DIR)/build/$(ARCH)/obj +BINDIR := $(ROOT_DIR)/build/$(ARCH)/bin + +# Ensure arch-specific variable names resolve +SRC_ARCH := $(SRC_ARCH_$(ARCH)) + +# Helper: convert source list to object list under OBJDIR +define objs_from_src + $(patsubst %.c,%.o,$(patsubst %.S,%.o,$(patsubst $(ROOT_DIR)/%,$(OBJDIR)/%,$(1)))) +endef + +OBJ_BFP := $(call objs_from_src,$(SRC_BFP_COMMON) $(SRC_ARCH)) +OBJ_DCT := $(call objs_from_src,$(SRC_DCT_COMMON) $(SRC_ARCH)) +OBJ_FFT := $(call objs_from_src,$(SRC_FFT_COMMON) $(SRC_ARCH)) +OBJ_FILTER := $(call objs_from_src,$(SRC_FILTER_COMMON) $(SRC_ARCH)) +OBJ_SCALAR := $(call objs_from_src,$(SRC_SCALAR_COMMON) $(SRC_ARCH)) +OBJ_VECT := $(call objs_from_src,$(SRC_VECT_COMMON) $(SRC_ARCH)) + +# Pattern rules to build objects +$(OBJDIR)/%.o: $(ROOT_DIR)/%.c + @mkdir -p $(dir $@) + $(CC) -c $< -o $@ $(CFLAGS) + +$(OBJDIR)/%.o: $(ROOT_DIR)/%.S + @mkdir -p $(dir $@) + $(ASM_CC) -c $< -o $@ $(CFLAGS) + +# .PHONY: all build clean +all: filter bfp dct fft scalar vect 
+all: vect + +bfp: $(BINDIR)/bfp + + +$(BINDIR)/bfp: $(OBJ_BFP) + @mkdir -p $(dir $@) + $(LINK_CC) $^ -o $@ $(CFLAGS) $(LDFLAGS) + +dct: $(BINDIR)/dct + + +$(BINDIR)/dct: $(OBJ_DCT) + @mkdir -p $(dir $@) + $(LINK_CC) $^ -o $@ $(CFLAGS) $(LDFLAGS) + +fft: $(BINDIR)/fft + + +$(BINDIR)/fft: $(OBJ_FFT) + @mkdir -p $(dir $@) + $(LINK_CC) $^ -o $@ $(CFLAGS) $(LDFLAGS) + +filter: $(BINDIR)/filter + + +$(BINDIR)/filter: $(OBJ_FILTER) + @mkdir -p $(dir $@) + $(LINK_CC) $^ -o $@ $(CFLAGS) $(LDFLAGS) + +scalar: $(BINDIR)/scalar + + +$(BINDIR)/scalar: $(OBJ_SCALAR) + @mkdir -p $(dir $@) + $(LINK_CC) $^ -o $@ $(CFLAGS) $(LDFLAGS) + +vect: $(BINDIR)/vect + + +$(BINDIR)/vect: $(OBJ_VECT) + @mkdir -p $(dir $@) + $(LINK_CC) $^ -o $@ $(CFLAGS) $(LDFLAGS) -I $(ROOT_DIR)/lib_xcore_math/src/vect -I $(ROOT_DIR)/tests/vect_tests/src + + +clean: + @echo "Cleaning $(ROOT_DIR)/build/$(ARCH)" + rm -rf $(ROOT_DIR)/build/$(ARCH) + +run: +# - xsim $(BINDIR)/bfp --config-file config.xml +# - xsim $(BINDIR)/dct --config-file config.xml +# - xsim $(BINDIR)/fft --config-file config.xml +# - xsim $(BINDIR)/filter --config-file config.xml + - xsim $(BINDIR)/scalar --config-file config.xml +# - xsim $(BINDIR)/vect --config-file config.xml + +trace: + - xsim $(BINDIR)/fft --config-file config.xml -t > trace.txt \ No newline at end of file diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_bitdepth_convert.c b/tests/bfp_tests/src/bfp/complex/test_bfp_bitdepth_convert.c index df3a564f..65146431 100644 --- a/tests/bfp_tests/src/bfp/complex/test_bfp_bitdepth_convert.c +++ b/tests/bfp_tests/src/bfp/complex/test_bfp_bitdepth_convert.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_add.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_add.c index 6bd94b8e..9db4d19f 100644 --- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_add.c +++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_add.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_add_scalar.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_add_scalar.c index a9f1d536..6e03db99 100644 --- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_add_scalar.c +++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_add_scalar.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conj_macc.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conj_macc.c index d76312d6..36e4d60b 100644 --- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conj_macc.c +++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conj_macc.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. // XMOS Public License: Version 1 diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conjugate.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conjugate.c index 390c1d7d..cebb341b 100644 --- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conjugate.c +++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conjugate.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conjugate_mul.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conjugate_mul.c index 8e71892c..1ef0814b 100644 --- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conjugate_mul.c +++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_conjugate_mul.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -91,8 +91,13 @@ TEST(bfp_complex_conj_mul, bfp_complex_s16_conj_mul) for(unsigned int i = 0; i < A.length; i++){ // printf("! %d\t %d \t %d \t %e\n", i, expA.real[i], A.real[i], Af.real[0]); - TEST_ASSERT_INT16_WITHIN(1, expA.real[i], A.real[i]); - TEST_ASSERT_INT16_WITHIN(1, expA.imag[i], A.imag[i]); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(2, expA.real[i], A.real[i]); + TEST_ASSERT_INT16_WITHIN(2, expA.imag[i], A.imag[i]); + #else + TEST_ASSERT_INT16_WITHIN(1, expA.real[i], A.real[i]); + TEST_ASSERT_INT16_WITHIN(1, expA.imag[i], A.imag[i]); + #endif } } } diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_energy.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_energy.c index 926b6a67..320e6b0b 100644 --- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_energy.c +++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_energy.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_macc.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_macc.c index de317aab..719acf74 100644 --- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_macc.c +++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_macc.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. 
// This Software is subject to the terms of the XMOS Public Licence: Version 1. // XMOS Public License: Version 1 @@ -94,8 +94,13 @@ TEST(bfp_complex_macc, bfp_complex_s16_macc) test_complex_s16_from_double(expA.real, expA.imag, Af.real, Af.imag, LEN, A.exp); for(unsigned int i = 0; i < A.length; i++){ - TEST_ASSERT_INT16_WITHIN(2, expA.real[i], A.real[i]); - TEST_ASSERT_INT16_WITHIN(2, expA.imag[i], A.imag[i]); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(1<<12, expA.real[i], A.real[i]); + TEST_ASSERT_INT16_WITHIN(1<<12, expA.imag[i], A.imag[i]); + #else + TEST_ASSERT_INT16_WITHIN(2, expA.real[i], A.real[i]); + TEST_ASSERT_INT16_WITHIN(2, expA.imag[i], A.imag[i]); + #endif } } } @@ -160,8 +165,13 @@ TEST(bfp_complex_macc, bfp_complex_s16_nmacc) test_complex_s16_from_double(expA.real, expA.imag, Af.real, Af.imag, LEN, A.exp); for(unsigned int i = 0; i < A.length; i++){ - TEST_ASSERT_INT16_WITHIN(2, expA.real[i], A.real[i]); - TEST_ASSERT_INT16_WITHIN(2, expA.imag[i], A.imag[i]); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(1<<12, expA.real[i], A.real[i]); + TEST_ASSERT_INT16_WITHIN(1<<12, expA.imag[i], A.imag[i]); + #else + TEST_ASSERT_INT16_WITHIN(2, expA.real[i], A.real[i]); + TEST_ASSERT_INT16_WITHIN(2, expA.imag[i], A.imag[i]); + #endif } } } diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_mag.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_mag.c index 46001916..eaa6efb9 100644 --- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_mag.c +++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_mag.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_make.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_make.c index 867067ef..e3761900 100644 --- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_make.c +++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_make.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_mul.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_mul.c index 83e9e395..f202404d 100644 --- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_mul.c +++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_mul.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -89,8 +89,13 @@ TEST(bfp_complex_mul, bfp_complex_s16_mul) test_complex_s16_from_double(expA.real, expA.imag, Af.real, Af.imag, MAX_LEN, A.exp); for(unsigned int i = 0; i < A.length; i++){ - TEST_ASSERT_INT16_WITHIN(1, expA.real[i], A.real[i]); - TEST_ASSERT_INT16_WITHIN(1, expA.imag[i], A.imag[i]); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(2, expA.real[i], A.real[i]); + TEST_ASSERT_INT16_WITHIN(2, expA.imag[i], A.imag[i]); + #else + TEST_ASSERT_INT16_WITHIN(1, expA.real[i], A.real[i]); + TEST_ASSERT_INT16_WITHIN(1, expA.imag[i], A.imag[i]); + #endif } } } diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_real_mul.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_real_mul.c index 68758d16..ca19b148 100644 --- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_real_mul.c +++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_real_mul.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_real_scale.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_real_scale.c index c522d033..ba9101c0 100644 --- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_real_scale.c +++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_real_scale.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_scale.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_scale.c index 1907107b..d6c455d4 100644 --- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_scale.c +++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_scale.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -98,9 +98,14 @@ TEST(bfp_complex_scale, bfp_complex_s16_scale) test_complex_s16_from_double(expA.real, expA.imag, Af.real, Af.imag, MAX_LEN, A.exp); - for(unsigned int i = 0; i < A.length; i++){ - TEST_ASSERT_INT16_WITHIN(1, expA.real[i], A.real[i]); - TEST_ASSERT_INT16_WITHIN(1, expA.imag[i], A.imag[i]); + for(unsigned int i = 0; i < A.length; i++){ + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(2, expA.real[i], A.real[i]); + TEST_ASSERT_INT16_WITHIN(2, expA.imag[i], A.imag[i]); + #else + TEST_ASSERT_INT16_WITHIN(1, expA.real[i], A.real[i]); + TEST_ASSERT_INT16_WITHIN(1, expA.imag[i], A.imag[i]); + #endif } } } diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_squared_mag.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_squared_mag.c index 459ba2e0..31e9e788 100644 --- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_squared_mag.c +++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_squared_mag.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. 
// This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -80,8 +80,12 @@ TEST(bfp_complex_squared_mag, bfp_complex_s16_squared_mag) test_s16_from_double(expA, Af, MAX_LEN, A.exp); - for(unsigned int i = 0; i < A.length; i++){ - TEST_ASSERT_INT16_WITHIN(1, expA[i], A.data[i]); + for(unsigned int i = 0; i < A.length; i++){ + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(2, expA[i], A.data[i]); + #else + TEST_ASSERT_INT16_WITHIN(1, expA[i], A.data[i]); + #endif } diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_sub.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_sub.c index 7bc83ee7..118f03b2 100644 --- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_sub.c +++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_sub.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_use_exponent.c b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_use_exponent.c index fcddc778..6709977d 100644 --- a/tests/bfp_tests/src/bfp/complex/test_bfp_complex_use_exponent.c +++ b/tests/bfp_tests/src/bfp/complex/test_bfp_complex_use_exponent.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/complex/test_bfp_sum_complex.c b/tests/bfp_tests/src/bfp/complex/test_bfp_sum_complex.c index 935146db..aff1b48c 100644 --- a/tests/bfp_tests/src/bfp/complex/test_bfp_sum_complex.c +++ b/tests/bfp_tests/src/bfp/complex/test_bfp_sum_complex.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include @@ -69,8 +69,13 @@ TEST(bfp_complex_sum, bfp_complex_s16_sum) float_complex_s32_t result = bfp_complex_s16_sum(&B); TEST_ASSERT_EQUAL(expected.exp, result.exp); - TEST_ASSERT_EQUAL_INT32(expected.mant.re, result.mant.re); - TEST_ASSERT_EQUAL_INT32(expected.mant.im, result.mant.im); + #if defined(__VX4B__) + TEST_ASSERT_INT32_WITHIN(8, expected.mant.re, result.mant.re); + TEST_ASSERT_INT32_WITHIN(8, expected.mant.im, result.mant.im); + #else + TEST_ASSERT_EQUAL_INT32(expected.mant.re, result.mant.re); + TEST_ASSERT_EQUAL_INT32(expected.mant.im, result.mant.im); + #endif } } diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_abs.c b/tests/bfp_tests/src/bfp/real/test_bfp_abs.c index d177350c..18df61d9 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_abs.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_abs.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -66,7 +66,11 @@ TEST(bfp_abs, bfp_s16_abs) for(unsigned int i = 0; i < A.length; i++){ int16_t expected = abs(B.data[i]); - TEST_ASSERT_EQUAL(expected, A.data[i]); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(1, expected, A.data[i]); + #else + TEST_ASSERT_EQUAL(expected, A.data[i]); + #endif } } } diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_abs_sum.c b/tests/bfp_tests/src/bfp/real/test_bfp_abs_sum.c index fcfc7cc6..9c70a507 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_abs_sum.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_abs_sum.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include @@ -67,8 +67,14 @@ TEST(bfp_abs_sum, bfp_s16_abs_sum) for(unsigned int i = 0; i < B.length; i++) expected.mant += abs(B.data[i]); - TEST_ASSERT_EQUAL(expected.exp, result.exp); - TEST_ASSERT_EQUAL_INT32(expected.mant, result.mant); + #if defined(__VX4B__) + TEST_ASSERT_INT32_WITHIN(1, expected.exp, result.exp); + TEST_ASSERT_INT32_WITHIN(12, expected.mant, result.mant); + #else + TEST_ASSERT_EQUAL(expected.exp, result.exp); + TEST_ASSERT_EQUAL_INT32(expected.mant, result.mant); + #endif + } } diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_add.c b/tests/bfp_tests/src/bfp/real/test_bfp_add.c index 2ed407e0..b05361e0 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_add.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_add.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_add_scalar.c b/tests/bfp_tests/src/bfp/real/test_bfp_add_scalar.c index b60e24a6..cd728413 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_add_scalar.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_add_scalar.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_alloc.c b/tests/bfp_tests/src/bfp/real/test_bfp_alloc.c index 93bf2e04..fde646fb 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_alloc.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_alloc.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_argmax.c b/tests/bfp_tests/src/bfp/real/test_bfp_argmax.c index d9509c73..7f81e732 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_argmax.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_argmax.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_argmin.c b/tests/bfp_tests/src/bfp/real/test_bfp_argmin.c index 91ebc904..465c9b59 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_argmin.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_argmin.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_clip.c b/tests/bfp_tests/src/bfp/real/test_bfp_clip.c index 6e493465..66a08957 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_clip.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_clip.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_convolve.c b/tests/bfp_tests/src/bfp/real/test_bfp_convolve.c index 5d62ef5c..381a9c60 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_convolve.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_convolve.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_dealloc.c b/tests/bfp_tests/src/bfp/real/test_bfp_dealloc.c index 25da9381..1efcc2f6 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_dealloc.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_dealloc.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_depth_convert.c b/tests/bfp_tests/src/bfp/real/test_bfp_depth_convert.c index 95c5bd3c..ab4b7b0f 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_depth_convert.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_depth_convert.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_dot.c b/tests/bfp_tests/src/bfp/real/test_bfp_dot.c index 0b6a269a..39db0bf6 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_dot.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_dot.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_energy.c b/tests/bfp_tests/src/bfp/real/test_bfp_energy.c index 5ad4604d..017ccbc6 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_energy.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_energy.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_headroom.c b/tests/bfp_tests/src/bfp/real/test_bfp_headroom.c index fc5d24da..0cb1d453 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_headroom.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_headroom.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_init.c b/tests/bfp_tests/src/bfp/real/test_bfp_init.c index 71888e46..bcc0a363 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_init.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_init.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_inverse.c b/tests/bfp_tests/src/bfp/real/test_bfp_inverse.c index 78f8fe92..22f4b2f3 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_inverse.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_inverse.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_macc.c b/tests/bfp_tests/src/bfp/real/test_bfp_macc.c index 217954d5..02c06837 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_macc.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_macc.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
// XMOS Public License: Version 1 diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_max.c b/tests/bfp_tests/src/bfp/real/test_bfp_max.c index 08c13c6b..b5bc07f5 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_max.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_max.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -56,7 +56,19 @@ TEST(bfp_max, bfp_s16_max) float result = bfp_s16_max(&B); - TEST_ASSERT_EQUAL_FLOAT(expected, result); + int16_t mantissa; + exponent_t exponent; + f32_unpack_s16(&mantissa, &exponent, result); + int16_t exp_mantissa; + exponent_t exp_exponent; + f32_unpack_s16(&exp_mantissa, &exp_exponent, expected); + + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(128, exp_mantissa, mantissa); + #else + TEST_ASSERT_INT16_WITHIN(1, exp_mantissa, mantissa); + #endif + TEST_ASSERT_EQUAL_INT16(exp_exponent, exponent); } } diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_max_elementwise.c b/tests/bfp_tests/src/bfp/real/test_bfp_max_elementwise.c index 5b3e7140..0e4fb9fd 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_max_elementwise.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_max_elementwise.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_mean.c b/tests/bfp_tests/src/bfp/real/test_bfp_mean.c index 49162206..2cc77bce 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_mean.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_mean.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include @@ -54,7 +54,12 @@ TEST(bfp_mean, bfp_s16_mean) double sum = 0; for(unsigned int i = 0; i < B.length; i++){ - B.data[i] = pseudo_rand_int16(&seed) >> B.hr; + B.data[i] = (pseudo_rand_int16(&seed) >> B.hr); + + //This is a simple way of bounding the error due to the new rounding mode in VX4B + #if defined(__VX4B__) + B.data[i] = B.data[i]&~1; + #endif sum += B.data[i]; } diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_min.c b/tests/bfp_tests/src/bfp/real/test_bfp_min.c index 05b5b8f8..4e68291f 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_min.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_min.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_min_elementwise.c b/tests/bfp_tests/src/bfp/real/test_bfp_min_elementwise.c index e0a27b36..f2b96d2b 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_min_elementwise.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_min_elementwise.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_mul.c b/tests/bfp_tests/src/bfp/real/test_bfp_mul.c index 8926c2bf..b7ab8d26 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_mul.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_mul.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_rect.c b/tests/bfp_tests/src/bfp/real/test_bfp_rect.c index 96692e42..8048329f 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_rect.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_rect.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. 
+// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_rms.c b/tests/bfp_tests/src/bfp/real/test_bfp_rms.c index 1a032b54..c029b710 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_rms.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_rms.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -73,7 +73,7 @@ TEST(bfp_rms, bfp_s16_rms) const double expectedF = sqrt(mean_energy); float_s32_t ideal_result = { - .mant = lround( expectedF / ldexp((double) 1,result.exp) ), + .mant = llround( expectedF / ldexp((double) 1,result.exp) ), .exp = (exponent_t) floor( log2(expectedF) ) - 30 }; @@ -123,7 +123,7 @@ TEST(bfp_rms, bfp_s32_rms) const double expectedF = sqrt(mean_energy); float_s32_t ideal_result = { - .mant = lround( expectedF / ldexp((double) 1,result.exp) ), + .mant = llround( expectedF / ldexp((double) 1,result.exp) ), .exp = (exponent_t) floor( log2(expectedF) ) - 30 }; TEST_ASSERT_INT32_WITHIN(3, ideal_result.exp, result.exp); diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_s16_accumulate.c b/tests/bfp_tests/src/bfp/real/test_bfp_s16_accumulate.c index 31f941e5..db9ff53a 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_s16_accumulate.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_s16_accumulate.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_scale.c b/tests/bfp_tests/src/bfp/real/test_bfp_scale.c index 7b44b499..cf9e53dc 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_scale.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_scale.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. 
// This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_set.c b/tests/bfp_tests/src/bfp/real/test_bfp_set.c index ff0c29fb..d4d27ef1 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_set.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_set.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_shl_vect.c b/tests/bfp_tests/src/bfp/real/test_bfp_shl_vect.c index 8ecd0721..9617d381 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_shl_vect.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_shl_vect.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_sqrt_vect.c b/tests/bfp_tests/src/bfp/real/test_bfp_sqrt_vect.c index 21929a66..d352fc2a 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_sqrt_vect.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_sqrt_vect.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_sub.c b/tests/bfp_tests/src/bfp/real/test_bfp_sub.c index 39ead191..14c021e1 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_sub.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_sub.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_sum.c b/tests/bfp_tests/src/bfp/real/test_bfp_sum.c index b7cb9d34..136425ed 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_sum.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_sum.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -55,8 +55,16 @@ TEST(bfp_sum, bfp_s16_sum) for(unsigned int i = 0; i < B.length; i++) expected.mant += B.data[i]; - TEST_ASSERT_EQUAL(expected.exp, result.exp); - TEST_ASSERT_EQUAL_INT32(expected.mant, result.mant); + #if defined (__VX4B__) + // On VX, accumulation may differ by 1 due to different rounding behavior + TEST_ASSERT_INT32_WITHIN(1, expected.exp, result.exp); + TEST_ASSERT_INT32_WITHIN(12, expected.mant, result.mant); + #else + TEST_ASSERT_EQUAL(expected.exp, result.exp); + TEST_ASSERT_EQUAL_INT32(expected.mant, result.mant); + #endif + + } } diff --git a/tests/bfp_tests/src/bfp/real/test_bfp_use_exponent.c b/tests/bfp_tests/src/bfp/real/test_bfp_use_exponent.c index 567b7e7e..56c05926 100644 --- a/tests/bfp_tests/src/bfp/real/test_bfp_use_exponent.c +++ b/tests/bfp_tests/src/bfp/real/test_bfp_use_exponent.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/main.c b/tests/bfp_tests/src/main.c index ae81cd6f..46ed3ff4 100644 --- a/tests/bfp_tests/src/main.c +++ b/tests/bfp_tests/src/main.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/tests/bfp_tests/src/misc/test_bfp_gradient_constraint.c b/tests/bfp_tests/src/misc/test_bfp_gradient_constraint.c index bc6a2fbf..60690cb0 100644 --- a/tests/bfp_tests/src/misc/test_bfp_gradient_constraint.c +++ b/tests/bfp_tests/src/misc/test_bfp_gradient_constraint.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/tst_asserts.h b/tests/bfp_tests/src/tst_asserts.h index afb0a68d..f037bb92 100644 --- a/tests/bfp_tests/src/tst_asserts.h +++ b/tests/bfp_tests/src/tst_asserts.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/tests/bfp_tests/src/tst_common.c b/tests/bfp_tests/src/tst_common.c index 47429aa7..e99674f9 100644 --- a/tests/bfp_tests/src/tst_common.c +++ b/tests/bfp_tests/src/tst_common.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/bfp_tests/src/tst_common.h b/tests/bfp_tests/src/tst_common.h index 39e7210b..fdb3df07 100644 --- a/tests/bfp_tests/src/tst_common.h +++ b/tests/bfp_tests/src/tst_common.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/tests/bfp_tests/src/unity_config.h b/tests/bfp_tests/src/unity_config.h index 3b0cc6a1..c19827fa 100644 --- a/tests/bfp_tests/src/unity_config.h +++ b/tests/bfp_tests/src/unity_config.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#pragma once diff --git a/tests/config.xml b/tests/config.xml new file mode 100644 index 00000000..646ced42 --- /dev/null +++ b/tests/config.xml @@ -0,0 +1,555 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/dct_tests/src/lib_dsp/dsp_dct.c b/tests/dct_tests/src/lib_dsp/dsp_dct.c index 066bd9e3..8286aa5d 100644 --- a/tests/dct_tests/src/lib_dsp/dsp_dct.c +++ b/tests/dct_tests/src/lib_dsp/dsp_dct.c @@ -1,4 +1,4 @@ -// Copyright 2015-2024 XMOS LIMITED. +// Copyright 2015-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include "dsp_dct.h" diff --git a/tests/dct_tests/src/lib_dsp/dsp_dct.h b/tests/dct_tests/src/lib_dsp/dsp_dct.h index 87f46361..e3c30197 100644 --- a/tests/dct_tests/src/lib_dsp/dsp_dct.h +++ b/tests/dct_tests/src/lib_dsp/dsp_dct.h @@ -1,4 +1,4 @@ -// Copyright 2015-2024 XMOS LIMITED. +// Copyright 2015-2026 XMOS LIMITED. 
// This Software is subject to the terms of the XMOS Public Licence: Version 1. #ifndef DSP_DCT_H_ diff --git a/tests/dct_tests/src/main.c b/tests/dct_tests/src/main.c index 0983d3ba..c67caa7a 100644 --- a/tests/dct_tests/src/main.c +++ b/tests/dct_tests/src/main.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/tests/dct_tests/src/test_dct8x8.c b/tests/dct_tests/src/test_dct8x8.c index bba5b73d..4330b64d 100644 --- a/tests/dct_tests/src/test_dct8x8.c +++ b/tests/dct_tests/src/test_dct8x8.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/tests/dct_tests/src/test_dctXX_forward.c b/tests/dct_tests/src/test_dctXX_forward.c index aebf2b71..ccc98f18 100644 --- a/tests/dct_tests/src/test_dctXX_forward.c +++ b/tests/dct_tests/src/test_dctXX_forward.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/tests/dct_tests/src/test_dctXX_inverse.c b/tests/dct_tests/src/test_dctXX_inverse.c index 13f7372f..47a63973 100644 --- a/tests/dct_tests/src/test_dctXX_inverse.c +++ b/tests/dct_tests/src/test_dctXX_inverse.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
@@ -81,7 +81,7 @@ TEST(dctXX_inverse, dct6_inverse) int32_t max_allowed_diff = DCT_N; for(unsigned int n = 0; n < DCT_N; n++){ int32_t act_val = y[n]; - int32_t ref_val = lround(ref_out[n]); + int32_t ref_val = llround(ref_out[n]); TEST_ASSERT_INT32_WITHIN(max_allowed_diff, ref_val, act_val); } @@ -151,7 +151,7 @@ TEST(dctXX_inverse, dct8_inverse) int32_t max_allowed_diff = DCT_N; for(unsigned int n = 0; n < DCT_N; n++){ int32_t act_val = y[n]; - int32_t ref_val = lround(ref_out[n]); + int32_t ref_val = llround(ref_out[n]); TEST_ASSERT_INT32_WITHIN(max_allowed_diff, ref_val, act_val); } @@ -220,7 +220,7 @@ TEST(dctXX_inverse, dct12_inverse) int32_t max_allowed_diff = 8; for(unsigned int n = 0; n < DCT_N; n++){ int32_t act_val = y[n]; - int32_t ref_val = lround(ref_out[n]); + int32_t ref_val = llround(ref_out[n]); TEST_ASSERT_INT32_WITHIN(max_allowed_diff, ref_val, act_val); } @@ -290,7 +290,7 @@ TEST(dctXX_inverse, dct16_inverse) int32_t max_allowed_diff = 2*DCT_N; for(unsigned int n = 0; n < DCT_N; n++){ int32_t act_val = y[n]; - int32_t ref_val = lround(ref_out[n]); + int32_t ref_val = llround(ref_out[n]); TEST_ASSERT_INT32_WITHIN(max_allowed_diff, ref_val, act_val); } @@ -360,7 +360,7 @@ TEST(dctXX_inverse, dct24_inverse) int32_t max_allowed_diff = 2*DCT_N; for(unsigned int n = 0; n < DCT_N; n++){ int32_t act_val = y[n]; - int32_t ref_val = lround(ref_out[n]); + int32_t ref_val = llround(ref_out[n]); TEST_ASSERT_INT32_WITHIN(max_allowed_diff, ref_val, act_val); } @@ -430,7 +430,7 @@ TEST(dctXX_inverse, dct32_inverse) int32_t max_allowed_diff = 2*DCT_N; for(unsigned int n = 0; n < DCT_N; n++){ int32_t act_val = y[n]; - int32_t ref_val = lround(ref_out[n]); + int32_t ref_val = llround(ref_out[n]); TEST_ASSERT_INT32_WITHIN(max_allowed_diff, ref_val, act_val); } @@ -500,7 +500,7 @@ TEST(dctXX_inverse, dct48_inverse) int32_t max_allowed_diff = 5*DCT_N; for(unsigned int n = 0; n < DCT_N; n++){ int32_t act_val = y[n]; - int32_t ref_val = lround(ref_out[n]); + int32_t 
ref_val = llround(ref_out[n]); TEST_ASSERT_INT32_WITHIN(max_allowed_diff, ref_val, act_val); } @@ -570,7 +570,7 @@ TEST(dctXX_inverse, dct64_inverse) int32_t max_allowed_diff = 6*DCT_N; for(unsigned int n = 0; n < DCT_N; n++){ int32_t act_val = y[n]; - int32_t ref_val = lround(ref_out[n]); + int32_t ref_val = llround(ref_out[n]); TEST_ASSERT_INT32_WITHIN(max_allowed_diff, ref_val, act_val); } diff --git a/tests/dct_tests/src/test_random.h b/tests/dct_tests/src/test_random.h index 0778d791..ebb02463 100644 --- a/tests/dct_tests/src/test_random.h +++ b/tests/dct_tests/src/test_random.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/tests/dct_tests/src/tst_common.c b/tests/dct_tests/src/tst_common.c index 4428b77b..dc996c07 100644 --- a/tests/dct_tests/src/tst_common.c +++ b/tests/dct_tests/src/tst_common.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include "tst_common.h" diff --git a/tests/dct_tests/src/tst_common.h b/tests/dct_tests/src/tst_common.h index e8040c81..c0d034aa 100644 --- a/tests/dct_tests/src/tst_common.h +++ b/tests/dct_tests/src/tst_common.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/tests/dct_tests/src/unity_config.h b/tests/dct_tests/src/unity_config.h index 42edb77d..e1eca455 100644 --- a/tests/dct_tests/src/unity_config.h +++ b/tests/dct_tests/src/unity_config.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#pragma once diff --git a/tests/fft_tests/src/main.c b/tests/fft_tests/src/main.c index 3c8bda22..554e412c 100644 --- a/tests/fft_tests/src/main.c +++ b/tests/fft_tests/src/main.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/tests/fft_tests/src/test_bfp_fft.c b/tests/fft_tests/src/test_bfp_fft.c index 8ebd9f14..d2bd9a30 100644 --- a/tests/fft_tests/src/test_bfp_fft.c +++ b/tests/fft_tests/src/test_bfp_fft.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/tests/fft_tests/src/test_bfp_pack_unpack.c b/tests/fft_tests/src/test_bfp_pack_unpack.c index d31e8b48..4ece5260 100644 --- a/tests/fft_tests/src/test_bfp_pack_unpack.c +++ b/tests/fft_tests/src/test_bfp_pack_unpack.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/tests/fft_tests/src/test_fft_dif.c b/tests/fft_tests/src/test_fft_dif.c index 12396a6c..3ac83d81 100644 --- a/tests/fft_tests/src/test_fft_dif.c +++ b/tests/fft_tests/src/test_fft_dif.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include "xmath/xmath.h" diff --git a/tests/fft_tests/src/test_fft_dit.c b/tests/fft_tests/src/test_fft_dit.c index 2d9ce593..710fc1e4 100644 --- a/tests/fft_tests/src/test_fft_dit.c +++ b/tests/fft_tests/src/test_fft_dit.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
diff --git a/tests/fft_tests/src/test_fft_helpers.c b/tests/fft_tests/src/test_fft_helpers.c index c9fa33bf..95a9fd08 100644 --- a/tests/fft_tests/src/test_fft_helpers.c +++ b/tests/fft_tests/src/test_fft_helpers.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/tests/fft_tests/src/test_fft_mono_adjust.c b/tests/fft_tests/src/test_fft_mono_adjust.c index 41a51110..eedea943 100644 --- a/tests/fft_tests/src/test_fft_mono_adjust.c +++ b/tests/fft_tests/src/test_fft_mono_adjust.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/tests/fft_tests/src/test_issue96.c b/tests/fft_tests/src/test_issue96.c index 51e5e9da..e4537431 100644 --- a/tests/fft_tests/src/test_issue96.c +++ b/tests/fft_tests/src/test_issue96.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/tests/fft_tests/src/test_random.h b/tests/fft_tests/src/test_random.h index 0778d791..ebb02463 100644 --- a/tests/fft_tests/src/test_random.h +++ b/tests/fft_tests/src/test_random.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/tests/fft_tests/src/test_vect_f32_fft.c b/tests/fft_tests/src/test_vect_f32_fft.c index f3b9db77..dad8b4d8 100644 --- a/tests/fft_tests/src/test_vect_f32_fft.c +++ b/tests/fft_tests/src/test_vect_f32_fft.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
diff --git a/tests/fft_tests/src/tst_common.c b/tests/fft_tests/src/tst_common.c index 4428b77b..dc996c07 100644 --- a/tests/fft_tests/src/tst_common.c +++ b/tests/fft_tests/src/tst_common.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include "tst_common.h" diff --git a/tests/fft_tests/src/tst_common.h b/tests/fft_tests/src/tst_common.h index 69757502..61308439 100644 --- a/tests/fft_tests/src/tst_common.h +++ b/tests/fft_tests/src/tst_common.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/tests/fft_tests/src/unity_config.h b/tests/fft_tests/src/unity_config.h index 42edb77d..e1eca455 100644 --- a/tests/fft_tests/src/unity_config.h +++ b/tests/fft_tests/src/unity_config.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/tests/filter_tests/script/test_filter_biquad_s32_case3.py b/tests/filter_tests/script/test_filter_biquad_s32_case3.py index 65d72afe..786c25ea 100644 --- a/tests/filter_tests/script/test_filter_biquad_s32_case3.py +++ b/tests/filter_tests/script/test_filter_biquad_s32_case3.py @@ -1,4 +1,4 @@ -# Copyright 2020-2024 XMOS LIMITED. +# Copyright 2020-2026 XMOS LIMITED. # This Software is subject to the terms of the XMOS Public Licence: Version 1. import numpy as np diff --git a/tests/filter_tests/script/test_filter_biquad_sat_s32_case3.py b/tests/filter_tests/script/test_filter_biquad_sat_s32_case3.py index a58a3b55..7fa1f74a 100644 --- a/tests/filter_tests/script/test_filter_biquad_sat_s32_case3.py +++ b/tests/filter_tests/script/test_filter_biquad_sat_s32_case3.py @@ -1,4 +1,4 @@ -# Copyright 2024 XMOS LIMITED. 
+# Copyright 2024-2026 XMOS LIMITED. # This Software is subject to the terms of the XMOS Public Licence: Version 1. import numpy as np import test_filter_biquad_s32_case3 as ts diff --git a/tests/filter_tests/src/filter/test_filter_biquad_s32.c b/tests/filter_tests/src/filter/test_filter_biquad_s32.c index 0b8de2ae..a98beee5 100644 --- a/tests/filter_tests/src/filter/test_filter_biquad_s32.c +++ b/tests/filter_tests/src/filter/test_filter_biquad_s32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/filter_tests/src/filter/test_filter_biquad_sat_s32.c b/tests/filter_tests/src/filter/test_filter_biquad_sat_s32.c index ea19b4bc..8bfde22d 100644 --- a/tests/filter_tests/src/filter/test_filter_biquad_sat_s32.c +++ b/tests/filter_tests/src/filter/test_filter_biquad_sat_s32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -179,10 +179,10 @@ TEST(filter_biquad_sat_s32, case4) // this should saturate as it's already 2**31-1 res = filter_biquad_sat_s32(&filter, INT32_MAX); - TEST_ASSERT_EQUAL(INT32_MAX, res); + TEST_ASSERT_EQUAL(VPU_INT32_MAX, res); res = filter_biquad_sat_s32(&filter, INT32_MIN); - TEST_ASSERT_EQUAL(INT32_MIN + 1, res); + TEST_ASSERT_EQUAL(VPU_INT32_MIN, res); } // Test a biquad that overflows halfway through the accumulator, diff --git a/tests/filter_tests/src/filter/test_filter_fir_s16.c b/tests/filter_tests/src/filter/test_filter_fir_s16.c index 7abd884d..26f7ce36 100644 --- a/tests/filter_tests/src/filter/test_filter_fir_s16.c +++ b/tests/filter_tests/src/filter/test_filter_fir_s16.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/tests/filter_tests/src/filter/test_filter_fir_s16_push_sample.c b/tests/filter_tests/src/filter/test_filter_fir_s16_push_sample.c index b476b328..de4baea6 100644 --- a/tests/filter_tests/src/filter/test_filter_fir_s16_push_sample.c +++ b/tests/filter_tests/src/filter/test_filter_fir_s16_push_sample.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/filter_tests/src/filter/test_filter_fir_s32.c b/tests/filter_tests/src/filter/test_filter_fir_s32.c index 9a43d91f..2b961aac 100644 --- a/tests/filter_tests/src/filter/test_filter_fir_s32.c +++ b/tests/filter_tests/src/filter/test_filter_fir_s32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/filter_tests/src/main.c b/tests/filter_tests/src/main.c index 832b40e2..93ad91b2 100644 --- a/tests/filter_tests/src/main.c +++ b/tests/filter_tests/src/main.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/filter_tests/src/tst_common.h b/tests/filter_tests/src/tst_common.h index 44c74d09..f2bf976d 100644 --- a/tests/filter_tests/src/tst_common.h +++ b/tests/filter_tests/src/tst_common.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/tests/filter_tests/src/unity_config.h b/tests/filter_tests/src/unity_config.h index 42edb77d..e1eca455 100644 --- a/tests/filter_tests/src/unity_config.h +++ b/tests/filter_tests/src/unity_config.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. 
+// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/tests/legacy_build/src/main.c b/tests/legacy_build/src/main.c index 6a969ff4..4bde6f95 100644 --- a/tests/legacy_build/src/main.c +++ b/tests/legacy_build/src/main.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/scalar_tests/src/basic/test_cls.c b/tests/scalar_tests/src/basic/test_cls.c index b4468170..f2778a8e 100644 --- a/tests/scalar_tests/src/basic/test_cls.c +++ b/tests/scalar_tests/src/basic/test_cls.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/scalar_tests/src/basic/test_hr.c b/tests/scalar_tests/src/basic/test_hr.c index fcdef154..5988a28e 100644 --- a/tests/scalar_tests/src/basic/test_hr.c +++ b/tests/scalar_tests/src/basic/test_hr.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/scalar_tests/src/float/test_fixed_trig.c b/tests/scalar_tests/src/float/test_fixed_trig.c index eade7d5c..0dd45d9a 100644 --- a/tests/scalar_tests/src/float/test_fixed_trig.c +++ b/tests/scalar_tests/src/float/test_fixed_trig.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -121,7 +121,7 @@ TEST(fixed_trig, sbrad_sin) volatile int32_t result_q30 = sbrad_sin(alpha); // volatile uint32_t t3 = get_reference_time(); - int32_t exp_q30 = lround(ldexp(exp,30)); + int32_t exp_q30 = llround(ldexp(exp,30)); int32_t er = exp_q30 - result_q30; er = (er < 0)? 
-er : er; @@ -184,7 +184,7 @@ TEST(fixed_trig, sbrad_tan) volatile q2_30 result_q30 = sbrad_tan(alpha_q31); // volatile uint32_t t3 = get_reference_time(); - q2_30 expected_q30 = lround(ldexp(expected,30)); + q2_30 expected_q30 = llround(ldexp(expected,30)); // q2_30 just_for_timing_q30 = round(ldexp(just_for_timing, 30)); @@ -235,7 +235,7 @@ TEST(fixed_trig, q24_sin) volatile q2_30 result_q30 = q24_sin(theta_q24); // volatile uint32_t t3 = get_reference_time(); - q2_30 exp_q30 = lround(ldexp(exp,30)); + q2_30 exp_q30 = llround(ldexp(exp,30)); q2_30 er = exp_q30 - result_q30; er = (er < 0)? -er : er; @@ -284,7 +284,7 @@ TEST(fixed_trig, q24_cos) volatile q2_30 result_q30 = q24_cos(theta_q24); // volatile uint32_t t3 = get_reference_time(); - q2_30 exp_q30 = lround(ldexp(exp,30)); + q2_30 exp_q30 = llround(ldexp(exp,30)); q2_30 er = exp_q30 - result_q30; er = (er < 0)? -er : er; @@ -331,7 +331,7 @@ TEST(fixed_trig, q24_tan) volatile float_s32_t result = q24_tan(theta_q24); // volatile uint32_t t3 = get_reference_time(); - int32_t exp_fixed = lround(ldexp(exp,-result.exp)); + int32_t exp_fixed = llround(ldexp(exp,-result.exp)); if(result.exp != -30) TEST_ASSERT_LESS_THAN_INT32(2, HR_S32(result.mant)); @@ -350,8 +350,8 @@ TEST(fixed_trig, q24_tan) // If we're really that close to the singular point, let's invert both the // expected result and actual result to see whether the output of sbrad_tan() // was very close to what it should have been. 
- int32_t exp_inv_q30 = lround(ldexp(1/exp, 30)); - int32_t act_inv_q30 = lround(ldexp(1/ldexp(result.mant, result.exp), 30)); + int32_t exp_inv_q30 = llround(ldexp(1/exp, 30)); + int32_t act_inv_q30 = llround(ldexp(1/ldexp(result.mant, result.exp), 30)); TEST_ASSERT_INT32_WITHIN(100, exp_inv_q30, act_inv_q30); diff --git a/tests/scalar_tests/src/float/test_float_convert.c b/tests/scalar_tests/src/float/test_float_convert.c index b8f6e6bf..f850e6cd 100644 --- a/tests/scalar_tests/src/float/test_float_convert.c +++ b/tests/scalar_tests/src/float/test_float_convert.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/scalar_tests/src/float/test_float_exp.c b/tests/scalar_tests/src/float/test_float_exp.c index 3b777b4f..a1d5697c 100644 --- a/tests/scalar_tests/src/float/test_float_exp.c +++ b/tests/scalar_tests/src/float/test_float_exp.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -66,7 +66,7 @@ TEST(float_exp, float_s32_exp_SPECIFIC_CASES) // exp(0.0) = 1.0 x.mant = Q24(0.0); - x.exp = -24; + x.exp = -23; res = float_s32_exp(x); TEST_ASSERT( diff_ratio(res, 1.0) < ldexp(1,-24) ); @@ -123,7 +123,7 @@ TEST(float_exp, float_s32_exp_RANDOM) float_s32_t actual = float_s32_exp(x); double expected_f = exp(ldexp(x.mant, x.exp)); - int32_t expected_mant = lround(ldexp(expected_f,-actual.exp)); + int32_t expected_mant = llround(ldexp(expected_f,-actual.exp)); if(expected_mant == 0){ // Just make sure our answer is real close to zero. 
diff --git a/tests/scalar_tests/src/float/test_float_log.c b/tests/scalar_tests/src/float/test_float_log.c index 5589ca0d..0536ab43 100644 --- a/tests/scalar_tests/src/float/test_float_log.c +++ b/tests/scalar_tests/src/float/test_float_log.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/scalar_tests/src/float/test_float_logistic.c b/tests/scalar_tests/src/float/test_float_logistic.c index 1cf3b66e..32b2dcc3 100644 --- a/tests/scalar_tests/src/float/test_float_logistic.c +++ b/tests/scalar_tests/src/float/test_float_logistic.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/scalar_tests/src/float/test_float_s32_sqrt.c b/tests/scalar_tests/src/float/test_float_s32_sqrt.c index 2e5b604b..3a8d34d8 100644 --- a/tests/scalar_tests/src/float/test_float_s32_sqrt.c +++ b/tests/scalar_tests/src/float/test_float_s32_sqrt.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/scalar_tests/src/float/test_float_sXX_abs.c b/tests/scalar_tests/src/float/test_float_sXX_abs.c index c2e1da17..fa9e8339 100644 --- a/tests/scalar_tests/src/float/test_float_sXX_abs.c +++ b/tests/scalar_tests/src/float/test_float_sXX_abs.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/tests/scalar_tests/src/float/test_float_sXX_add.c b/tests/scalar_tests/src/float/test_float_sXX_add.c index 01db3a02..12dd27a9 100644 --- a/tests/scalar_tests/src/float/test_float_sXX_add.c +++ b/tests/scalar_tests/src/float/test_float_sXX_add.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/scalar_tests/src/float/test_float_sXX_div.c b/tests/scalar_tests/src/float/test_float_sXX_div.c index 4fb1b693..f9703454 100644 --- a/tests/scalar_tests/src/float/test_float_sXX_div.c +++ b/tests/scalar_tests/src/float/test_float_sXX_div.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/scalar_tests/src/float/test_float_sXX_ema.c b/tests/scalar_tests/src/float/test_float_sXX_ema.c index 482f3f31..c1643cd2 100644 --- a/tests/scalar_tests/src/float/test_float_sXX_ema.c +++ b/tests/scalar_tests/src/float/test_float_sXX_ema.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/scalar_tests/src/float/test_float_sXX_gt.c b/tests/scalar_tests/src/float/test_float_sXX_gt.c index f476d398..f8bf09d6 100644 --- a/tests/scalar_tests/src/float/test_float_sXX_gt.c +++ b/tests/scalar_tests/src/float/test_float_sXX_gt.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/tests/scalar_tests/src/float/test_float_sXX_mul.c b/tests/scalar_tests/src/float/test_float_sXX_mul.c index ee9c54d0..d74aa527 100644 --- a/tests/scalar_tests/src/float/test_float_sXX_mul.c +++ b/tests/scalar_tests/src/float/test_float_sXX_mul.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/scalar_tests/src/float/test_float_sXX_sub.c b/tests/scalar_tests/src/float/test_float_sXX_sub.c index 6de05e47..b4d4f568 100644 --- a/tests/scalar_tests/src/float/test_float_sXX_sub.c +++ b/tests/scalar_tests/src/float/test_float_sXX_sub.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/scalar_tests/src/float/test_float_trig.c b/tests/scalar_tests/src/float/test_float_trig.c index a9aa494d..4bbf3bd2 100644 --- a/tests/scalar_tests/src/float/test_float_trig.c +++ b/tests/scalar_tests/src/float/test_float_trig.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/scalar_tests/src/float/test_q30_powers.c b/tests/scalar_tests/src/float/test_q30_powers.c index e2564eb4..60dd20f7 100644 --- a/tests/scalar_tests/src/float/test_q30_powers.c +++ b/tests/scalar_tests/src/float/test_q30_powers.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include @@ -46,7 +46,7 @@ TEST(q30_powers, q30_powers) double pow = 1.0; for(unsigned int i = 1; i < length; i++){ pow *= bf; - expected[i] = lround(ldexp(pow, 30)); + expected[i] = llround(ldexp(pow, 30)); } // volatile uint32_t t0 = get_reference_time(); diff --git a/tests/scalar_tests/src/main.c b/tests/scalar_tests/src/main.c index 7423816b..e847afed 100644 --- a/tests/scalar_tests/src/main.c +++ b/tests/scalar_tests/src/main.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/tests/scalar_tests/src/tst_asserts.h b/tests/scalar_tests/src/tst_asserts.h index 07a01c5e..676c84b1 100644 --- a/tests/scalar_tests/src/tst_asserts.h +++ b/tests/scalar_tests/src/tst_asserts.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/tests/scalar_tests/src/tst_common.c b/tests/scalar_tests/src/tst_common.c index f9a6fae6..048d5058 100644 --- a/tests/scalar_tests/src/tst_common.c +++ b/tests/scalar_tests/src/tst_common.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include "tst_common.h" diff --git a/tests/scalar_tests/src/tst_common.h b/tests/scalar_tests/src/tst_common.h index c0b6016f..c57c15fd 100644 --- a/tests/scalar_tests/src/tst_common.h +++ b/tests/scalar_tests/src/tst_common.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#pragma once diff --git a/tests/scalar_tests/src/unity_config.h b/tests/scalar_tests/src/unity_config.h index 2998718d..c0699a63 100644 --- a/tests/scalar_tests/src/unity_config.h +++ b/tests/scalar_tests/src/unity_config.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/tests/scalar_tests/src/util/test_s32_sqrt.c b/tests/scalar_tests/src/util/test_s32_sqrt.c index d7a8d3c9..efd094c9 100644 --- a/tests/scalar_tests/src/util/test_s32_sqrt.c +++ b/tests/scalar_tests/src/util/test_s32_sqrt.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/scalar_tests/src/util/test_sXX_inverse.c b/tests/scalar_tests/src/util/test_sXX_inverse.c index 1a058055..9386494c 100644 --- a/tests/scalar_tests/src/util/test_sXX_inverse.c +++ b/tests/scalar_tests/src/util/test_sXX_inverse.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/scalar_tests/src/util/test_sXX_mul.c b/tests/scalar_tests/src/util/test_sXX_mul.c index 587e5a8d..79635c5e 100644 --- a/tests/scalar_tests/src/util/test_sXX_mul.c +++ b/tests/scalar_tests/src/util/test_sXX_mul.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include @@ -60,7 +60,7 @@ TEST(sXX_mul, s16_mul) double Ef = Bf * Cf; - int16_t expected = (int16_t) lround( ldexp(Ef, -a_exp) ); + int16_t expected = (int16_t) llround( ldexp(Ef, -a_exp) ); TEST_ASSERT_INT16_WITHIN_MESSAGE(2, expected, A, ""); @@ -98,7 +98,7 @@ TEST(sXX_mul, s32_mul) double Ef = Bf * Cf; - int32_t expected = lround( ldexp(Ef, -a_exp) ); + int32_t expected = llround( ldexp(Ef, -a_exp) ); TEST_ASSERT_INT32_WITHIN_MESSAGE(2, expected, A, ""); diff --git a/tests/shared/floating_fft/floating_dct.c b/tests/shared/floating_fft/floating_dct.c index 8081c8ee..2d693dae 100644 --- a/tests/shared/floating_fft/floating_dct.c +++ b/tests/shared/floating_fft/floating_dct.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/tests/shared/floating_fft/floating_dct.h b/tests/shared/floating_fft/floating_dct.h index 3915e85d..5a306f5a 100644 --- a/tests/shared/floating_fft/floating_dct.h +++ b/tests/shared/floating_fft/floating_dct.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/tests/shared/floating_fft/floating_fft.h b/tests/shared/floating_fft/floating_fft.h index e0894fad..2911a808 100644 --- a/tests/shared/floating_fft/floating_fft.h +++ b/tests/shared/floating_fft/floating_fft.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/tests/shared/floating_fft/floating_fft_double.c b/tests/shared/floating_fft/floating_fft_double.c index 50d32870..62d41508 100644 --- a/tests/shared/floating_fft/floating_fft_double.c +++ b/tests/shared/floating_fft/floating_fft_double.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. 
+// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/tests/shared/floating_fft/floating_fft_float.c b/tests/shared/floating_fft/floating_fft_float.c index a44f2b65..ca7a558b 100644 --- a/tests/shared/floating_fft/floating_fft_float.c +++ b/tests/shared/floating_fft/floating_fft_float.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/tests/shared/floating_fft/floating_fft_util.c b/tests/shared/floating_fft/floating_fft_util.c index e2dc4d4a..dd9a711e 100644 --- a/tests/shared/floating_fft/floating_fft_util.c +++ b/tests/shared/floating_fft/floating_fft_util.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/tests/shared/pseudo_rand/pseudo_rand.c b/tests/shared/pseudo_rand/pseudo_rand.c index f3230bd0..18d9b4df 100644 --- a/tests/shared/pseudo_rand/pseudo_rand.c +++ b/tests/shared/pseudo_rand/pseudo_rand.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. // XMOS Public License: Version 1 diff --git a/tests/shared/pseudo_rand/pseudo_rand.h b/tests/shared/pseudo_rand/pseudo_rand.h index 143b9832..47df5cb6 100644 --- a/tests/shared/pseudo_rand/pseudo_rand.h +++ b/tests/shared/pseudo_rand/pseudo_rand.h @@ -1,4 +1,4 @@ -// Copyright 2021-2024 XMOS LIMITED. +// Copyright 2021-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#pragma once diff --git a/tests/shared/pseudo_rand/rand_frame.c b/tests/shared/pseudo_rand/rand_frame.c index 3367c052..912dc79e 100644 --- a/tests/shared/pseudo_rand/rand_frame.c +++ b/tests/shared/pseudo_rand/rand_frame.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include "rand_frame.h" diff --git a/tests/shared/pseudo_rand/rand_frame.h b/tests/shared/pseudo_rand/rand_frame.h index f191fe50..2fa3e88d 100644 --- a/tests/shared/pseudo_rand/rand_frame.h +++ b/tests/shared/pseudo_rand/rand_frame.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. // XMOS Public License: Version 1 diff --git a/tests/shared/testing/testing.h b/tests/shared/testing/testing.h index b104af95..2003c0ea 100644 --- a/tests/shared/testing/testing.h +++ b/tests/shared/testing/testing.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/tests/shared/testing/testing_conv.c b/tests/shared/testing/testing_conv.c index 4e67b917..d4753e98 100644 --- a/tests/shared/testing/testing_conv.c +++ b/tests/shared/testing/testing_conv.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include "testing.h" diff --git a/tests/shared/testing/testing_diff.c b/tests/shared/testing/testing_diff.c index 4b696b10..d437027e 100644 --- a/tests/shared/testing/testing_diff.c +++ b/tests/shared/testing/testing_diff.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include "testing.h" diff --git a/tests/shared/testing/testing_misc.c b/tests/shared/testing/testing_misc.c index 65fb0012..a5cb09ad 100644 --- a/tests/shared/testing/testing_misc.c +++ b/tests/shared/testing/testing_misc.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. // XMOS Public License: Version 1 diff --git a/tests/shared/testing/testing_print.c b/tests/shared/testing/testing_print.c index 29e0e2ad..55573c20 100644 --- a/tests/shared/testing/testing_print.c +++ b/tests/shared/testing/testing_print.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include "testing.h" diff --git a/tests/vect_tests/src/main.c b/tests/vect_tests/src/main.c index 2f6562c7..37b53347 100644 --- a/tests/vect_tests/src/main.c +++ b/tests/vect_tests/src/main.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/matrix/test_mat_mul_s8_x_s16_yield_s32.c b/tests/vect_tests/src/matrix/test_mat_mul_s8_x_s16_yield_s32.c index dc774d7f..639ae813 100644 --- a/tests/vect_tests/src/matrix/test_mat_mul_s8_x_s16_yield_s32.c +++ b/tests/vect_tests/src/matrix/test_mat_mul_s8_x_s16_yield_s32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/matrix/test_mat_mul_s8_x_s8.c b/tests/vect_tests/src/matrix/test_mat_mul_s8_x_s8.c index eb6d9025..7b78e879 100644 --- a/tests/vect_tests/src/matrix/test_mat_mul_s8_x_s8.c +++ b/tests/vect_tests/src/matrix/test_mat_mul_s8_x_s8.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. 
+// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/tst_asserts.h b/tests/vect_tests/src/tst_asserts.h index 1ad12abf..d7a8eedd 100644 --- a/tests/vect_tests/src/tst_asserts.h +++ b/tests/vect_tests/src/tst_asserts.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/tests/vect_tests/src/tst_common.c b/tests/vect_tests/src/tst_common.c index f9a6fae6..048d5058 100644 --- a/tests/vect_tests/src/tst_common.c +++ b/tests/vect_tests/src/tst_common.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include "tst_common.h" diff --git a/tests/vect_tests/src/tst_common.h b/tests/vect_tests/src/tst_common.h index fc7ba5bd..ec0249d5 100644 --- a/tests/vect_tests/src/tst_common.h +++ b/tests/vect_tests/src/tst_common.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/tests/vect_tests/src/unity_config.h b/tests/vect_tests/src/unity_config.h index 3b0cc6a1..c19827fa 100644 --- a/tests/vect_tests/src/unity_config.h +++ b/tests/vect_tests/src/unity_config.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#pragma once diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_add.c b/tests/vect_tests/src/vect/complex/test_vect_complex_add.c index 6dfda7f9..531b1fa8 100644 --- a/tests/vect_tests/src/vect/complex/test_vect_complex_add.c +++ b/tests/vect_tests/src/vect/complex/test_vect_complex_add.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_add_scalar.c b/tests/vect_tests/src/vect/complex/test_vect_complex_add_scalar.c index 489f3fff..f6ec85bd 100644 --- a/tests/vect_tests/src/vect/complex/test_vect_complex_add_scalar.c +++ b/tests/vect_tests/src/vect/complex/test_vect_complex_add_scalar.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_complex_scale.c b/tests/vect_tests/src/vect/complex/test_vect_complex_complex_scale.c index 0d7d5a07..10368937 100644 --- a/tests/vect_tests/src/vect/complex/test_vect_complex_complex_scale.c +++ b/tests/vect_tests/src/vect/complex/test_vect_complex_complex_scale.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include @@ -15,6 +15,8 @@ #include "unity_fixture.h" +#define INT16_WIGGLE 4 + TEST_GROUP_RUNNER(vect_complex_scale) { RUN_TEST_CASE(vect_complex_scale, vect_complex_s32_scale_random); RUN_TEST_CASE(vect_complex_scale, vect_complex_s32_scale_basic); @@ -168,8 +170,13 @@ TEST(vect_complex_scale, vect_complex_s16_scale_basic) headroom_t hrre, hrim; for(unsigned int i = 0; i < len; i++){ - TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[0], casse->line); - TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[0], casse->line); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.re, A.real[i]); + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.im, A.imag[i]); + #else + TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[i], casse->line); + TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[i], casse->line); + #endif } hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len); TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? hrre : hrim, hr, casse->line); @@ -181,8 +188,13 @@ TEST(vect_complex_scale, vect_complex_s16_scale_basic) len, casse->sat); for(unsigned int i = 0; i < len; i++){ - TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[0], casse->line); - TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[0], casse->line); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.re, A.real[i]); + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.im, A.imag[i]); + #else + TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[i], casse->line); + TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[i], casse->line); + #endif } hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len); TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? 
hrre : hrim, hr, casse->line); @@ -243,8 +255,13 @@ TEST(vect_complex_scale, vect_complex_s16_scale_random) for(unsigned int i = 0; i < len; i++){ complex_s16_t expected = mul_complex_s16(B.real[i], B.imag[i], C.re, C.im, sat); - TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff); - TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.re, A.real[i]); + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.im, A.imag[i]); + #else + TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff); + TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff); + #endif } hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len); TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? hrre : hrim, hr, v); @@ -257,8 +274,13 @@ TEST(vect_complex_scale, vect_complex_s16_scale_random) for(unsigned int i = 0; i < len; i++){ complex_s16_t expected = mul_complex_s16(B.real[i], B.imag[i], C.re, C.im, sat); - TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff); - TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.re, A.real[i]); + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.im, A.imag[i]); + #else + TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff); + TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff); + #endif } hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len); TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? hrre : hrim, hr, v); diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_conj_macc.c b/tests/vect_tests/src/vect/complex/test_vect_complex_conj_macc.c index 35fc6e41..f68c6270 100644 --- a/tests/vect_tests/src/vect/complex/test_vect_complex_conj_macc.c +++ b/tests/vect_tests/src/vect/complex/test_vect_complex_conj_macc.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. 
// This Software is subject to the terms of the XMOS Public Licence: Version 1. // XMOS Public License: Version 1 diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_conjugate.c b/tests/vect_tests/src/vect/complex/test_vect_complex_conjugate.c index de99a738..a75ec0a4 100644 --- a/tests/vect_tests/src/vect/complex/test_vect_complex_conjugate.c +++ b/tests/vect_tests/src/vect/complex/test_vect_complex_conjugate.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_conjugate_mul.c b/tests/vect_tests/src/vect/complex/test_vect_complex_conjugate_mul.c index 7d427a4c..74ce805a 100644 --- a/tests/vect_tests/src/vect/complex/test_vect_complex_conjugate_mul.c +++ b/tests/vect_tests/src/vect/complex/test_vect_complex_conjugate_mul.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include @@ -15,6 +15,7 @@ #include "unity_fixture.h" +#define INT16_WIGGLE 4 TEST_GROUP_RUNNER(vect_complex_conj_mul) { RUN_TEST_CASE(vect_complex_conj_mul, vect_complex_s16_conj_mul_basic); @@ -170,9 +171,14 @@ TEST(vect_complex_conj_mul, vect_complex_s16_conj_mul_basic) len, casse->sat); headroom_t hrre, hrim; - for(unsigned int i = 0; i < len; i++){ - TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[0], casse->line); - TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[0], casse->line); + for(unsigned int i = 0; i < len; i++){ + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.re, A.real[i]); + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.im, A.imag[i]); + #else + TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[i], casse->line); + TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[i], casse->line); + #endif } hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len); TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? hrre : hrim, hr, casse->line); @@ -184,8 +190,13 @@ TEST(vect_complex_conj_mul, vect_complex_s16_conj_mul_basic) len, casse->sat); for(unsigned int i = 0; i < len; i++){ - TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[0], casse->line); - TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[0], casse->line); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.re, A.real[i]); + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.im, A.imag[i]); + #else + TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[i], casse->line); + TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[i], casse->line); + #endif } hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len); TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? 
hrre : hrim, hr, casse->line); @@ -245,8 +256,13 @@ TEST(vect_complex_conj_mul, vect_complex_s16_conj_mul_random) for(unsigned int i = 0; i < len; i++){ complex_s16_t expected = mul_complex_conj_s16(B.real[i], B.imag[i], C.real[i], C.imag[i], sat); - TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff); - TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.re, A.real[i]); + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.im, A.imag[i]); + #else + TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff); + TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff); + #endif } hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len); TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? hrre : hrim, hr, v); @@ -259,8 +275,13 @@ TEST(vect_complex_conj_mul, vect_complex_s16_conj_mul_random) for(unsigned int i = 0; i < len; i++){ complex_s16_t expected = mul_complex_conj_s16(B.real[i], B.imag[i], C.real[i], C.imag[i], sat); - TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff); - TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.re, A.real[i]); + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.im, A.imag[i]); + #else + TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff); + TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff); + #endif } hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len); TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? hrre : hrim, hr, v); diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_macc.c b/tests/vect_tests/src/vect/complex/test_vect_complex_macc.c index 5e432a39..e243cfd2 100644 --- a/tests/vect_tests/src/vect/complex/test_vect_complex_macc.c +++ b/tests/vect_tests/src/vect/complex/test_vect_complex_macc.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. 
// This Software is subject to the terms of the XMOS Public Licence: Version 1. // XMOS Public License: Version 1 diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_mag.c b/tests/vect_tests/src/vect/complex/test_vect_complex_mag.c index da198324..87e11f97 100644 --- a/tests/vect_tests/src/vect/complex/test_vect_complex_mag.c +++ b/tests/vect_tests/src/vect/complex/test_vect_complex_mag.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -67,7 +67,7 @@ static int16_t mag_complex_s16(complex_s16_t b, right_shift_t b_shr) double mag = sqrt(sqr_mag); - int16_t a = (int16_t) lround(mag); + int16_t a = (int16_t) llround(mag); return SAT(16)(a); } @@ -88,7 +88,7 @@ static int32_t mag_complex_s32(complex_s32_t b, right_shift_t b_shr) double mag = sqrt((double) sqr_mag); - int32_t a = lround(mag); + int32_t a = llround(mag); return SAT(32)(a); } diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_mul.c b/tests/vect_tests/src/vect/complex/test_vect_complex_mul.c index 824b43dd..39c39743 100644 --- a/tests/vect_tests/src/vect/complex/test_vect_complex_mul.c +++ b/tests/vect_tests/src/vect/complex/test_vect_complex_mul.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include @@ -15,6 +15,7 @@ #include "unity_fixture.h" +#define INT16_WIGGLE 4 TEST_GROUP_RUNNER(vect_complex_mul) { RUN_TEST_CASE(vect_complex_mul, vect_complex_s16_mul_prepare) @@ -248,8 +249,13 @@ TEST(vect_complex_mul, vect_complex_s16_mul_basic) headroom_t hrre, hrim; for(unsigned int i = 0; i < len; i++){ - TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[0], casse->line); - TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[0], casse->line); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.re, A.real[i]); + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.im, A.imag[i]); + #else + TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[i], casse->line); + TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[i], casse->line); + #endif } hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len); TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? hrre : hrim, hr, casse->line); @@ -261,8 +267,13 @@ TEST(vect_complex_mul, vect_complex_s16_mul_basic) len, casse->sat); for(unsigned int i = 0; i < len; i++){ - TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[0], casse->line); - TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[0], casse->line); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.re, A.real[i]); + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.im, A.imag[i]); + #else + TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[i], casse->line); + TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[i], casse->line); + #endif } hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len); TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? 
hrre : hrim, hr, casse->line); @@ -319,8 +330,13 @@ TEST(vect_complex_mul, vect_complex_s16_mul_random) for(unsigned int i = 0; i < len; i++){ complex_s16_t expected = mul_complex_s16(B.real[i], B.imag[i], C.real[i], C.imag[i], sat); - TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff); - TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.re, A.real[i]); + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.im, A.imag[i]); + #else + TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff); + TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff); + #endif } hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len); TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? hrre : hrim, hr, v); @@ -333,8 +349,13 @@ TEST(vect_complex_mul, vect_complex_s16_mul_random) for(unsigned int i = 0; i < len; i++){ complex_s16_t expected = mul_complex_s16(B.real[i], B.imag[i], C.real[i], C.imag[i], sat); - TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff); - TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.re, A.real[i]); + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.im, A.imag[i]); + #else + TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff); + TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff); + #endif } hrre = vect_s16_headroom(A.real, len); hrim = vect_s16_headroom(A.imag, len); TEST_ASSERT_EQUAL_MSG((hrre <= hrim)? hrre : hrim, hr, v); diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_real_mul.c b/tests/vect_tests/src/vect/complex/test_vect_complex_real_mul.c index 99f1532d..577bc05f 100644 --- a/tests/vect_tests/src/vect/complex/test_vect_complex_real_mul.c +++ b/tests/vect_tests/src/vect/complex/test_vect_complex_real_mul.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. 
// This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -15,6 +15,8 @@ #include "unity_fixture.h" +#define INT16_WIGGLE 4 + TEST_GROUP_RUNNER(vect_complex_real_mul) { RUN_TEST_CASE(vect_complex_real_mul, vect_complex_s16_real_mul_prepare); RUN_TEST_CASE(vect_complex_real_mul, vect_complex_s16_real_mul_basic); @@ -164,9 +166,13 @@ TEST(vect_complex_real_mul, vect_complex_s16_real_mul_basic) //Verify mul_complex_s16() is correct. It's used in other test cases. complex_s16_t tmp = mul_complex_s16(casse->value.b.re, casse->value.b.im, casse->value.c, casse->sat); - - TEST_ASSERT_EQUAL_MSG(casse->expected.re, tmp.re, casse->line); - TEST_ASSERT_EQUAL_MSG(casse->expected.im, tmp.im, casse->line); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.re, tmp.re); + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.im, tmp.im); + #else + TEST_ASSERT_EQUAL_MSG(casse->expected.re, tmp.re, casse->line); + TEST_ASSERT_EQUAL_MSG(casse->expected.im, tmp.im, casse->line); + #endif unsigned lengths[] = {1, 4, 16, 32, 40 }; @@ -192,8 +198,13 @@ TEST(vect_complex_real_mul, vect_complex_s16_real_mul_basic) C, len, casse->sat); for(unsigned int i = 0; i < len; i++){ - TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[i], casse->line); - TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[i], casse->line); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.re, A.real[i]); + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.im, A.imag[i]); + #else + TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[i], casse->line); + TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[i], casse->line); + #endif } headroom_t exp_hr = vect_complex_s16_headroom(A.real, A.imag, len); @@ -205,8 +216,13 @@ TEST(vect_complex_real_mul, vect_complex_s16_real_mul_basic) C, len, casse->sat); for(unsigned int i = 0; i < len; i++){ - TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[i], casse->line); - 
TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[i], casse->line); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.re, A.real[i]); + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected.im, A.imag[i]); + #else + TEST_ASSERT_EQUAL_MSG(casse->expected.re, A.real[i], casse->line); + TEST_ASSERT_EQUAL_MSG(casse->expected.im, A.imag[i], casse->line); + #endif } exp_hr = vect_complex_s16_headroom(A.real, A.imag, len); @@ -263,8 +279,13 @@ TEST(vect_complex_real_mul, vect_complex_s16_real_mul_random) for(unsigned int i = 0; i < len; i++){ complex_s16_t expected = mul_complex_s16(B.real[i], B.imag[i], C[i], sat); - TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff); - TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.re, A.real[i]); + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.im, A.imag[i]); + #else + TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff); + TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff); + #endif } TEST_ASSERT_EQUAL_MSG( vect_complex_s16_headroom(A.real,A.imag,len), hr, v); @@ -276,8 +297,13 @@ TEST(vect_complex_real_mul, vect_complex_s16_real_mul_random) for(unsigned int i = 0; i < len; i++){ complex_s16_t expected = mul_complex_s16(B.real[i], B.imag[i], C[i], sat); - TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff); - TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.re, A.real[i]); + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected.im, A.imag[i]); + #else + TEST_ASSERT_EQUAL_MESSAGE(expected.re, A.real[i], msg_buff); + TEST_ASSERT_EQUAL_MESSAGE(expected.im, A.imag[i], msg_buff); + #endif } TEST_ASSERT_EQUAL_MSG( vect_complex_s16_headroom(A.real,A.imag,len), hr, v); diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_real_scale.c b/tests/vect_tests/src/vect/complex/test_vect_complex_real_scale.c 
index 7e5e6b01..7d41e0d9 100644 --- a/tests/vect_tests/src/vect/complex/test_vect_complex_real_scale.c +++ b/tests/vect_tests/src/vect/complex/test_vect_complex_real_scale.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -136,7 +136,7 @@ TEST(vect_complex_real_scale, vect_complex_s16_real_scale) TEST_ASSERT_INT16_WITHIN(1, exp, A.real[i]); - exp = lround(ldexp( ((int32_t)B.imag[i]) * C, -sat) + ldexp(1, -40)); + exp = llround(ldexp( ((int32_t)B.imag[i]) * C, -sat) + ldexp(1, -40)); exp = MIN(exp, VPU_INT16_MAX); exp = MAX(exp, VPU_INT16_MIN); TEST_ASSERT_INT16_WITHIN(1, exp, A.imag[i]); @@ -162,7 +162,7 @@ TEST(vect_complex_real_scale, vect_complex_s16_real_scale) exp = MAX(exp, VPU_INT16_MIN); TEST_ASSERT_INT16_WITHIN(1, exp, A.real[i]); - exp = lround(ldexp( ((int32_t)B.imag[i]) * C, -sat) + ldexp(1, -40)); + exp = llround(ldexp( ((int32_t)B.imag[i]) * C, -sat) + ldexp(1, -40)); exp = MIN(exp, VPU_INT16_MAX); exp = MAX(exp, VPU_INT16_MIN); TEST_ASSERT_INT16_WITHIN(1, exp, A.imag[i]); diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_s16_to_complex_s32.c b/tests/vect_tests/src/vect/complex/test_vect_complex_s16_to_complex_s32.c index 03a56290..615b2eb6 100644 --- a/tests/vect_tests/src/vect/complex/test_vect_complex_s16_to_complex_s32.c +++ b/tests/vect_tests/src/vect/complex/test_vect_complex_s16_to_complex_s32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_s32_to_complex_s16.c b/tests/vect_tests/src/vect/complex/test_vect_complex_s32_to_complex_s16.c index 2f513796..090883b2 100644 --- a/tests/vect_tests/src/vect/complex/test_vect_complex_s32_to_complex_s16.c +++ b/tests/vect_tests/src/vect/complex/test_vect_complex_s32_to_complex_s16.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_squared_mag.c b/tests/vect_tests/src/vect/complex/test_vect_complex_squared_mag.c index 713009cf..26ca2613 100644 --- a/tests/vect_tests/src/vect/complex/test_vect_complex_squared_mag.c +++ b/tests/vect_tests/src/vect/complex/test_vect_complex_squared_mag.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include @@ -15,6 +15,8 @@ #include "unity_fixture.h" +#define INT16_WIGGLE 4 + TEST_GROUP_RUNNER(vect_complex_squared_mag) { RUN_TEST_CASE(vect_complex_squared_mag, vect_complex_s16_squared_mag_prepare); RUN_TEST_CASE(vect_complex_squared_mag, vect_complex_s32_squared_mag_prepare); @@ -245,7 +247,11 @@ TEST(vect_complex_squared_mag, vect_complex_s16_squared_mag_basic) hr = vect_complex_s16_squared_mag(A, B.real, B.imag, len, casse->sat); for(unsigned int i = 0; i < len; i++){ - TEST_ASSERT_EQUAL_MSG(casse->expected, A[i], casse->line); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, casse->expected, A[i]); + #else + TEST_ASSERT_EQUAL_MSG(casse->expected, A[i], casse->line); + #endif } TEST_ASSERT_EQUAL_MSG(vect_s16_headroom(A, len), hr, casse->line); @@ -308,6 +314,13 @@ TEST(vect_complex_squared_mag, vect_complex_s16_squared_mag_random) TEST_ASSERT_EQUAL_MSG_FMT(expected, A[i], "(test vect %d) (len: %u) (index %d): (mag(%d + i*%d))**2 >> %d", v, len, i, B.real[i], B.imag[i], sat); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(INT16_WIGGLE, expected, A[i]); + #else + TEST_ASSERT_EQUAL_MSG_FMT(expected, A[i], + "(test vect %d) (len: %u) (index %d): (mag(%d + i*%d))**2 >> %d", + v, len, i, B.real[i], B.imag[i], sat); + #endif } TEST_ASSERT_EQUAL_MSG(vect_s16_headroom(A, len), hr, v); } diff --git a/tests/vect_tests/src/vect/complex/test_vect_complex_sub.c b/tests/vect_tests/src/vect/complex/test_vect_complex_sub.c index 93dabd29..1f25b126 100644 --- a/tests/vect_tests/src/vect/complex/test_vect_complex_sub.c +++ b/tests/vect_tests/src/vect/complex/test_vect_complex_sub.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/tests/vect_tests/src/vect/complex/test_vect_sum_complex.c b/tests/vect_tests/src/vect/complex/test_vect_sum_complex.c index a6aaeb91..b11ef2e8 100644 --- a/tests/vect_tests/src/vect/complex/test_vect_sum_complex.c +++ b/tests/vect_tests/src/vect/complex/test_vect_sum_complex.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -157,8 +157,14 @@ TEST(vect_complex_sum, vect_complex_s16_sum_basic) ((int32_t) casse->b.re) * len, ((int32_t) casse->b.im) * len}; - TEST_ASSERT_EQUAL_MSG(exp.re, result.re, casse->line); - TEST_ASSERT_EQUAL_MSG(exp.im, result.im, casse->line); + #if defined(__VX4B__) + //this casts to 32 bit because it might not fit in 16 bits (due to rounding) + TEST_ASSERT_INT32_WITHIN(4, exp.re, result.re); + TEST_ASSERT_INT32_WITHIN(4, exp.im, result.im); + #else + TEST_ASSERT_EQUAL_MSG(exp.re, result.re, casse->line); + TEST_ASSERT_EQUAL_MSG(exp.im, result.im, casse->line); + #endif } } } @@ -201,8 +207,14 @@ TEST(vect_complex_sum, vect_complex_s16_sum_random) result = vect_complex_s16_sum(B.real, B.imag, len); - TEST_ASSERT_EQUAL(expected.re, result.re); - TEST_ASSERT_EQUAL(expected.im, result.im); + #if defined(__VX4B__) + //this casts to 32 bit because it might not fit in 16 bits (due to rounding) + TEST_ASSERT_INT32_WITHIN(4, expected.re, result.re); + TEST_ASSERT_INT32_WITHIN(4, expected.im, result.im); + #else + TEST_ASSERT_EQUAL(expected.re, result.re); + TEST_ASSERT_EQUAL(expected.im, result.im); + #endif } } diff --git a/tests/vect_tests/src/vect/float/test_vect_complex_f32_macc.c b/tests/vect_tests/src/vect/float/test_vect_complex_f32_macc.c index 47fbd1dc..a5e5bdd8 100644 --- a/tests/vect_tests/src/vect/float/test_vect_complex_f32_macc.c +++ b/tests/vect_tests/src/vect/float/test_vect_complex_f32_macc.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED.
+// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/float/test_vect_complex_f32_mul.c b/tests/vect_tests/src/vect/float/test_vect_complex_f32_mul.c index 97521280..2135f8b1 100644 --- a/tests/vect_tests/src/vect/float/test_vect_complex_f32_mul.c +++ b/tests/vect_tests/src/vect/float/test_vect_complex_f32_mul.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/float/test_vect_f32_add.c b/tests/vect_tests/src/vect/float/test_vect_f32_add.c index 82e00b3b..a6c07be3 100644 --- a/tests/vect_tests/src/vect/float/test_vect_f32_add.c +++ b/tests/vect_tests/src/vect/float/test_vect_f32_add.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/float/test_vect_f32_dot.c b/tests/vect_tests/src/vect/float/test_vect_f32_dot.c index fa6a1c45..1b716cee 100644 --- a/tests/vect_tests/src/vect/float/test_vect_f32_dot.c +++ b/tests/vect_tests/src/vect/float/test_vect_f32_dot.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/float/test_vect_f32_max_exponent.c b/tests/vect_tests/src/vect/float/test_vect_f32_max_exponent.c index dbf643e3..61fe0cd8 100644 --- a/tests/vect_tests/src/vect/float/test_vect_f32_max_exponent.c +++ b/tests/vect_tests/src/vect/float/test_vect_f32_max_exponent.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/tests/vect_tests/src/vect/float/test_vect_f32_to_s32.c b/tests/vect_tests/src/vect/float/test_vect_f32_to_s32.c index 83bc81f0..d9823844 100644 --- a/tests/vect_tests/src/vect/float/test_vect_f32_to_s32.c +++ b/tests/vect_tests/src/vect/float/test_vect_f32_to_s32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/float/test_vect_s32_to_f32.c b/tests/vect_tests/src/vect/float/test_vect_s32_to_f32.c index 5344066f..d9d8a4b8 100644 --- a/tests/vect_tests/src/vect/float/test_vect_s32_to_f32.c +++ b/tests/vect_tests/src/vect/float/test_vect_s32_to_f32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/stat/test_vect_abs_sum.c b/tests/vect_tests/src/vect/stat/test_vect_abs_sum.c index 77e5a021..6ec955eb 100644 --- a/tests/vect_tests/src/vect/stat/test_vect_abs_sum.c +++ b/tests/vect_tests/src/vect/stat/test_vect_abs_sum.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -83,11 +83,15 @@ TEST(vect_abs_sum, vect_s16_abs_sum_basic) t = (t>=0)? t : -t; // t = (t>=0)? t : 0x7FFF; // because -1*(-0x8000) = -0x8000 int32_t exp = t * len; - - XTEST_ASSERT_S32_EQUAL(exp, result, - "Case @ line %u\n" - "length: %u\n", - casse->line, len); + #if defined(__VX4B__) + //this casts to 32 bit because it might not fit in 16 bits (due to rounding) + TEST_ASSERT_INT32_WITHIN(4, exp, result); + #else + XTEST_ASSERT_S32_EQUAL(exp, result, + "Case @ line %u\n" + "length: %u\n", + casse->line, len); + #endif } } } @@ -129,8 +133,12 @@ TEST(vect_abs_sum, vect_s16_abs_sum_random) b = (b>=0)?
b : -b; exp += b; } - - TEST_ASSERT_EQUAL_MESSAGE(exp, result, ""); + #if defined(__VX4B__) + //this casts to 32 bit because it might not fit in 16 bits (due to rounding) + TEST_ASSERT_INT32_WITHIN(4, exp, result); + #else + TEST_ASSERT_EQUAL_MESSAGE(exp, result, ""); + #endif } } #undef MAX_LEN diff --git a/tests/vect_tests/src/vect/stat/test_vect_argmax.c b/tests/vect_tests/src/vect/stat/test_vect_argmax.c index 90fa6195..ccba4652 100644 --- a/tests/vect_tests/src/vect/stat/test_vect_argmax.c +++ b/tests/vect_tests/src/vect/stat/test_vect_argmax.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/stat/test_vect_argmin.c b/tests/vect_tests/src/vect/stat/test_vect_argmin.c index a1082537..fa7ec0eb 100644 --- a/tests/vect_tests/src/vect/stat/test_vect_argmin.c +++ b/tests/vect_tests/src/vect/stat/test_vect_argmin.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/stat/test_vect_energy.c b/tests/vect_tests/src/vect/stat/test_vect_energy.c index 154b8dd9..be5e35d3 100644 --- a/tests/vect_tests/src/vect/stat/test_vect_energy.c +++ b/tests/vect_tests/src/vect/stat/test_vect_energy.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1.
#include @@ -84,7 +84,12 @@ TEST(vect_energy, vect_s16_energy) result = vect_s16_energy(B, len, b_shr); - TEST_ASSERT_EQUAL(exp, result); + #if defined(__VX4B__) + //this casts to 32 bit because it might not fit in 16 bits (due to rounding) + TEST_ASSERT_INT32_WITHIN(4, exp, result); + #else + TEST_ASSERT_EQUAL(exp, result); + #endif } } diff --git a/tests/vect_tests/src/vect/stat/test_vect_max.c b/tests/vect_tests/src/vect/stat/test_vect_max.c index 4130060e..8301885e 100644 --- a/tests/vect_tests/src/vect/stat/test_vect_max.c +++ b/tests/vect_tests/src/vect/stat/test_vect_max.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/stat/test_vect_min.c b/tests/vect_tests/src/vect/stat/test_vect_min.c index 96f99cc1..3b39165b 100644 --- a/tests/vect_tests/src/vect/stat/test_vect_min.c +++ b/tests/vect_tests/src/vect/stat/test_vect_min.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/test_chunk_s16_accumulate.c b/tests/vect_tests/src/vect/test_chunk_s16_accumulate.c index 150a93dd..ff49e7d7 100644 --- a/tests/vect_tests/src/vect/test_chunk_s16_accumulate.c +++ b/tests/vect_tests/src/vect/test_chunk_s16_accumulate.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/test_vect_abs.c b/tests/vect_tests/src/vect/test_vect_abs.c index 7f4a04d1..39014d58 100644 --- a/tests/vect_tests/src/vect/test_vect_abs.c +++ b/tests/vect_tests/src/vect/test_vect_abs.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED.
// This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -77,16 +77,28 @@ TEST(vect_abs, vect_s16_abs) memset(A, 0xCC, sizeof(A)); hr = vect_s16_abs(A, B, len); - for(unsigned int i = 0; i < len; i++) - TEST_ASSERT_EQUAL(B[i] >= 0? B[i] : -B[i], A[i]); + for(unsigned int i = 0; i < len; i++){ + #if defined(__VX4B__) + //this casts to 32 bit because it might not fit in 16 bits (due to rounding) + TEST_ASSERT_INT32_WITHIN(4, B[i] >= 0? B[i] : -B[i], A[i]); + #else + TEST_ASSERT_EQUAL(B[i] >= 0? B[i] : -B[i], A[i]); + #endif + } for(int i = len; i < MAX_LEN; i++) TEST_ASSERT_EQUAL((int16_t)0xCCCC, A[i]); memcpy(A, B, sizeof(A)); hr = vect_s16_abs(A, A, len); - for(unsigned int i = 0; i < len; i++) - TEST_ASSERT_EQUAL(B[i] >= 0? B[i] : -B[i], A[i]); + for(unsigned int i = 0; i < len; i++){ + #if defined(__VX4B__) + //this casts to 32 bit because it might not fit in 16 bits (due to rounding) + TEST_ASSERT_INT32_WITHIN(4, B[i] >= 0? B[i] : -B[i], A[i]); + #else + TEST_ASSERT_EQUAL(B[i] >= 0? B[i] : -B[i], A[i]); + #endif + } } } diff --git a/tests/vect_tests/src/vect/test_vect_add.c b/tests/vect_tests/src/vect/test_vect_add.c index 2464324c..83f1b303 100644 --- a/tests/vect_tests/src/vect/test_vect_add.c +++ b/tests/vect_tests/src/vect/test_vect_add.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1.
#include @@ -185,7 +185,7 @@ TEST(vect_add, vect_s16_add_basic) { { -0x0001, 0x0001 }, { 0, 0 }, 0x0000, __LINE__}, { { 0x1010, 0x0101 }, { 0, 0 }, 0x1111, __LINE__}, { { -0x1010, -0x0101 }, { 0, 0 }, -0x1111, __LINE__}, - { { -0x8000, 0x0000 }, { 0, 0 }, -0x7FFF, __LINE__}, + { { -0x8000, 0x0000 }, { 0, 0 }, VPU_INT16_MIN, __LINE__}, { { 0x4000, 0x4000 }, { 0, 0 }, 0x7FFF, __LINE__}, { { 0x7FFF, 0x7FFF }, { 0, 0 }, 0x7FFF, __LINE__}, @@ -345,7 +345,7 @@ TEST(vect_add, vect_s32_add_basic) { { -0x00000001, 0x00000001 }, { 0, 0 }, 0x00000000, __LINE__}, { { 0x00001010, 0x00000101 }, { 0, 0 }, 0x00001111, __LINE__}, { { -0x00001010, -0x00000101 }, { 0, 0 }, -0x00001111, __LINE__}, - { { (int) (0-0x80000000), 0x00000000 }, { 0, 0 }, -0x7FFFFFFF, __LINE__}, + { { (int) (0-0x80000000), 0x00000000 }, { 0, 0 }, VPU_INT32_MIN, __LINE__}, { { 0x40000000, 0x40000000 }, { 0, 0 }, 0x7FFFFFFF, __LINE__}, { { 0x7FFFFFFF, 0x7FFFFFFF }, { 0, 0 }, 0x7FFFFFFF, __LINE__}, diff --git a/tests/vect_tests/src/vect/test_vect_add_scalar.c b/tests/vect_tests/src/vect/test_vect_add_scalar.c index 3f9e4105..4c70dc0f 100644 --- a/tests/vect_tests/src/vect/test_vect_add_scalar.c +++ b/tests/vect_tests/src/vect/test_vect_add_scalar.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/test_vect_bitdepth_convert.c b/tests/vect_tests/src/vect/test_vect_bitdepth_convert.c index d98043dc..8682a8d9 100644 --- a/tests/vect_tests/src/vect/test_vect_bitdepth_convert.c +++ b/tests/vect_tests/src/vect/test_vect_bitdepth_convert.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include @@ -151,7 +151,7 @@ TEST(vect_bitdepth_convert, vect_s32_to_vect_s16_basic) { 0x00000100, 0, 0x0100, __LINE__}, { -0x00000100, 0, -0x0100, __LINE__}, { 0x00008000, 0, 0x7FFF, __LINE__}, - { -0x00008000, 0, -0x7FFF, __LINE__}, + { -0x00008000, 0, VPU_INT16_MIN, __LINE__}, { 0x00000001, 1, 0x0001, __LINE__}, { -0x00000001, 1, 0x0000, __LINE__}, //ties round towards positive infty { 0x00018000, 16, 0x0002, __LINE__}, diff --git a/tests/vect_tests/src/vect/test_vect_clip.c b/tests/vect_tests/src/vect/test_vect_clip.c index 9bb3675a..7f691167 100644 --- a/tests/vect_tests/src/vect/test_vect_clip.c +++ b/tests/vect_tests/src/vect/test_vect_clip.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -58,18 +58,18 @@ TEST(vect_clip, vect_s16_clip_basic) // test_case_t casses[] = { // b b_shr lower upper exp line num - { 0x0000, 0, -0x7FFF, 0x7FFF, 0x0000, __LINE__}, - { 0x0001, 0, -0x7FFF, 0x7FFF, 0x0001, __LINE__}, - { 0x000A, 0, -0x7FFF, 0x7FFF, 0x000A, __LINE__}, - { 0x0F00, 0, -0x7FFF, 0x7FFF, 0x0F00, __LINE__}, - { -0x0FFF, 0, -0x7FFF, 0x7FFF, -0x0FFF, __LINE__}, - { -0x8000, 0, -0x7FFF, 0x7FFF, -0x7FFF, __LINE__}, - - { 0x0000, 2, -0x7FFF, 0x7FFF, 0x0000, __LINE__}, - { 0x0002, 1, -0x7FFF, 0x7FFF, 0x0001, __LINE__}, - { 0x0005, -1, -0x7FFF, 0x7FFF, 0x000A, __LINE__}, - { 0x0F00, 4, -0x7FFF, 0x7FFF, 0x00F0, __LINE__}, - { -0x0FFF, -1, -0x7FFF, 0x7FFF, -0x1FFE, __LINE__}, + { 0x0000, 0, INT16_MIN, 0x7FFF, 0x0000, __LINE__}, + { 0x0001, 0, INT16_MIN, 0x7FFF, 0x0001, __LINE__}, + { 0x000A, 0, INT16_MIN, 0x7FFF, 0x000A, __LINE__}, + { 0x0F00, 0, INT16_MIN, 0x7FFF, 0x0F00, __LINE__}, + { -0x0FFF, 0, INT16_MIN, 0x7FFF, -0x0FFF, __LINE__}, + { INT16_MIN, 0, INT16_MIN, 0x7FFF, VPU_INT16_MIN, __LINE__}, + + { 0x0000, 2, INT16_MIN, 0x7FFF, 0x0000, __LINE__}, + { 0x0002, 1, INT16_MIN, 0x7FFF, 0x0001, __LINE__}, + { 0x0005, -1, INT16_MIN, 
0x7FFF, 0x000A, __LINE__}, + { 0x0F00, 4, INT16_MIN, 0x7FFF, 0x00F0, __LINE__}, + { -0x0FFF, -1, INT16_MIN, 0x7FFF, -0x1FFE, __LINE__}, { 0x0000, 0, -0x1000, 0x1000, 0x0000, __LINE__}, { 0x0100, 0, -0x1000, 0x1000, 0x0100, __LINE__}, @@ -84,7 +84,7 @@ TEST(vect_clip, vect_s16_clip_basic) // { 0x2100, 0, 0x1000, 0x2000, 0x2000, __LINE__}, { -0x2100, 0, 0x1000, 0x2000, 0x1000, __LINE__}, { 0x7FFF, 0, 0x1000, 0x2000, 0x2000, __LINE__}, - { -0x7FFF, 0, 0x1000, 0x2000, 0x1000, __LINE__}, + { INT16_MIN, 0, 0x1000, 0x2000, 0x1000, __LINE__}, { 0x7000, 0, 0x1000, 0x2000, 0x2000, __LINE__}, { -0x7000, 0, 0x1000, 0x2000, 0x1000, __LINE__}, { 0x7000, 1, 0x1000, 0x2000, 0x2000, __LINE__}, @@ -95,7 +95,7 @@ TEST(vect_clip, vect_s16_clip_basic) // { 0x0100, 0, -0x2000, -0x1000, -0x1000, __LINE__}, { -0x2100, 0, -0x2000, -0x1000, -0x2000, __LINE__}, { 0x2100, 0, -0x2000, -0x1000, -0x1000, __LINE__}, - { -0x7FFF, 0, -0x2000, -0x1000, -0x2000, __LINE__}, + { INT16_MIN, 0, -0x2000, -0x1000, -0x2000, __LINE__}, { 0x7FFF, 0, -0x2000, -0x1000, -0x1000, __LINE__}, { -0x7000, 0, -0x2000, -0x1000, -0x2000, __LINE__}, { 0x7000, 0, -0x2000, -0x1000, -0x1000, __LINE__}, diff --git a/tests/vect_tests/src/vect/test_vect_copy.c b/tests/vect_tests/src/vect/test_vect_copy.c index 4ed2b28c..a1a32e7c 100644 --- a/tests/vect_tests/src/vect/test_vect_copy.c +++ b/tests/vect_tests/src/vect/test_vect_copy.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/test_vect_dot.c b/tests/vect_tests/src/vect/test_vect_dot.c index d64e2c20..4f363df6 100644 --- a/tests/vect_tests/src/vect/test_vect_dot.c +++ b/tests/vect_tests/src/vect/test_vect_dot.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/tests/vect_tests/src/vect/test_vect_exp.c b/tests/vect_tests/src/vect/test_vect_exp.c index 70787ea6..3c2f6d89 100644 --- a/tests/vect_tests/src/vect/test_vect_exp.c +++ b/tests/vect_tests/src/vect/test_vect_exp.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -58,7 +58,7 @@ TEST(vect_exp, chunk_q30_exp_small_RANDOM) for(unsigned int i = 0; i < length; i++){ double bi = ldexp(B[i], b_exp); double exp_dbl = exp(bi); - expected[i] = lround(ldexp(exp_dbl, 30)); + expected[i] = llround(ldexp(exp_dbl, 30)); } // volatile uint32_t t0 = get_reference_time(); @@ -122,7 +122,7 @@ TEST(vect_exp, vect_q30_exp_small_RANDOM) for(unsigned int i = 0; i < length; i++){ double bi = ldexp(B[i], b_exp); double exp_dbl = exp(bi); - expected[i] = lround(ldexp(exp_dbl, 30)); + expected[i] = llround(ldexp(exp_dbl, 30)); } // volatile uint32_t t0 = get_reference_time(); diff --git a/tests/vect_tests/src/vect/test_vect_headroom.c b/tests/vect_tests/src/vect/test_vect_headroom.c index 92bdf0cf..5deee4e0 100644 --- a/tests/vect_tests/src/vect/test_vect_headroom.c +++ b/tests/vect_tests/src/vect/test_vect_headroom.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/test_vect_inverse.c b/tests/vect_tests/src/vect/test_vect_inverse.c index 05d8e9f8..f5606b5a 100644 --- a/tests/vect_tests/src/vect/test_vect_inverse.c +++ b/tests/vect_tests/src/vect/test_vect_inverse.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/tests/vect_tests/src/vect/test_vect_log.c b/tests/vect_tests/src/vect/test_vect_log.c index 6a7ee8d8..8d691772 100644 --- a/tests/vect_tests/src/vect/test_vect_log.c +++ b/tests/vect_tests/src/vect/test_vect_log.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -113,7 +113,7 @@ TEST(vect_log, chunk_float_s32_log_RANDOM) for(unsigned int i = 0; i < length; i++){ double bi = ldexp(B[i].mant, B[i].exp); double exp_dbl = log(bi); - expected[i] = lround(ldexp(exp_dbl, 24)); + expected[i] = llround(ldexp(exp_dbl, 24)); } // volatile uint32_t t0 = get_reference_time(); @@ -217,7 +217,7 @@ TEST(vect_log, chunk_s32_log) double exp_dbl = log(bi); if( B[i] == 0 ) expected[i] = -INT32_MAX; - else expected[i] = lround(ldexp(exp_dbl, 24)); + else expected[i] = llround(ldexp(exp_dbl, 24)); } // volatile uint32_t t0 = get_reference_time(); @@ -293,7 +293,7 @@ TEST(vect_log, vect_s32_log) default: exp_dbl = log(bi) * inv_ln_output_base; } - expected[i] = lround(ldexp(exp_dbl, 24)); + expected[i] = llround(ldexp(exp_dbl, 24)); } // volatile uint32_t t0; @@ -385,7 +385,7 @@ TEST(vect_log, vect_float_s32_log) default: exp_dbl = log(bi) * inv_ln_output_base; } - expected[i] = lround(ldexp(exp_dbl, 24)); + expected[i] = llround(ldexp(exp_dbl, 24)); } // volatile uint32_t t0; diff --git a/tests/vect_tests/src/vect/test_vect_macc.c b/tests/vect_tests/src/vect/test_vect_macc.c index 05e922d7..d0a19528 100644 --- a/tests/vect_tests/src/vect/test_vect_macc.c +++ b/tests/vect_tests/src/vect/test_vect_macc.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
// XMOS Public License: Version 1 diff --git a/tests/vect_tests/src/vect/test_vect_max_elementwise.c b/tests/vect_tests/src/vect/test_vect_max_elementwise.c index 3535bf03..685f8086 100644 --- a/tests/vect_tests/src/vect/test_vect_max_elementwise.c +++ b/tests/vect_tests/src/vect/test_vect_max_elementwise.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -11,6 +11,7 @@ #include "xmath/xs3/vpu_scalar_ops.h" #include "../tst_common.h" +//#include "../tst_asserts.h" #include "unity_fixture.h" TEST_GROUP(vect_max_elementwise); diff --git a/tests/vect_tests/src/vect/test_vect_min_elementwise.c b/tests/vect_tests/src/vect/test_vect_min_elementwise.c index e130b76d..833e1ba9 100644 --- a/tests/vect_tests/src/vect/test_vect_min_elementwise.c +++ b/tests/vect_tests/src/vect/test_vect_min_elementwise.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/test_vect_mul.c b/tests/vect_tests/src/vect/test_vect_mul.c index a5456f60..9b85c81f 100644 --- a/tests/vect_tests/src/vect/test_vect_mul.c +++ b/tests/vect_tests/src/vect/test_vect_mul.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include @@ -36,6 +36,11 @@ static char msg_buff[200]; TEST_ASSERT_EQUAL_MESSAGE((EXPECTED), (ACTUAL), msg_buff); \ }} while(0) +#if defined(__XS3A__) + #define XTEST_ASSERT_EQUAL_MSG(EXPECTED, ACTUAL, LINE_NUM) TEST_ASSERT_EQUAL_MSG(EXPECTED, ACTUAL, LINE_NUM) +#elif defined(__VX4B__) + #define XTEST_ASSERT_EQUAL_MSG(EXPECTED, ACTUAL, LINE_NUM) TEST_ASSERT_INT16_WITHIN(1, EXPECTED, ACTUAL) +#endif /** * This is a VLMACC-based multiply, which means the right-shift @@ -47,10 +52,15 @@ static int16_t mul_s16(int16_t b, int16_t c, int a_shr) int32_t A = ((int32_t)b)*c; int32_t a = A; - if(a_shr != 0) - a = a + (1 << (a_shr-1)); + if(a_shr != 0){ + if (a_shr > 0){ + a = a + (1 << (a_shr-1)); + a = a >> a_shr; + } else { + a = a << (unsigned)(-a_shr); + } + } - a = a >> a_shr; a = (a >= VPU_INT16_MAX)? VPU_INT16_MAX : (a <= VPU_INT16_MIN)? VPU_INT16_MIN : a; return (int16_t) a; @@ -224,7 +234,11 @@ TEST(vect_mul, vect_s16_mul_basic) hr = vect_s16_mul(A, A, C, len, casse->a_shr); for(unsigned int i = 0; i < len; i++){ - TEST_ASSERT_EQUAL_MSG(casse->expected, A[0], casse->line); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(4, casse->expected, A[i]); + #else + TEST_ASSERT_EQUAL_MSG(casse->expected, A[i], casse->line); + #endif TEST_ASSERT_EQUAL_MSG(vect_s16_headroom(A, len), hr, casse->line); } @@ -232,7 +246,11 @@ TEST(vect_mul, vect_s16_mul_basic) hr = vect_s16_mul(A, B, A, len, casse->a_shr); for(unsigned int i = 0; i < len; i++){ - TEST_ASSERT_EQUAL_MSG(casse->expected, A[0], casse->line); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(4, casse->expected, A[i]); + #else + TEST_ASSERT_EQUAL_MSG(casse->expected, A[i], casse->line); + #endif TEST_ASSERT_EQUAL_MSG(vect_s16_headroom(A, len), hr, casse->line); } @@ -275,27 +293,36 @@ TEST(vect_mul, vect_s16_mul_random) // A <-- B * C hr = vect_s16_mul(A, B, C, len, a_shr); - - XTEST_ASSERT_VECT_S16_EQUAL(expected, A, len, - debug_fmt, expected[i], B[i], C[i], a_shr, A[i] ); - TEST_ASSERT_EQUAL(vect_s16_headroom(A, 
len), hr); + #if defined(__VX4B__) + TEST_ASSERT_INT16_ARRAY_WITHIN(1, expected, A, len); + #else + XTEST_ASSERT_VECT_S16_EQUAL(expected, A, len, + debug_fmt, expected[i], B[i], C[i], a_shr, A[i] ); + #endif + TEST_ASSERT_EQUAL(vect_s16_headroom(A, len), hr); // A <-- B // A <-- A * C memcpy(A, B, sizeof(A[0])*len); hr = vect_s16_mul(A, A, C, len, a_shr); - - XTEST_ASSERT_VECT_S16_EQUAL(expected, A, len, - debug_fmt, expected[i], B[i], C[i], a_shr, A[i] ); + #if defined(__VX4B__) + TEST_ASSERT_INT16_ARRAY_WITHIN(1, expected, A, len); + #else + XTEST_ASSERT_VECT_S16_EQUAL(expected, A, len, + debug_fmt, expected[i], B[i], C[i], a_shr, A[i] ); + #endif TEST_ASSERT_EQUAL(vect_s16_headroom(A, len), hr); // A <-- C // A <-- B * A memcpy(A, C, sizeof(A[0])*len); hr = vect_s16_mul(A, B, A, len, a_shr); - - XTEST_ASSERT_VECT_S16_EQUAL(expected, A, len, - debug_fmt, expected[i], B[i], C[i], a_shr, A[i] ); + #if defined(__VX4B__) + TEST_ASSERT_INT16_ARRAY_WITHIN(1, expected, A, len); + #else + XTEST_ASSERT_VECT_S16_EQUAL(expected, A, len, + debug_fmt, expected[i], B[i], C[i], a_shr, A[i] ); + #endif TEST_ASSERT_EQUAL(vect_s16_headroom(A, len), hr); } @@ -329,8 +356,8 @@ TEST(vect_mul, vect_s32_mul_basic) { { 0x00004000, 0x00008000 }, { 0, 0 }, 0x00000001, __LINE__}, { { 0x00000400, 0x00000400 }, { 0, 0 }, 0x00000000, __LINE__}, { { 0x7f000000, 0x7f000000 }, { 0, 0 }, 0x7fffffff, __LINE__}, - { { 0x7f000000, -0x7f000000 }, { 0, 0 }, -0x7fffffff, __LINE__}, - { { (int) (0-0x80000000), 0x40000000 }, { 0, 0 }, -0x7fffffff, __LINE__}, + { { 0x7f000000, -0x7f000000 }, { 0, 0 }, VPU_INT32_MIN, __LINE__}, + { { (int) (0-0x80000000), 0x40000000 }, { 0, 0 }, VPU_INT32_MIN, __LINE__}, { { 0x40000000, 0x40000000 }, { 1, 0 }, 0x20000000, __LINE__}, { { 0x40000000, 0x40000000 }, { 0, 1 }, 0x20000000, __LINE__}, { { 0x40000000, 0x40000000 }, { 1, 1 }, 0x10000000, __LINE__}, @@ -339,7 +366,7 @@ TEST(vect_mul, vect_s32_mul_basic) { { 0x40000000, 0x08000000 }, { 0, -2 }, 0x20000000, __LINE__}, 
{ { 0x40000000, 0x08000000 }, { 0, -3 }, 0x40000000, __LINE__}, { { 0x40000000, 0x08000000 }, { 0, -4 }, 0x7fffffff, __LINE__}, - { { -0x40000000, 0x08000000 }, { 0, -4 }, -0x7fffffff, __LINE__}, + { { -0x40000000, 0x08000000 }, { 0, -4 }, -0x7fffffff, __LINE__}, ///???? { { 0x40000000, 0x08000000 }, { 1, -4 }, 0x40000000, __LINE__}, }; diff --git a/tests/vect_tests/src/vect/test_vect_rect.c b/tests/vect_tests/src/vect/test_vect_rect.c index 843ef5c0..c4e6a70e 100644 --- a/tests/vect_tests/src/vect/test_vect_rect.c +++ b/tests/vect_tests/src/vect/test_vect_rect.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/test_vect_s16_extract.c b/tests/vect_tests/src/vect/test_vect_s16_extract.c index 1418613e..4682de9e 100644 --- a/tests/vect_tests/src/vect/test_vect_s16_extract.c +++ b/tests/vect_tests/src/vect/test_vect_s16_extract.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/test_vect_s32_convolve.c b/tests/vect_tests/src/vect/test_vect_s32_convolve.c index 6454a13b..3621c75d 100644 --- a/tests/vect_tests/src/vect/test_vect_s32_convolve.c +++ b/tests/vect_tests/src/vect/test_vect_s32_convolve.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/test_vect_s8_boolean.c b/tests/vect_tests/src/vect/test_vect_s8_boolean.c index d6af0c2c..1a11261a 100644 --- a/tests/vect_tests/src/vect/test_vect_s8_boolean.c +++ b/tests/vect_tests/src/vect/test_vect_s8_boolean.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. 
// This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/vect_tests/src/vect/test_vect_scale.c b/tests/vect_tests/src/vect/test_vect_scale.c index 45b7da26..e366d57f 100644 --- a/tests/vect_tests/src/vect/test_vect_scale.c +++ b/tests/vect_tests/src/vect/test_vect_scale.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -35,7 +35,6 @@ static char msg_buff[200]; TEST_ASSERT_EQUAL_MESSAGE((EXPECTED), (ACTUAL), msg_buff); \ }} while(0) - static int16_t scalar_mul_s16(int16_t b, int16_t c, right_shift_t sat) { return vlsat16( ((int32_t)b) * c , sat); @@ -138,8 +137,8 @@ TEST(vect_scale, vect_s16_scale_basic) { { 0x0040, 0x0080 }, 14, 0x0001, __LINE__}, { { 0x0040, 0x0040 }, 14, 0x0000, __LINE__}, { { 0x7f00, 0x7f00 }, 14, 0x7fff, __LINE__}, - { { 0x7f00, -0x7f00 }, 14, -0x7fff, __LINE__}, - { { -0x8000, 0x4000 }, 14, -0x7fff, __LINE__}, + { { 0x7f00, -0x7f00 }, 14, VPU_INT16_MIN, __LINE__}, + { { -0x8000, 0x4000 }, 14, VPU_INT16_MIN, __LINE__}, { { 0x4000, 0x4000 }, 15, 0x2000, __LINE__}, { { 0x4000, 0x2000 }, 14, 0x2000, __LINE__}, { { 0x4000, 0x2000 }, 15, 0x1000, __LINE__}, @@ -148,7 +147,7 @@ TEST(vect_scale, vect_s16_scale_basic) { { 0x0800, 0x4000 }, 12, 0x2000, __LINE__}, { { 0x0800, 0x4000 }, 11, 0x4000, __LINE__}, { { 0x0800, 0x4000 }, 10, 0x7fff, __LINE__}, - { { 0x0800, -0x4000 }, 10, -0x7fff, __LINE__}, + { { 0x0800, -0x4000 }, 10, VPU_INT16_MIN, __LINE__}, { { 0x0800, 0x2000 }, 10, 0x4000, __LINE__}, @@ -230,7 +229,11 @@ TEST(vect_scale, vect_s16_scale_random) for(unsigned int i = 0; i < len; i++){ int16_t expected = scalar_mul_s16(B[i], alpha, sat); if(expected != A[i]) sprintf(msg_buff, sprintpat,v, i, len, A[i], B[i], sat, alpha, (uint16_t)A[i], (uint16_t)B[i], (uint16_t)alpha); - TEST_ASSERT_EQUAL_MESSAGE(expected, A[i], msg_buff); + #if defined(__VX4B__) + 
TEST_ASSERT_INT16_WITHIN(4, expected, A[i]); + #else + TEST_ASSERT_EQUAL_MESSAGE(expected, A[i], msg_buff); + #endif } TEST_ASSERT_EQUAL(vect_s16_headroom(A, len), hr); @@ -240,7 +243,11 @@ TEST(vect_scale, vect_s16_scale_random) for(unsigned int i = 0; i < len; i++){ int16_t expected = scalar_mul_s16(B[i], alpha, sat); if(expected != A[i]) sprintf(msg_buff, sprintpat,v, i, len, A[i], B[i], sat, alpha, (uint16_t)A[i], (uint16_t)B[i], (uint16_t)alpha); - TEST_ASSERT_EQUAL_MESSAGE(expected, A[i], msg_buff); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(4, expected, A[i]); + #else + TEST_ASSERT_EQUAL_MESSAGE(expected, A[i], msg_buff); + #endif } TEST_ASSERT_EQUAL(vect_s16_headroom(A, len), hr); } @@ -276,8 +283,8 @@ TEST(vect_scale, vect_s32_scale_basic) { { 0x00004000, 0x00008000 }, { 0, 0 }, 0x00000001, __LINE__}, { { 0x00000400, 0x00000400 }, { 0, 0 }, 0x00000000, __LINE__}, { { 0x7f000000, 0x7f000000 }, { 0, 0 }, 0x7fffffff, __LINE__}, - { { 0x7f000000, -0x7f000000 }, { 0, 0 }, -0x7fffffff, __LINE__}, - { { (int) (0-0x80000000), 0x40000000 }, { 0, 0 }, -0x7fffffff, __LINE__}, + { { 0x7f000000, -0x7f000000 }, { 0, 0 }, VPU_INT32_MIN, __LINE__}, + { { (int) (0-0x80000000), 0x40000000 }, { 0, 0 }, VPU_INT32_MIN, __LINE__}, { { 0x40000000, 0x40000000 }, { 1, 0 }, 0x20000000, __LINE__}, { { 0x40000000, 0x20000000 }, { 0, 0 }, 0x20000000, __LINE__}, { { 0x40000000, 0x20000000 }, { 1, 0 }, 0x10000000, __LINE__}, diff --git a/tests/vect_tests/src/vect/test_vect_set.c b/tests/vect_tests/src/vect/test_vect_set.c index f5eaa9f4..a1e3d216 100644 --- a/tests/vect_tests/src/vect/test_vect_set.c +++ b/tests/vect_tests/src/vect/test_vect_set.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include diff --git a/tests/vect_tests/src/vect/test_vect_shl.c b/tests/vect_tests/src/vect/test_vect_shl.c index b9539632..ac5f00c3 100644 --- a/tests/vect_tests/src/vect/test_vect_shl.c +++ b/tests/vect_tests/src/vect/test_vect_shl.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -82,8 +82,8 @@ TEST(vect_shl, vect_s16_shl_basic) { -0x0008, -3, -0x0001, __LINE__}, { -0x0008, -4, -0x0001, __LINE__}, { 0x1000, 3, 0x7FFF, __LINE__}, - { -0x1000, 3, -0x7FFF, __LINE__}, - { -0x8000, 0, -0x7FFF, __LINE__}, + { -0x1000, 3, VPU_INT16_MIN, __LINE__}, + { -0x8000, 0, VPU_INT16_MIN, __LINE__}, }; const unsigned N_cases = sizeof(casses)/sizeof(test_case_t); @@ -206,8 +206,8 @@ TEST(vect_shl, vect_s32_shl_basic) { -0x00080000, -3, -0x00010000, __LINE__}, { -0x00080000, -4, -0x00008000, __LINE__}, { 0x10000000, 3, 0x7FFFFFFF, __LINE__}, - { -0x10000000, 3, -0x7FFFFFFF, __LINE__}, - { (int) (0-0x80000000), 0, -0x7FFFFFFF, __LINE__}, + { -0x10000000, 3, VPU_INT32_MIN, __LINE__}, + { (int) (0-0x80000000), 0, VPU_INT32_MIN, __LINE__}, }; const unsigned N_cases = sizeof(casses)/sizeof(test_case_t); diff --git a/tests/vect_tests/src/vect/test_vect_shr.c b/tests/vect_tests/src/vect/test_vect_shr.c index ee0a77b9..44cf3552 100644 --- a/tests/vect_tests/src/vect/test_vect_shr.c +++ b/tests/vect_tests/src/vect/test_vect_shr.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include @@ -83,8 +83,8 @@ TEST(vect_shr, vect_s16_shr_basic) { -0x0008, 3, -0x0001, __LINE__}, { -0x0008, 4, -0x0001, __LINE__}, { 0x1000, -3, 0x7FFF, __LINE__}, - { -0x1000, -3, -0x7FFF, __LINE__}, - { -0x8000, 0, -0x7FFF, __LINE__}, + { -0x1000, -3, VPU_INT16_MIN, __LINE__}, + { -0x8000, 0, VPU_INT16_MIN, __LINE__}, }; const unsigned N_cases = sizeof(cases)/sizeof(test_case_t); @@ -207,8 +207,8 @@ TEST(vect_shr, vect_s32_shr_basic) { -0x00080000, 3, -0x00010000, __LINE__}, { -0x00080000, 4, -0x00008000, __LINE__}, { 0x10000000, -3, 0x7FFFFFFF, __LINE__}, - { -0x10000000, -3, -0x7FFFFFFF, __LINE__}, - { (int) (0-0x80000000), 0, -0x7FFFFFFF, __LINE__}, + { -0x10000000, -3, VPU_INT32_MIN, __LINE__}, + { (int) (0-0x80000000), 0, VPU_INT32_MIN, __LINE__}, }; const unsigned N_cases = sizeof(cases)/sizeof(test_case_t); diff --git a/tests/vect_tests/src/vect/test_vect_sqrt.c b/tests/vect_tests/src/vect/test_vect_sqrt.c index 34ed703d..c7db2a4e 100644 --- a/tests/vect_tests/src/vect/test_vect_sqrt.c +++ b/tests/vect_tests/src/vect/test_vect_sqrt.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -206,7 +206,11 @@ TEST(vect_sqrt, vect_s16_sqrt_A) int16_t p = vlmul16(A[i], A[i]); int16_t p2 = vlmul16(A[i]+1, A[i]+1); - TEST_ASSERT_LESS_OR_EQUAL_INT16(target, p); + #if defined(__VX4B__) + TEST_ASSERT_LESS_OR_EQUAL_INT16(target+1, p); + #else + TEST_ASSERT_LESS_OR_EQUAL_INT16(target, p); + #endif TEST_ASSERT_GREATER_OR_EQUAL_INT16(target, p2); } } diff --git a/tests/vect_tests/src/vect/test_vect_sub.c b/tests/vect_tests/src/vect/test_vect_sub.c index c79de1b0..594af85c 100644 --- a/tests/vect_tests/src/vect/test_vect_sub.c +++ b/tests/vect_tests/src/vect/test_vect_sub.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include @@ -112,7 +112,7 @@ TEST(vect_sub, vect_s16_sub_basic) { { -0x0001, -0x0001 }, { 0, 0 }, 0x0000, __LINE__}, { { 0x1010, -0x0101 }, { 0, 0 }, 0x1111, __LINE__}, { { -0x1010, 0x0101 }, { 0, 0 }, -0x1111, __LINE__}, - { { -0x8000, -0x0000 }, { 0, 0 }, -0x7FFF, __LINE__}, + { { -0x8000, -0x0000 }, { 0, 0 }, VPU_INT16_MIN, __LINE__}, { { 0x4000, -0x4000 }, { 0, 0 }, 0x7FFF, __LINE__}, { { 0x7FFF, -0x7FFF }, { 0, 0 }, 0x7FFF, __LINE__}, @@ -272,7 +272,7 @@ TEST(vect_sub, vect_s32_sub_basic) { { -0x00000001, -0x00000001 }, { 0, 0 }, 0x00000000, __LINE__}, { { 0x00001010, -0x00000101 }, { 0, 0 }, 0x00001111, __LINE__}, { { -0x00001010, 0x00000101 }, { 0, 0 }, -0x00001111, __LINE__}, - { { (int) (0-0x80000000), -0x00000000 }, { 0, 0 }, -0x7FFFFFFF, __LINE__}, + { { (int) (0-0x80000000), -0x00000000 }, { 0, 0 }, VPU_INT32_MIN, __LINE__}, { { 0x40000000, -0x40000000 }, { 0, 0 }, 0x7FFFFFFF, __LINE__}, { { 0x7FFFFFFF, -0x7FFFFFFF }, { 0, 0 }, 0x7FFFFFFF, __LINE__}, @@ -375,7 +375,7 @@ TEST(vect_sub, vect_s32_sub_random) int b_shr = (pseudo_rand_uint32(&seed) % 5) - 2; int c_shr = (pseudo_rand_uint32(&seed) % 5) - 2; - const char sprintpat[] = "rep(%d)[%d of %u]: %ld <-- ((%ld >> %d) + (%ld >> %d)) (A[i]=0x%08X; B[i]=0x%08X; C[i]=0x%08X)"; + const char sprintpat[] = "rep(%d)[%d of %u]: %ld <-- ((%ld >> %d) - (%ld >> %d)) (A[i]=0x%08X; B[i]=0x%08X; C[i]=0x%08X)"; hr = vect_s32_sub(A, B, C, len, b_shr, c_shr); diff --git a/tests/vect_tests/src/vect/test_vect_sum.c b/tests/vect_tests/src/vect/test_vect_sum.c index 13ccc129..811abc78 100644 --- a/tests/vect_tests/src/vect/test_vect_sum.c +++ b/tests/vect_tests/src/vect/test_vect_sum.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#include @@ -80,7 +80,12 @@ TEST(vect_sum, vect_s16_sum_basic) int32_t exp = ((int32_t) casse->b) * len; - TEST_ASSERT_EQUAL_MSG(exp, result, casse->line); + #if defined(__VX4B__) + //this casts to 32 bit because it might not fit in 16 bits (due to rounding) + TEST_ASSERT_INT32_WITHIN(4, exp, result); + #else + TEST_ASSERT_EQUAL_MSG(exp, result, casse->line); + #endif } } } @@ -120,9 +125,11 @@ TEST(vect_sum, vect_s16_sum_random) for(unsigned int i = 0; i < len; i++){ exp += B[i]; } - - TEST_ASSERT_EQUAL(exp, result); - + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(4, exp, result); + #else + TEST_ASSERT_EQUAL(exp, result); + #endif } } #undef MAX_LEN diff --git a/tests/vect_tests/src/vect/test_vect_zip.c b/tests/vect_tests/src/vect/test_vect_zip.c index 91a97ad3..fedffbfd 100644 --- a/tests/vect_tests/src/vect/test_vect_zip.c +++ b/tests/vect_tests/src/vect/test_vect_zip.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/xs3_tests/src/dummy.xc b/tests/xs3_tests/src/dummy.xc index 0cc0a8f4..ad633f70 100644 --- a/tests/xs3_tests/src/dummy.xc +++ b/tests/xs3_tests/src/dummy.xc @@ -1,4 +1,4 @@ -// Copyright 2022-2024 XMOS LIMITED. +// Copyright 2022-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. diff --git a/tests/xs3_tests/src/main.c b/tests/xs3_tests/src/main.c index 5e022d27..bb6bf94b 100644 --- a/tests/xs3_tests/src/main.c +++ b/tests/xs3_tests/src/main.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
diff --git a/tests/xs3_tests/src/test_vpu_scalar_ops_s16.c b/tests/xs3_tests/src/test_vpu_scalar_ops_s16.c index 74c7b70a..39e45b3c 100644 --- a/tests/xs3_tests/src/test_vpu_scalar_ops_s16.c +++ b/tests/xs3_tests/src/test_vpu_scalar_ops_s16.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -220,7 +220,7 @@ TEST(vpu_scalar_ops_s16, vdepth8_16) { int8_t res = vdepth8_16( (int16_t) k ); - int32_t exp = lround(ldexp( k, -8) + ldexp(1, -30)); + int32_t exp = llround(ldexp( k, -8) + ldexp(1, -30)); exp = MIN(exp, VPU_INT8_MAX); exp = MAX(exp, VPU_INT8_MIN); diff --git a/tests/xs3_tests/src/test_vpu_scalar_ops_s32.c b/tests/xs3_tests/src/test_vpu_scalar_ops_s32.c index 40a1bf01..183a3044 100644 --- a/tests/xs3_tests/src/test_vpu_scalar_ops_s32.c +++ b/tests/xs3_tests/src/test_vpu_scalar_ops_s32.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include @@ -247,7 +247,7 @@ TEST(vpu_scalar_ops_s32, vdepth8_32) int8_t res = vdepth8_32( (int32_t) k ); - int32_t exp = lround(ldexp( (double) k, -24) + ldexp(1, -40)); + int32_t exp = llround(ldexp( (double) k, -24) + ldexp(1, -40)); exp = MIN(exp, VPU_INT8_MAX); exp = MAX(exp, VPU_INT8_MIN); @@ -279,7 +279,7 @@ TEST(vpu_scalar_ops_s32, vdepth16_32) int8_t res = (int8_t) vdepth16_32( (int32_t) k ); - int32_t exp = lround(ldexp( (double) k, -16) + ldexp(1, -40)); + int32_t exp = llround(ldexp( (double) k, -16) + ldexp(1, -40)); exp = MIN(exp, VPU_INT16_MAX); exp = MAX(exp, VPU_INT16_MIN); diff --git a/tests/xs3_tests/src/test_vpu_scalar_ops_s8.c b/tests/xs3_tests/src/test_vpu_scalar_ops_s8.c index 45cc1e32..5575e76e 100644 --- a/tests/xs3_tests/src/test_vpu_scalar_ops_s8.c +++ b/tests/xs3_tests/src/test_vpu_scalar_ops_s8.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. 
+// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include diff --git a/tests/xs3_tests/src/tst_asserts.h b/tests/xs3_tests/src/tst_asserts.h index 07a01c5e..676c84b1 100644 --- a/tests/xs3_tests/src/tst_asserts.h +++ b/tests/xs3_tests/src/tst_asserts.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/tests/xs3_tests/src/tst_common.c b/tests/xs3_tests/src/tst_common.c index f9a6fae6..048d5058 100644 --- a/tests/xs3_tests/src/tst_common.c +++ b/tests/xs3_tests/src/tst_common.c @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #include "tst_common.h" diff --git a/tests/xs3_tests/src/tst_common.h b/tests/xs3_tests/src/tst_common.h index 3275c311..dd1eb0cf 100644 --- a/tests/xs3_tests/src/tst_common.h +++ b/tests/xs3_tests/src/tst_common.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once diff --git a/tests/xs3_tests/src/unity_config.h b/tests/xs3_tests/src/unity_config.h index 2998718d..c0699a63 100644 --- a/tests/xs3_tests/src/unity_config.h +++ b/tests/xs3_tests/src/unity_config.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024 XMOS LIMITED. +// Copyright 2020-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. #pragma once