From 22d6d49676a298766c801d1b074f9eb755fe23b2 Mon Sep 17 00:00:00 2001 From: Michael Yeh <111819036+myeh01@users.noreply.github.com> Date: Wed, 20 Nov 2024 19:12:16 -0800 Subject: [PATCH 1/3] Add the sifive_rvv configuration --- config/sifive_rvv/bli_cntx_init_sifive_rvv.c | 222 +++++++++++++++++ config/sifive_rvv/bli_family_sifive_rvv.h | 34 +++ .../sifive_rvv/bli_kernel_defs_sifive_rvv.h | 55 +++++ config/sifive_rvv/make_defs.mk | 80 ++++++ .../sifive_x280/bli_cntx_init_sifive_x280.c | 230 +++++++++--------- config/sifive_x280/make_defs.mk | 4 +- config_registry | 3 +- frame/base/bli_arch.c | 4 + frame/include/bli_arch_config.h | 6 + frame/include/bli_gentconf_macro_defs.h | 6 + frame/include/bli_type_defs.h | 1 + .../bli_addv_sifive_rvv_intr.c} | 12 +- .../bli_addv_sifive_rvv_intr_complex.c} | 2 +- .../bli_addv_sifive_rvv_intr_real.c} | 2 +- .../bli_amaxv_sifive_rvv_intr.c} | 28 +-- .../bli_amaxv_sifive_rvv_intr_complex.c} | 0 .../bli_amaxv_sifive_rvv_intr_real.c} | 0 .../bli_axpbyv_sifive_rvv_intr.c} | 18 +- .../bli_axpbyv_sifive_rvv_intr_complex.c} | 2 +- .../bli_axpbyv_sifive_rvv_intr_real.c} | 2 +- .../bli_axpyv_sifive_rvv_intr.c} | 12 +- .../bli_axpyv_sifive_rvv_intr_complex.c} | 2 +- .../bli_axpyv_sifive_rvv_intr_real.c} | 2 +- .../bli_copyv_sifive_rvv_intr.c} | 10 +- .../bli_copyv_sifive_rvv_intr_complex.c} | 0 .../bli_copyv_sifive_rvv_intr_real.c} | 0 .../bli_dotv_sifive_rvv_intr.c} | 12 +- .../bli_dotv_sifive_rvv_intr_complex.c} | 2 +- .../bli_dotv_sifive_rvv_intr_real.c} | 2 +- .../bli_dotxv_sifive_rvv_intr.c} | 12 +- .../bli_dotxv_sifive_rvv_intr_complex.c} | 2 +- .../bli_dotxv_sifive_rvv_intr_real.c} | 2 +- .../bli_invertv_sifive_rvv_intr.c} | 10 +- .../bli_invertv_sifive_rvv_intr_complex.c} | 0 .../bli_invertv_sifive_rvv_intr_real.c} | 0 .../bli_invscalv_sifive_rvv_intr.c} | 10 +- .../bli_invscalv_sifive_rvv_intr_complex.c} | 0 .../bli_invscalv_sifive_rvv_intr_real.c} | 0 .../bli_scal2v_sifive_rvv_intr.c} | 16 +- 
.../bli_scal2v_sifive_rvv_intr_complex.c} | 2 +- .../bli_scal2v_sifive_rvv_intr_real.c} | 2 +- .../bli_scalv_sifive_rvv_intr.c} | 14 +- .../bli_scalv_sifive_rvv_intr_complex.c} | 2 +- .../bli_scalv_sifive_rvv_intr_real.c} | 2 +- .../bli_setv_sifive_rvv_intr.c} | 10 +- .../bli_setv_sifive_rvv_intr_complex.c} | 0 .../bli_setv_sifive_rvv_intr_real.c} | 0 .../bli_subv_sifive_rvv_intr.c} | 12 +- .../bli_subv_sifive_rvv_intr_complex.c} | 2 +- .../bli_subv_sifive_rvv_intr_real.c} | 2 +- .../bli_swapv_sifive_rvv_intr.c} | 10 +- .../bli_swapv_sifive_rvv_intr_complex.c} | 0 .../bli_swapv_sifive_rvv_intr_real.c} | 0 .../bli_xpbyv_sifive_rvv_intr.c} | 14 +- .../bli_xpbyv_sifive_rvv_intr_complex.c} | 2 +- .../bli_xpbyv_sifive_rvv_intr_real.c} | 2 +- .../bli_axpy2v_sifive_rvv_intr.c} | 12 +- .../bli_axpy2v_sifive_rvv_intr_complex.c} | 2 +- .../bli_axpy2v_sifive_rvv_intr_real.c} | 2 +- .../bli_axpyf_sifive_rvv_intr.c} | 10 +- .../bli_axpyf_sifive_rvv_intr_complex.c} | 0 .../bli_axpyf_sifive_rvv_intr_real.c} | 0 .../bli_dotaxpyv_sifive_rvv_intr.c} | 12 +- .../bli_dotaxpyv_sifive_rvv_intr_complex.c} | 2 +- .../bli_dotaxpyv_sifive_rvv_intr_real.c} | 2 +- .../bli_dotxaxpyf_sifive_rvv_intr.c} | 14 +- .../bli_dotxaxpyf_sifive_rvv_intr_complex.c} | 134 +++++----- .../bli_dotxaxpyf_sifive_rvv_intr_real.c} | 86 +++---- .../bli_dotxf_sifive_rvv_intr.c} | 14 +- .../bli_dotxf_sifive_rvv_intr_complex.c} | 126 +++++----- .../bli_dotxf_sifive_rvv_intr_real.c} | 110 ++++----- .../bli_packm_sifive_rvv_intr.c} | 25 +- .../bli_packm_sifive_rvv_intr_complex.c} | 19 +- .../bli_packm_sifive_rvv_intr_real.c} | 22 +- .../bli_gemm_sifive_rvv_intr.c} | 18 +- .../bli_gemm_sifive_rvv_intr_complex.c} | 0 .../bli_gemm_sifive_rvv_intr_real.c} | 0 .../bli_gemmtrsm_sifive_rvv_intr.c} | 22 +- .../bli_gemmtrsm_sifive_rvv_intr_complex.c} | 2 +- .../bli_gemmtrsm_sifive_rvv_intr_real.c} | 2 +- kernels/sifive_rvv/bli_kernels_sifive_rvv.h | 162 ++++++++++++ .../riscv_cmul_macros_intr.h | 0 
.../riscv_overloaded_intrinsics.h | 2 +- kernels/sifive_x280/bli_kernels_sifive_x280.h | 162 ------------ kernels/sifive_x280/riscv_cmul_macros_asm.h | 137 ----------- 85 files changed, 1129 insertions(+), 851 deletions(-) create mode 100644 config/sifive_rvv/bli_cntx_init_sifive_rvv.c create mode 100644 config/sifive_rvv/bli_family_sifive_rvv.h create mode 100644 config/sifive_rvv/bli_kernel_defs_sifive_rvv.h create mode 100644 config/sifive_rvv/make_defs.mk rename kernels/{sifive_x280/1/bli_addv_sifive_x280_intr/bli_addv_sifive_x280_intr.c => sifive_rvv/1/bli_addv_sifive_rvv_intr/bli_addv_sifive_rvv_intr.c} (92%) rename kernels/{sifive_x280/1/bli_addv_sifive_x280_intr/bli_addv_sifive_x280_intr_complex.c => sifive_rvv/1/bli_addv_sifive_rvv_intr/bli_addv_sifive_rvv_intr_complex.c} (98%) rename kernels/{sifive_x280/1/bli_addv_sifive_x280_intr/bli_addv_sifive_x280_intr_real.c => sifive_rvv/1/bli_addv_sifive_rvv_intr/bli_addv_sifive_rvv_intr_real.c} (98%) rename kernels/{sifive_x280/1/bli_amaxv_sifive_x280_intr/bli_amaxv_sifive_x280_intr.c => sifive_rvv/1/bli_amaxv_sifive_rvv_intr/bli_amaxv_sifive_rvv_intr.c} (86%) rename kernels/{sifive_x280/1/bli_amaxv_sifive_x280_intr/bli_amaxv_sifive_x280_intr_complex.c => sifive_rvv/1/bli_amaxv_sifive_rvv_intr/bli_amaxv_sifive_rvv_intr_complex.c} (100%) rename kernels/{sifive_x280/1/bli_amaxv_sifive_x280_intr/bli_amaxv_sifive_x280_intr_real.c => sifive_rvv/1/bli_amaxv_sifive_rvv_intr/bli_amaxv_sifive_rvv_intr_real.c} (100%) rename kernels/{sifive_x280/1/bli_axpbyv_sifive_x280_intr/bli_axpbyv_sifive_x280_intr.c => sifive_rvv/1/bli_axpbyv_sifive_rvv_intr/bli_axpbyv_sifive_rvv_intr.c} (92%) rename kernels/{sifive_x280/1/bli_axpbyv_sifive_x280_intr/bli_axpbyv_sifive_x280_intr_complex.c => sifive_rvv/1/bli_axpbyv_sifive_rvv_intr/bli_axpbyv_sifive_rvv_intr_complex.c} (99%) rename kernels/{sifive_x280/1/bli_axpbyv_sifive_x280_intr/bli_axpbyv_sifive_x280_intr_real.c => 
sifive_rvv/1/bli_axpbyv_sifive_rvv_intr/bli_axpbyv_sifive_rvv_intr_real.c} (98%) rename kernels/{sifive_x280/1/bli_axpyv_sifive_x280_intr/bli_axpyv_sifive_x280_intr.c => sifive_rvv/1/bli_axpyv_sifive_rvv_intr/bli_axpyv_sifive_rvv_intr.c} (92%) rename kernels/{sifive_x280/1/bli_axpyv_sifive_x280_intr/bli_axpyv_sifive_x280_intr_complex.c => sifive_rvv/1/bli_axpyv_sifive_rvv_intr/bli_axpyv_sifive_rvv_intr_complex.c} (99%) rename kernels/{sifive_x280/1/bli_axpyv_sifive_x280_intr/bli_axpyv_sifive_x280_intr_real.c => sifive_rvv/1/bli_axpyv_sifive_rvv_intr/bli_axpyv_sifive_rvv_intr_real.c} (98%) rename kernels/{sifive_x280/1/bli_copyv_sifive_x280_intr/bli_copyv_sifive_x280_intr.c => sifive_rvv/1/bli_copyv_sifive_rvv_intr/bli_copyv_sifive_rvv_intr.c} (93%) rename kernels/{sifive_x280/1/bli_copyv_sifive_x280_intr/bli_copyv_sifive_x280_intr_complex.c => sifive_rvv/1/bli_copyv_sifive_rvv_intr/bli_copyv_sifive_rvv_intr_complex.c} (100%) rename kernels/{sifive_x280/1/bli_copyv_sifive_x280_intr/bli_copyv_sifive_x280_intr_real.c => sifive_rvv/1/bli_copyv_sifive_rvv_intr/bli_copyv_sifive_rvv_intr_real.c} (100%) rename kernels/{sifive_x280/1/bli_dotv_sifive_x280_intr/bli_dotv_sifive_x280_intr.c => sifive_rvv/1/bli_dotv_sifive_rvv_intr/bli_dotv_sifive_rvv_intr.c} (92%) rename kernels/{sifive_x280/1/bli_dotv_sifive_x280_intr/bli_dotv_sifive_x280_intr_complex.c => sifive_rvv/1/bli_dotv_sifive_rvv_intr/bli_dotv_sifive_rvv_intr_complex.c} (99%) rename kernels/{sifive_x280/1/bli_dotv_sifive_x280_intr/bli_dotv_sifive_x280_intr_real.c => sifive_rvv/1/bli_dotv_sifive_rvv_intr/bli_dotv_sifive_rvv_intr_real.c} (98%) rename kernels/{sifive_x280/1/bli_dotxv_sifive_x280_intr/bli_dotxv_sifive_x280_intr.c => sifive_rvv/1/bli_dotxv_sifive_rvv_intr/bli_dotxv_sifive_rvv_intr.c} (93%) rename kernels/{sifive_x280/1/bli_dotxv_sifive_x280_intr/bli_dotxv_sifive_x280_intr_complex.c => sifive_rvv/1/bli_dotxv_sifive_rvv_intr/bli_dotxv_sifive_rvv_intr_complex.c} (99%) rename 
kernels/{sifive_x280/1/bli_dotxv_sifive_x280_intr/bli_dotxv_sifive_x280_intr_real.c => sifive_rvv/1/bli_dotxv_sifive_rvv_intr/bli_dotxv_sifive_rvv_intr_real.c} (98%) rename kernels/{sifive_x280/1/bli_invertv_sifive_x280_intr/bli_invertv_sifive_x280_intr.c => sifive_rvv/1/bli_invertv_sifive_rvv_intr/bli_invertv_sifive_rvv_intr.c} (93%) rename kernels/{sifive_x280/1/bli_invertv_sifive_x280_intr/bli_invertv_sifive_x280_intr_complex.c => sifive_rvv/1/bli_invertv_sifive_rvv_intr/bli_invertv_sifive_rvv_intr_complex.c} (100%) rename kernels/{sifive_x280/1/bli_invertv_sifive_x280_intr/bli_invertv_sifive_x280_intr_real.c => sifive_rvv/1/bli_invertv_sifive_rvv_intr/bli_invertv_sifive_rvv_intr_real.c} (100%) rename kernels/{sifive_x280/1/bli_invscalv_sifive_x280_intr/bli_invscalv_sifive_x280_intr.c => sifive_rvv/1/bli_invscalv_sifive_rvv_intr/bli_invscalv_sifive_rvv_intr.c} (93%) rename kernels/{sifive_x280/1/bli_invscalv_sifive_x280_intr/bli_invscalv_sifive_x280_intr_complex.c => sifive_rvv/1/bli_invscalv_sifive_rvv_intr/bli_invscalv_sifive_rvv_intr_complex.c} (100%) rename kernels/{sifive_x280/1/bli_invscalv_sifive_x280_intr/bli_invscalv_sifive_x280_intr_real.c => sifive_rvv/1/bli_invscalv_sifive_rvv_intr/bli_invscalv_sifive_rvv_intr_real.c} (100%) rename kernels/{sifive_x280/1/bli_scal2v_sifive_x280_intr/bli_scal2v_sifive_x280_intr.c => sifive_rvv/1/bli_scal2v_sifive_rvv_intr/bli_scal2v_sifive_rvv_intr.c} (92%) rename kernels/{sifive_x280/1/bli_scal2v_sifive_x280_intr/bli_scal2v_sifive_x280_intr_complex.c => sifive_rvv/1/bli_scal2v_sifive_rvv_intr/bli_scal2v_sifive_rvv_intr_complex.c} (99%) rename kernels/{sifive_x280/1/bli_scal2v_sifive_x280_intr/bli_scal2v_sifive_x280_intr_real.c => sifive_rvv/1/bli_scal2v_sifive_rvv_intr/bli_scal2v_sifive_rvv_intr_real.c} (98%) rename kernels/{sifive_x280/1/bli_scalv_sifive_x280_intr/bli_scalv_sifive_x280_intr.c => sifive_rvv/1/bli_scalv_sifive_rvv_intr/bli_scalv_sifive_rvv_intr.c} (92%) rename 
kernels/{sifive_x280/1/bli_scalv_sifive_x280_intr/bli_scalv_sifive_x280_intr_complex.c => sifive_rvv/1/bli_scalv_sifive_rvv_intr/bli_scalv_sifive_rvv_intr_complex.c} (98%) rename kernels/{sifive_x280/1/bli_scalv_sifive_x280_intr/bli_scalv_sifive_x280_intr_real.c => sifive_rvv/1/bli_scalv_sifive_rvv_intr/bli_scalv_sifive_rvv_intr_real.c} (98%) rename kernels/{sifive_x280/1/bli_setv_sifive_x280_intr/bli_setv_sifive_x280_intr.c => sifive_rvv/1/bli_setv_sifive_rvv_intr/bli_setv_sifive_rvv_intr.c} (93%) rename kernels/{sifive_x280/1/bli_setv_sifive_x280_intr/bli_setv_sifive_x280_intr_complex.c => sifive_rvv/1/bli_setv_sifive_rvv_intr/bli_setv_sifive_rvv_intr_complex.c} (100%) rename kernels/{sifive_x280/1/bli_setv_sifive_x280_intr/bli_setv_sifive_x280_intr_real.c => sifive_rvv/1/bli_setv_sifive_rvv_intr/bli_setv_sifive_rvv_intr_real.c} (100%) rename kernels/{sifive_x280/1/bli_subv_sifive_x280_intr/bli_subv_sifive_x280_intr.c => sifive_rvv/1/bli_subv_sifive_rvv_intr/bli_subv_sifive_rvv_intr.c} (92%) rename kernels/{sifive_x280/1/bli_subv_sifive_x280_intr/bli_subv_sifive_x280_intr_complex.c => sifive_rvv/1/bli_subv_sifive_rvv_intr/bli_subv_sifive_rvv_intr_complex.c} (98%) rename kernels/{sifive_x280/1/bli_subv_sifive_x280_intr/bli_subv_sifive_x280_intr_real.c => sifive_rvv/1/bli_subv_sifive_rvv_intr/bli_subv_sifive_rvv_intr_real.c} (98%) rename kernels/{sifive_x280/1/bli_swapv_sifive_x280_intr/bli_swapv_sifive_x280_intr.c => sifive_rvv/1/bli_swapv_sifive_rvv_intr/bli_swapv_sifive_rvv_intr.c} (93%) rename kernels/{sifive_x280/1/bli_swapv_sifive_x280_intr/bli_swapv_sifive_x280_intr_complex.c => sifive_rvv/1/bli_swapv_sifive_rvv_intr/bli_swapv_sifive_rvv_intr_complex.c} (100%) rename kernels/{sifive_x280/1/bli_swapv_sifive_x280_intr/bli_swapv_sifive_x280_intr_real.c => sifive_rvv/1/bli_swapv_sifive_rvv_intr/bli_swapv_sifive_rvv_intr_real.c} (100%) rename kernels/{sifive_x280/1/bli_xpbyv_sifive_x280_intr/bli_xpbyv_sifive_x280_intr.c => 
sifive_rvv/1/bli_xpbyv_sifive_rvv_intr/bli_xpbyv_sifive_rvv_intr.c} (92%) rename kernels/{sifive_x280/1/bli_xpbyv_sifive_x280_intr/bli_xpbyv_sifive_x280_intr_complex.c => sifive_rvv/1/bli_xpbyv_sifive_rvv_intr/bli_xpbyv_sifive_rvv_intr_complex.c} (99%) rename kernels/{sifive_x280/1/bli_xpbyv_sifive_x280_intr/bli_xpbyv_sifive_x280_intr_real.c => sifive_rvv/1/bli_xpbyv_sifive_rvv_intr/bli_xpbyv_sifive_rvv_intr_real.c} (98%) rename kernels/{sifive_x280/1f/bli_axpy2v_sifive_x280_intr/bli_axpy2v_sifive_x280_intr.c => sifive_rvv/1f/bli_axpy2v_sifive_rvv_intr/bli_axpy2v_sifive_rvv_intr.c} (92%) rename kernels/{sifive_x280/1f/bli_axpy2v_sifive_x280_intr/bli_axpy2v_sifive_x280_intr_complex.c => sifive_rvv/1f/bli_axpy2v_sifive_rvv_intr/bli_axpy2v_sifive_rvv_intr_complex.c} (99%) rename kernels/{sifive_x280/1f/bli_axpy2v_sifive_x280_intr/bli_axpy2v_sifive_x280_intr_real.c => sifive_rvv/1f/bli_axpy2v_sifive_rvv_intr/bli_axpy2v_sifive_rvv_intr_real.c} (98%) rename kernels/{sifive_x280/1f/bli_axpyf_sifive_x280_intr/bli_axpyf_sifive_x280_intr.c => sifive_rvv/1f/bli_axpyf_sifive_rvv_intr/bli_axpyf_sifive_rvv_intr.c} (94%) rename kernels/{sifive_x280/1f/bli_axpyf_sifive_x280_intr/bli_axpyf_sifive_x280_intr_complex.c => sifive_rvv/1f/bli_axpyf_sifive_rvv_intr/bli_axpyf_sifive_rvv_intr_complex.c} (100%) rename kernels/{sifive_x280/1f/bli_axpyf_sifive_x280_intr/bli_axpyf_sifive_x280_intr_real.c => sifive_rvv/1f/bli_axpyf_sifive_rvv_intr/bli_axpyf_sifive_rvv_intr_real.c} (100%) rename kernels/{sifive_x280/1f/bli_dotaxpyv_sifive_x280_intr/bli_dotaxpyv_sifive_x280_intr.c => sifive_rvv/1f/bli_dotaxpyv_sifive_rvv_intr/bli_dotaxpyv_sifive_rvv_intr.c} (92%) rename kernels/{sifive_x280/1f/bli_dotaxpyv_sifive_x280_intr/bli_dotaxpyv_sifive_x280_intr_complex.c => sifive_rvv/1f/bli_dotaxpyv_sifive_rvv_intr/bli_dotaxpyv_sifive_rvv_intr_complex.c} (99%) rename kernels/{sifive_x280/1f/bli_dotaxpyv_sifive_x280_intr/bli_dotaxpyv_sifive_x280_intr_real.c => 
sifive_rvv/1f/bli_dotaxpyv_sifive_rvv_intr/bli_dotaxpyv_sifive_rvv_intr_real.c} (99%) rename kernels/{sifive_x280/1f/bli_dotxaxpyf_sifive_x280_intr/bli_dotxaxpyf_sifive_x280_intr.c => sifive_rvv/1f/bli_dotxaxpyf_sifive_rvv_intr/bli_dotxaxpyf_sifive_rvv_intr.c} (93%) rename kernels/{sifive_x280/1f/bli_dotxaxpyf_sifive_x280_intr/bli_dotxaxpyf_sifive_x280_intr_complex.c => sifive_rvv/1f/bli_dotxaxpyf_sifive_rvv_intr/bli_dotxaxpyf_sifive_rvv_intr_complex.c} (76%) rename kernels/{sifive_x280/1f/bli_dotxaxpyf_sifive_x280_intr/bli_dotxaxpyf_sifive_x280_intr_real.c => sifive_rvv/1f/bli_dotxaxpyf_sifive_rvv_intr/bli_dotxaxpyf_sifive_rvv_intr_real.c} (79%) rename kernels/{sifive_x280/1f/bli_dotxf_sifive_x280_intr/bli_dotxf_sifive_x280_intr.c => sifive_rvv/1f/bli_dotxf_sifive_rvv_intr/bli_dotxf_sifive_rvv_intr.c} (94%) rename kernels/{sifive_x280/1f/bli_dotxf_sifive_x280_intr/bli_dotxf_sifive_x280_intr_complex.c => sifive_rvv/1f/bli_dotxf_sifive_rvv_intr/bli_dotxf_sifive_rvv_intr_complex.c} (74%) rename kernels/{sifive_x280/1f/bli_dotxf_sifive_x280_intr/bli_dotxf_sifive_x280_intr_real.c => sifive_rvv/1f/bli_dotxf_sifive_rvv_intr/bli_dotxf_sifive_rvv_intr_real.c} (72%) rename kernels/{sifive_x280/1m/bli_packm_sifive_x280_intr/bli_packm_sifive_x280_intr.c => sifive_rvv/1m/bli_packm_sifive_rvv_intr/bli_packm_sifive_rvv_intr.c} (86%) rename kernels/{sifive_x280/1m/bli_packm_sifive_x280_intr/bli_packm_sifive_x280_intr_complex.c => sifive_rvv/1m/bli_packm_sifive_rvv_intr/bli_packm_sifive_rvv_intr_complex.c} (99%) rename kernels/{sifive_x280/1m/bli_packm_sifive_x280_intr/bli_packm_sifive_x280_intr_real.c => sifive_rvv/1m/bli_packm_sifive_rvv_intr/bli_packm_sifive_rvv_intr_real.c} (98%) rename kernels/{sifive_x280/3/bli_gemm_sifive_x280_intr/bli_gemm_sifive_x280_intr.c => sifive_rvv/3/bli_gemm_sifive_rvv_intr/bli_gemm_sifive_rvv_intr.c} (90%) rename kernels/{sifive_x280/3/bli_gemm_sifive_x280_intr/bli_gemm_sifive_x280_intr_complex.c => 
sifive_rvv/3/bli_gemm_sifive_rvv_intr/bli_gemm_sifive_rvv_intr_complex.c} (100%) rename kernels/{sifive_x280/3/bli_gemm_sifive_x280_intr/bli_gemm_sifive_x280_intr_real.c => sifive_rvv/3/bli_gemm_sifive_rvv_intr/bli_gemm_sifive_rvv_intr_real.c} (100%) rename kernels/{sifive_x280/3/bli_gemmtrsm_sifive_x280_intr/bli_gemmtrsm_sifive_x280_intr.c => sifive_rvv/3/bli_gemmtrsm_sifive_rvv_intr/bli_gemmtrsm_sifive_rvv_intr.c} (89%) rename kernels/{sifive_x280/3/bli_gemmtrsm_sifive_x280_intr/bli_gemmtrsm_sifive_x280_intr_complex.c => sifive_rvv/3/bli_gemmtrsm_sifive_rvv_intr/bli_gemmtrsm_sifive_rvv_intr_complex.c} (99%) rename kernels/{sifive_x280/3/bli_gemmtrsm_sifive_x280_intr/bli_gemmtrsm_sifive_x280_intr_real.c => sifive_rvv/3/bli_gemmtrsm_sifive_rvv_intr/bli_gemmtrsm_sifive_rvv_intr_real.c} (99%) create mode 100644 kernels/sifive_rvv/bli_kernels_sifive_rvv.h rename kernels/{sifive_x280 => sifive_rvv}/riscv_cmul_macros_intr.h (100%) rename kernels/{sifive_x280 => sifive_rvv}/riscv_overloaded_intrinsics.h (99%) delete mode 100644 kernels/sifive_x280/bli_kernels_sifive_x280.h delete mode 100644 kernels/sifive_x280/riscv_cmul_macros_asm.h diff --git a/config/sifive_rvv/bli_cntx_init_sifive_rvv.c b/config/sifive_rvv/bli_cntx_init_sifive_rvv.c new file mode 100644 index 0000000000..222a837434 --- /dev/null +++ b/config/sifive_rvv/bli_cntx_init_sifive_rvv.c @@ -0,0 +1,222 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2024, SiFive, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +void bli_cntx_init_sifive_rvv( cntx_t* cntx ) +{ + blksz_t blkszs[ BLIS_NUM_BLKSZS ]; + + // Set default kernel blocksizes and functions. + bli_cntx_init_sifive_rvv_ref( cntx ); + + // ------------------------------------------------------------------------- + + // Update the context with optimized native kernels. 
+ bli_cntx_set_ukrs + ( + cntx, + + // Level 1 + BLIS_ADDV_KER, BLIS_FLOAT, bli_saddv_sifive_rvv_intr, + BLIS_ADDV_KER, BLIS_DOUBLE, bli_daddv_sifive_rvv_intr, + BLIS_ADDV_KER, BLIS_SCOMPLEX, bli_caddv_sifive_rvv_intr, + BLIS_ADDV_KER, BLIS_DCOMPLEX, bli_zaddv_sifive_rvv_intr, + + BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_sifive_rvv_intr, + BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_sifive_rvv_intr, + BLIS_AMAXV_KER, BLIS_SCOMPLEX, bli_camaxv_sifive_rvv_intr, + BLIS_AMAXV_KER, BLIS_DCOMPLEX, bli_zamaxv_sifive_rvv_intr, + + BLIS_AXPBYV_KER, BLIS_FLOAT, bli_saxpbyv_sifive_rvv_intr, + BLIS_AXPBYV_KER, BLIS_DOUBLE, bli_daxpbyv_sifive_rvv_intr, + BLIS_AXPBYV_KER, BLIS_SCOMPLEX, bli_caxpbyv_sifive_rvv_intr, + BLIS_AXPBYV_KER, BLIS_DCOMPLEX, bli_zaxpbyv_sifive_rvv_intr, + + BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_sifive_rvv_intr, + BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_sifive_rvv_intr, + BLIS_AXPYV_KER, BLIS_SCOMPLEX, bli_caxpyv_sifive_rvv_intr, + BLIS_AXPYV_KER, BLIS_DCOMPLEX, bli_zaxpyv_sifive_rvv_intr, + + BLIS_COPYV_KER, BLIS_FLOAT, bli_scopyv_sifive_rvv_intr, + BLIS_COPYV_KER, BLIS_DOUBLE, bli_dcopyv_sifive_rvv_intr, + BLIS_COPYV_KER, BLIS_SCOMPLEX, bli_ccopyv_sifive_rvv_intr, + BLIS_COPYV_KER, BLIS_DCOMPLEX, bli_zcopyv_sifive_rvv_intr, + + BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_sifive_rvv_intr, + BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_sifive_rvv_intr, + BLIS_DOTV_KER, BLIS_SCOMPLEX, bli_cdotv_sifive_rvv_intr, + BLIS_DOTV_KER, BLIS_DCOMPLEX, bli_zdotv_sifive_rvv_intr, + + BLIS_DOTXV_KER, BLIS_FLOAT, bli_sdotxv_sifive_rvv_intr, + BLIS_DOTXV_KER, BLIS_DOUBLE, bli_ddotxv_sifive_rvv_intr, + BLIS_DOTXV_KER, BLIS_SCOMPLEX, bli_cdotxv_sifive_rvv_intr, + BLIS_DOTXV_KER, BLIS_DCOMPLEX, bli_zdotxv_sifive_rvv_intr, + + BLIS_INVERTV_KER, BLIS_FLOAT, bli_sinvertv_sifive_rvv_intr, + BLIS_INVERTV_KER, BLIS_DOUBLE, bli_dinvertv_sifive_rvv_intr, + BLIS_INVERTV_KER, BLIS_SCOMPLEX, bli_cinvertv_sifive_rvv_intr, + BLIS_INVERTV_KER, BLIS_DCOMPLEX, bli_zinvertv_sifive_rvv_intr, + + 
BLIS_INVSCALV_KER, BLIS_FLOAT, bli_sinvscalv_sifive_rvv_intr, + BLIS_INVSCALV_KER, BLIS_DOUBLE, bli_dinvscalv_sifive_rvv_intr, + BLIS_INVSCALV_KER, BLIS_SCOMPLEX, bli_cinvscalv_sifive_rvv_intr, + BLIS_INVSCALV_KER, BLIS_DCOMPLEX, bli_zinvscalv_sifive_rvv_intr, + + BLIS_SCAL2V_KER, BLIS_FLOAT, bli_sscal2v_sifive_rvv_intr, + BLIS_SCAL2V_KER, BLIS_DOUBLE, bli_dscal2v_sifive_rvv_intr, + BLIS_SCAL2V_KER, BLIS_SCOMPLEX, bli_cscal2v_sifive_rvv_intr, + BLIS_SCAL2V_KER, BLIS_DCOMPLEX, bli_zscal2v_sifive_rvv_intr, + + BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_sifive_rvv_intr, + BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_sifive_rvv_intr, + BLIS_SCALV_KER, BLIS_SCOMPLEX, bli_cscalv_sifive_rvv_intr, + BLIS_SCALV_KER, BLIS_DCOMPLEX, bli_zscalv_sifive_rvv_intr, + + BLIS_SETV_KER, BLIS_FLOAT, bli_ssetv_sifive_rvv_intr, + BLIS_SETV_KER, BLIS_DOUBLE, bli_dsetv_sifive_rvv_intr, + BLIS_SETV_KER, BLIS_SCOMPLEX, bli_csetv_sifive_rvv_intr, + BLIS_SETV_KER, BLIS_DCOMPLEX, bli_zsetv_sifive_rvv_intr, + + BLIS_SUBV_KER, BLIS_FLOAT, bli_ssubv_sifive_rvv_intr, + BLIS_SUBV_KER, BLIS_DOUBLE, bli_dsubv_sifive_rvv_intr, + BLIS_SUBV_KER, BLIS_SCOMPLEX, bli_csubv_sifive_rvv_intr, + BLIS_SUBV_KER, BLIS_DCOMPLEX, bli_zsubv_sifive_rvv_intr, + + BLIS_SWAPV_KER, BLIS_FLOAT, bli_sswapv_sifive_rvv_intr, + BLIS_SWAPV_KER, BLIS_DOUBLE, bli_dswapv_sifive_rvv_intr, + BLIS_SWAPV_KER, BLIS_SCOMPLEX, bli_cswapv_sifive_rvv_intr, + BLIS_SWAPV_KER, BLIS_DCOMPLEX, bli_zswapv_sifive_rvv_intr, + + BLIS_XPBYV_KER, BLIS_FLOAT, bli_sxpbyv_sifive_rvv_intr, + BLIS_XPBYV_KER, BLIS_DOUBLE, bli_dxpbyv_sifive_rvv_intr, + BLIS_XPBYV_KER, BLIS_SCOMPLEX, bli_cxpbyv_sifive_rvv_intr, + BLIS_XPBYV_KER, BLIS_DCOMPLEX, bli_zxpbyv_sifive_rvv_intr, + + // Level 1f + BLIS_AXPY2V_KER, BLIS_FLOAT, bli_saxpy2v_sifive_rvv_intr, + BLIS_AXPY2V_KER, BLIS_DOUBLE, bli_daxpy2v_sifive_rvv_intr, + BLIS_AXPY2V_KER, BLIS_SCOMPLEX, bli_caxpy2v_sifive_rvv_intr, + BLIS_AXPY2V_KER, BLIS_DCOMPLEX, bli_zaxpy2v_sifive_rvv_intr, + + BLIS_AXPYF_KER, BLIS_FLOAT, 
bli_saxpyf_sifive_rvv_intr, + BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_sifive_rvv_intr, + BLIS_AXPYF_KER, BLIS_SCOMPLEX, bli_caxpyf_sifive_rvv_intr, + BLIS_AXPYF_KER, BLIS_DCOMPLEX, bli_zaxpyf_sifive_rvv_intr, + + BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_sifive_rvv_intr, + BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_sifive_rvv_intr, + BLIS_DOTXF_KER, BLIS_SCOMPLEX, bli_cdotxf_sifive_rvv_intr, + BLIS_DOTXF_KER, BLIS_DCOMPLEX, bli_zdotxf_sifive_rvv_intr, + + BLIS_DOTAXPYV_KER, BLIS_FLOAT, bli_sdotaxpyv_sifive_rvv_intr, + BLIS_DOTAXPYV_KER, BLIS_DOUBLE, bli_ddotaxpyv_sifive_rvv_intr, + BLIS_DOTAXPYV_KER, BLIS_SCOMPLEX, bli_cdotaxpyv_sifive_rvv_intr, + BLIS_DOTAXPYV_KER, BLIS_DCOMPLEX, bli_zdotaxpyv_sifive_rvv_intr, + + BLIS_DOTXAXPYF_KER, BLIS_FLOAT, bli_sdotxaxpyf_sifive_rvv_intr, + BLIS_DOTXAXPYF_KER, BLIS_DOUBLE, bli_ddotxaxpyf_sifive_rvv_intr, + BLIS_DOTXAXPYF_KER, BLIS_SCOMPLEX, bli_cdotxaxpyf_sifive_rvv_intr, + BLIS_DOTXAXPYF_KER, BLIS_DCOMPLEX, bli_zdotxaxpyf_sifive_rvv_intr, + + // Level 1m + BLIS_PACKM_KER, BLIS_FLOAT, bli_spackm_sifive_rvv_intr, + BLIS_PACKM_KER, BLIS_DOUBLE, bli_dpackm_sifive_rvv_intr, + BLIS_PACKM_KER, BLIS_SCOMPLEX, bli_cpackm_sifive_rvv_intr, + BLIS_PACKM_KER, BLIS_DCOMPLEX, bli_zpackm_sifive_rvv_intr, + + // Level 3 + BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_sifive_rvv_intr, + BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_sifive_rvv_intr, + BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_sifive_rvv_intr, + BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_sifive_rvv_intr, + + BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsm_l_sifive_rvv_intr, + BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_sifive_rvv_intr, + BLIS_GEMMTRSM_L_UKR, BLIS_SCOMPLEX, bli_cgemmtrsm_l_sifive_rvv_intr, + BLIS_GEMMTRSM_L_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_l_sifive_rvv_intr, + BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsm_u_sifive_rvv_intr, + BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_sifive_rvv_intr, + BLIS_GEMMTRSM_U_UKR, BLIS_SCOMPLEX, bli_cgemmtrsm_u_sifive_rvv_intr, + 
BLIS_GEMMTRSM_U_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_u_sifive_rvv_intr, + + BLIS_VA_END + ); + + // Update the context with storage preferences. + bli_cntx_set_ukr_prefs + ( + cntx, + + BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, TRUE, + BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, + BLIS_GEMM_UKR_ROW_PREF, BLIS_SCOMPLEX, TRUE, + BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, TRUE, + + BLIS_VA_END + ); + + // Initialize level-3 blocksize objects with architecture-specific values. + // s d c z + bli_blksz_init ( &blkszs[ BLIS_MR ], 7, 7, 6, 6, + 8, 8, 8, 8 ); + bli_blksz_init_easy( &blkszs[ BLIS_NR ], 4 * __riscv_v_min_vlen / 32, 4 * __riscv_v_min_vlen / 64, 2 * __riscv_v_min_vlen / 32, 2 * __riscv_v_min_vlen / 64 ); + bli_blksz_init_easy( &blkszs[ BLIS_MC ], 7, 7, 6, 6 ); + bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4 * __riscv_v_min_vlen / 32, 4 * __riscv_v_min_vlen / 64, 2 * __riscv_v_min_vlen / 32, 2 * __riscv_v_min_vlen / 64 ); + bli_blksz_init_easy( &blkszs[ BLIS_KC ], 64, 64, 64, 64 ); + // Default BLIS_BBM_s = 1, but set here to ensure it's correct + bli_blksz_init_easy( &blkszs[ BLIS_BBM ], 1, 1, 1, 1 ); + bli_blksz_init_easy( &blkszs[ BLIS_BBN ], 1, 1, 1, 1 ); + + // Update the context with the current architecture's register and cache + // blocksizes (and multiples) for native execution. + bli_cntx_set_blkszs + ( + cntx, + + // level-3 + BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, + BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, + BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, + BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, + BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, + + // level-1m + BLIS_BBM, &blkszs[ BLIS_BBM ], BLIS_BBM, + BLIS_BBN, &blkszs[ BLIS_BBN ], BLIS_BBN, + + BLIS_VA_END + ); +} + diff --git a/config/sifive_rvv/bli_family_sifive_rvv.h b/config/sifive_rvv/bli_family_sifive_rvv.h new file mode 100644 index 0000000000..708c1960fd --- /dev/null +++ b/config/sifive_rvv/bli_family_sifive_rvv.h @@ -0,0 +1,34 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2024, SiFive, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + diff --git a/config/sifive_rvv/bli_kernel_defs_sifive_rvv.h b/config/sifive_rvv/bli_kernel_defs_sifive_rvv.h new file mode 100644 index 0000000000..c6db9aceb7 --- /dev/null +++ b/config/sifive_rvv/bli_kernel_defs_sifive_rvv.h @@ -0,0 +1,55 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2024, SiFive, Inc. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- +#define BLIS_MR_s 7 +#define BLIS_MR_d 7 +#define BLIS_MR_c 6 +#define BLIS_MR_z 6 + +#define BLIS_PACKMR_s 8 +#define BLIS_PACKMR_d 8 +#define BLIS_PACKMR_c 8 +#define BLIS_PACKMR_z 8 + +#define BLIS_NR_s ( 4 * __riscv_v_min_vlen / 32 ) +#define BLIS_NR_d ( 4 * __riscv_v_min_vlen / 64 ) +#define BLIS_NR_c ( 2 * __riscv_v_min_vlen / 32 ) +#define BLIS_NR_z ( 2 * __riscv_v_min_vlen / 64 ) +//#endif + diff --git a/config/sifive_rvv/make_defs.mk b/config/sifive_rvv/make_defs.mk new file mode 100644 index 0000000000..63c2d447fe --- /dev/null +++ b/config/sifive_rvv/make_defs.mk @@ -0,0 +1,80 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2024, SiFive, Inc. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := sifive_rvv +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CMISCFLAGS_SIFIVE := -mcmodel=medany -march=rv64gcv_zba_zbb_zvl128b -mabi=lp64d +CMISCFLAGS_SIFIVE_OTHER := +CPPROCFLAGS := +CMISCFLAGS := $(CMISCFLAGS_SIFIVE) $(CMISCFLAGS_SIFIVE_OTHER) \ + -fdata-sections -ffunction-sections \ + -fdiagnostics-color=always -fno-rtti -fno-exceptions +CPICFLAGS := -fPIC +CWARNFLAGS := -Wall -Wextra -Wno-unused-function -Wno-unused-parameter \ + -Wno-sign-compare -Wno-unused-variable + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O3 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) +CKVECFLAGS := + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +CRVECFLAGS := $(CKVECFLAGS) + +# Store all of the variables here to new variables containing the +# configuration name. 
+$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/config/sifive_x280/bli_cntx_init_sifive_x280.c b/config/sifive_x280/bli_cntx_init_sifive_x280.c index 668891cf3f..142ca19278 100644 --- a/config/sifive_x280/bli_cntx_init_sifive_x280.c +++ b/config/sifive_x280/bli_cntx_init_sifive_x280.c @@ -49,127 +49,127 @@ void bli_cntx_init_sifive_x280( cntx_t* cntx ) cntx, // Level 1 - BLIS_ADDV_KER, BLIS_FLOAT, bli_saddv_sifive_x280_intr, - BLIS_ADDV_KER, BLIS_DOUBLE, bli_daddv_sifive_x280_intr, - BLIS_ADDV_KER, BLIS_SCOMPLEX, bli_caddv_sifive_x280_intr, - BLIS_ADDV_KER, BLIS_DCOMPLEX, bli_zaddv_sifive_x280_intr, - - BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_sifive_x280_intr, - BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_sifive_x280_intr, - BLIS_AMAXV_KER, BLIS_SCOMPLEX, bli_camaxv_sifive_x280_intr, - BLIS_AMAXV_KER, BLIS_DCOMPLEX, bli_zamaxv_sifive_x280_intr, - - BLIS_AXPBYV_KER, BLIS_FLOAT, bli_saxpbyv_sifive_x280_intr, - BLIS_AXPBYV_KER, BLIS_DOUBLE, bli_daxpbyv_sifive_x280_intr, - BLIS_AXPBYV_KER, BLIS_SCOMPLEX, bli_caxpbyv_sifive_x280_intr, - BLIS_AXPBYV_KER, BLIS_DCOMPLEX, bli_zaxpbyv_sifive_x280_intr, - - BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_sifive_x280_intr, - BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_sifive_x280_intr, - BLIS_AXPYV_KER, BLIS_SCOMPLEX, bli_caxpyv_sifive_x280_intr, - BLIS_AXPYV_KER, BLIS_DCOMPLEX, bli_zaxpyv_sifive_x280_intr, - - BLIS_COPYV_KER, BLIS_FLOAT, bli_scopyv_sifive_x280_intr, - BLIS_COPYV_KER, BLIS_DOUBLE, bli_dcopyv_sifive_x280_intr, - BLIS_COPYV_KER, BLIS_SCOMPLEX, bli_ccopyv_sifive_x280_intr, - BLIS_COPYV_KER, BLIS_DCOMPLEX, bli_zcopyv_sifive_x280_intr, - - BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_sifive_x280_intr, - BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_sifive_x280_intr, - BLIS_DOTV_KER, BLIS_SCOMPLEX, bli_cdotv_sifive_x280_intr, - BLIS_DOTV_KER, BLIS_DCOMPLEX, bli_zdotv_sifive_x280_intr, - - BLIS_DOTXV_KER, BLIS_FLOAT, bli_sdotxv_sifive_x280_intr, - BLIS_DOTXV_KER, BLIS_DOUBLE, bli_ddotxv_sifive_x280_intr, - BLIS_DOTXV_KER, 
BLIS_SCOMPLEX, bli_cdotxv_sifive_x280_intr, - BLIS_DOTXV_KER, BLIS_DCOMPLEX, bli_zdotxv_sifive_x280_intr, - - BLIS_INVERTV_KER, BLIS_FLOAT, bli_sinvertv_sifive_x280_intr, - BLIS_INVERTV_KER, BLIS_DOUBLE, bli_dinvertv_sifive_x280_intr, - BLIS_INVERTV_KER, BLIS_SCOMPLEX, bli_cinvertv_sifive_x280_intr, - BLIS_INVERTV_KER, BLIS_DCOMPLEX, bli_zinvertv_sifive_x280_intr, - - BLIS_INVSCALV_KER, BLIS_FLOAT, bli_sinvscalv_sifive_x280_intr, - BLIS_INVSCALV_KER, BLIS_DOUBLE, bli_dinvscalv_sifive_x280_intr, - BLIS_INVSCALV_KER, BLIS_SCOMPLEX, bli_cinvscalv_sifive_x280_intr, - BLIS_INVSCALV_KER, BLIS_DCOMPLEX, bli_zinvscalv_sifive_x280_intr, - - BLIS_SCAL2V_KER, BLIS_FLOAT, bli_sscal2v_sifive_x280_intr, - BLIS_SCAL2V_KER, BLIS_DOUBLE, bli_dscal2v_sifive_x280_intr, - BLIS_SCAL2V_KER, BLIS_SCOMPLEX, bli_cscal2v_sifive_x280_intr, - BLIS_SCAL2V_KER, BLIS_DCOMPLEX, bli_zscal2v_sifive_x280_intr, - - BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_sifive_x280_intr, - BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_sifive_x280_intr, - BLIS_SCALV_KER, BLIS_SCOMPLEX, bli_cscalv_sifive_x280_intr, - BLIS_SCALV_KER, BLIS_DCOMPLEX, bli_zscalv_sifive_x280_intr, - - BLIS_SETV_KER, BLIS_FLOAT, bli_ssetv_sifive_x280_intr, - BLIS_SETV_KER, BLIS_DOUBLE, bli_dsetv_sifive_x280_intr, - BLIS_SETV_KER, BLIS_SCOMPLEX, bli_csetv_sifive_x280_intr, - BLIS_SETV_KER, BLIS_DCOMPLEX, bli_zsetv_sifive_x280_intr, - - BLIS_SUBV_KER, BLIS_FLOAT, bli_ssubv_sifive_x280_intr, - BLIS_SUBV_KER, BLIS_DOUBLE, bli_dsubv_sifive_x280_intr, - BLIS_SUBV_KER, BLIS_SCOMPLEX, bli_csubv_sifive_x280_intr, - BLIS_SUBV_KER, BLIS_DCOMPLEX, bli_zsubv_sifive_x280_intr, - - BLIS_SWAPV_KER, BLIS_FLOAT, bli_sswapv_sifive_x280_intr, - BLIS_SWAPV_KER, BLIS_DOUBLE, bli_dswapv_sifive_x280_intr, - BLIS_SWAPV_KER, BLIS_SCOMPLEX, bli_cswapv_sifive_x280_intr, - BLIS_SWAPV_KER, BLIS_DCOMPLEX, bli_zswapv_sifive_x280_intr, - - BLIS_XPBYV_KER, BLIS_FLOAT, bli_sxpbyv_sifive_x280_intr, - BLIS_XPBYV_KER, BLIS_DOUBLE, bli_dxpbyv_sifive_x280_intr, - BLIS_XPBYV_KER, 
BLIS_SCOMPLEX, bli_cxpbyv_sifive_x280_intr, - BLIS_XPBYV_KER, BLIS_DCOMPLEX, bli_zxpbyv_sifive_x280_intr, + BLIS_ADDV_KER, BLIS_FLOAT, bli_saddv_sifive_rvv_intr, + BLIS_ADDV_KER, BLIS_DOUBLE, bli_daddv_sifive_rvv_intr, + BLIS_ADDV_KER, BLIS_SCOMPLEX, bli_caddv_sifive_rvv_intr, + BLIS_ADDV_KER, BLIS_DCOMPLEX, bli_zaddv_sifive_rvv_intr, + + BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_sifive_rvv_intr, + BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_sifive_rvv_intr, + BLIS_AMAXV_KER, BLIS_SCOMPLEX, bli_camaxv_sifive_rvv_intr, + BLIS_AMAXV_KER, BLIS_DCOMPLEX, bli_zamaxv_sifive_rvv_intr, + + BLIS_AXPBYV_KER, BLIS_FLOAT, bli_saxpbyv_sifive_rvv_intr, + BLIS_AXPBYV_KER, BLIS_DOUBLE, bli_daxpbyv_sifive_rvv_intr, + BLIS_AXPBYV_KER, BLIS_SCOMPLEX, bli_caxpbyv_sifive_rvv_intr, + BLIS_AXPBYV_KER, BLIS_DCOMPLEX, bli_zaxpbyv_sifive_rvv_intr, + + BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_sifive_rvv_intr, + BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_sifive_rvv_intr, + BLIS_AXPYV_KER, BLIS_SCOMPLEX, bli_caxpyv_sifive_rvv_intr, + BLIS_AXPYV_KER, BLIS_DCOMPLEX, bli_zaxpyv_sifive_rvv_intr, + + BLIS_COPYV_KER, BLIS_FLOAT, bli_scopyv_sifive_rvv_intr, + BLIS_COPYV_KER, BLIS_DOUBLE, bli_dcopyv_sifive_rvv_intr, + BLIS_COPYV_KER, BLIS_SCOMPLEX, bli_ccopyv_sifive_rvv_intr, + BLIS_COPYV_KER, BLIS_DCOMPLEX, bli_zcopyv_sifive_rvv_intr, + + BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_sifive_rvv_intr, + BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_sifive_rvv_intr, + BLIS_DOTV_KER, BLIS_SCOMPLEX, bli_cdotv_sifive_rvv_intr, + BLIS_DOTV_KER, BLIS_DCOMPLEX, bli_zdotv_sifive_rvv_intr, + + BLIS_DOTXV_KER, BLIS_FLOAT, bli_sdotxv_sifive_rvv_intr, + BLIS_DOTXV_KER, BLIS_DOUBLE, bli_ddotxv_sifive_rvv_intr, + BLIS_DOTXV_KER, BLIS_SCOMPLEX, bli_cdotxv_sifive_rvv_intr, + BLIS_DOTXV_KER, BLIS_DCOMPLEX, bli_zdotxv_sifive_rvv_intr, + + BLIS_INVERTV_KER, BLIS_FLOAT, bli_sinvertv_sifive_rvv_intr, + BLIS_INVERTV_KER, BLIS_DOUBLE, bli_dinvertv_sifive_rvv_intr, + BLIS_INVERTV_KER, BLIS_SCOMPLEX, bli_cinvertv_sifive_rvv_intr, + BLIS_INVERTV_KER, 
BLIS_DCOMPLEX, bli_zinvertv_sifive_rvv_intr, + + BLIS_INVSCALV_KER, BLIS_FLOAT, bli_sinvscalv_sifive_rvv_intr, + BLIS_INVSCALV_KER, BLIS_DOUBLE, bli_dinvscalv_sifive_rvv_intr, + BLIS_INVSCALV_KER, BLIS_SCOMPLEX, bli_cinvscalv_sifive_rvv_intr, + BLIS_INVSCALV_KER, BLIS_DCOMPLEX, bli_zinvscalv_sifive_rvv_intr, + + BLIS_SCAL2V_KER, BLIS_FLOAT, bli_sscal2v_sifive_rvv_intr, + BLIS_SCAL2V_KER, BLIS_DOUBLE, bli_dscal2v_sifive_rvv_intr, + BLIS_SCAL2V_KER, BLIS_SCOMPLEX, bli_cscal2v_sifive_rvv_intr, + BLIS_SCAL2V_KER, BLIS_DCOMPLEX, bli_zscal2v_sifive_rvv_intr, + + BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_sifive_rvv_intr, + BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_sifive_rvv_intr, + BLIS_SCALV_KER, BLIS_SCOMPLEX, bli_cscalv_sifive_rvv_intr, + BLIS_SCALV_KER, BLIS_DCOMPLEX, bli_zscalv_sifive_rvv_intr, + + BLIS_SETV_KER, BLIS_FLOAT, bli_ssetv_sifive_rvv_intr, + BLIS_SETV_KER, BLIS_DOUBLE, bli_dsetv_sifive_rvv_intr, + BLIS_SETV_KER, BLIS_SCOMPLEX, bli_csetv_sifive_rvv_intr, + BLIS_SETV_KER, BLIS_DCOMPLEX, bli_zsetv_sifive_rvv_intr, + + BLIS_SUBV_KER, BLIS_FLOAT, bli_ssubv_sifive_rvv_intr, + BLIS_SUBV_KER, BLIS_DOUBLE, bli_dsubv_sifive_rvv_intr, + BLIS_SUBV_KER, BLIS_SCOMPLEX, bli_csubv_sifive_rvv_intr, + BLIS_SUBV_KER, BLIS_DCOMPLEX, bli_zsubv_sifive_rvv_intr, + + BLIS_SWAPV_KER, BLIS_FLOAT, bli_sswapv_sifive_rvv_intr, + BLIS_SWAPV_KER, BLIS_DOUBLE, bli_dswapv_sifive_rvv_intr, + BLIS_SWAPV_KER, BLIS_SCOMPLEX, bli_cswapv_sifive_rvv_intr, + BLIS_SWAPV_KER, BLIS_DCOMPLEX, bli_zswapv_sifive_rvv_intr, + + BLIS_XPBYV_KER, BLIS_FLOAT, bli_sxpbyv_sifive_rvv_intr, + BLIS_XPBYV_KER, BLIS_DOUBLE, bli_dxpbyv_sifive_rvv_intr, + BLIS_XPBYV_KER, BLIS_SCOMPLEX, bli_cxpbyv_sifive_rvv_intr, + BLIS_XPBYV_KER, BLIS_DCOMPLEX, bli_zxpbyv_sifive_rvv_intr, // Level 1f - BLIS_AXPY2V_KER, BLIS_FLOAT, bli_saxpy2v_sifive_x280_intr, - BLIS_AXPY2V_KER, BLIS_DOUBLE, bli_daxpy2v_sifive_x280_intr, - BLIS_AXPY2V_KER, BLIS_SCOMPLEX, bli_caxpy2v_sifive_x280_intr, - BLIS_AXPY2V_KER, BLIS_DCOMPLEX, 
bli_zaxpy2v_sifive_x280_intr, - - BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_sifive_x280_intr, - BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_sifive_x280_intr, - BLIS_AXPYF_KER, BLIS_SCOMPLEX, bli_caxpyf_sifive_x280_intr, - BLIS_AXPYF_KER, BLIS_DCOMPLEX, bli_zaxpyf_sifive_x280_intr, - - BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_sifive_x280_intr, - BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_sifive_x280_intr, - BLIS_DOTXF_KER, BLIS_SCOMPLEX, bli_cdotxf_sifive_x280_intr, - BLIS_DOTXF_KER, BLIS_DCOMPLEX, bli_zdotxf_sifive_x280_intr, - - BLIS_DOTAXPYV_KER, BLIS_FLOAT, bli_sdotaxpyv_sifive_x280_intr, - BLIS_DOTAXPYV_KER, BLIS_DOUBLE, bli_ddotaxpyv_sifive_x280_intr, - BLIS_DOTAXPYV_KER, BLIS_SCOMPLEX, bli_cdotaxpyv_sifive_x280_intr, - BLIS_DOTAXPYV_KER, BLIS_DCOMPLEX, bli_zdotaxpyv_sifive_x280_intr, - - BLIS_DOTXAXPYF_KER, BLIS_FLOAT, bli_sdotxaxpyf_sifive_x280_intr, - BLIS_DOTXAXPYF_KER, BLIS_DOUBLE, bli_ddotxaxpyf_sifive_x280_intr, - BLIS_DOTXAXPYF_KER, BLIS_SCOMPLEX, bli_cdotxaxpyf_sifive_x280_intr, - BLIS_DOTXAXPYF_KER, BLIS_DCOMPLEX, bli_zdotxaxpyf_sifive_x280_intr, + BLIS_AXPY2V_KER, BLIS_FLOAT, bli_saxpy2v_sifive_rvv_intr, + BLIS_AXPY2V_KER, BLIS_DOUBLE, bli_daxpy2v_sifive_rvv_intr, + BLIS_AXPY2V_KER, BLIS_SCOMPLEX, bli_caxpy2v_sifive_rvv_intr, + BLIS_AXPY2V_KER, BLIS_DCOMPLEX, bli_zaxpy2v_sifive_rvv_intr, + + BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_sifive_rvv_intr, + BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_sifive_rvv_intr, + BLIS_AXPYF_KER, BLIS_SCOMPLEX, bli_caxpyf_sifive_rvv_intr, + BLIS_AXPYF_KER, BLIS_DCOMPLEX, bli_zaxpyf_sifive_rvv_intr, + + BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_sifive_rvv_intr, + BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_sifive_rvv_intr, + BLIS_DOTXF_KER, BLIS_SCOMPLEX, bli_cdotxf_sifive_rvv_intr, + BLIS_DOTXF_KER, BLIS_DCOMPLEX, bli_zdotxf_sifive_rvv_intr, + + BLIS_DOTAXPYV_KER, BLIS_FLOAT, bli_sdotaxpyv_sifive_rvv_intr, + BLIS_DOTAXPYV_KER, BLIS_DOUBLE, bli_ddotaxpyv_sifive_rvv_intr, + BLIS_DOTAXPYV_KER, BLIS_SCOMPLEX, bli_cdotaxpyv_sifive_rvv_intr, + 
BLIS_DOTAXPYV_KER, BLIS_DCOMPLEX, bli_zdotaxpyv_sifive_rvv_intr, + + BLIS_DOTXAXPYF_KER, BLIS_FLOAT, bli_sdotxaxpyf_sifive_rvv_intr, + BLIS_DOTXAXPYF_KER, BLIS_DOUBLE, bli_ddotxaxpyf_sifive_rvv_intr, + BLIS_DOTXAXPYF_KER, BLIS_SCOMPLEX, bli_cdotxaxpyf_sifive_rvv_intr, + BLIS_DOTXAXPYF_KER, BLIS_DCOMPLEX, bli_zdotxaxpyf_sifive_rvv_intr, // Level 1m - BLIS_PACKM_KER, BLIS_FLOAT, bli_spackm_sifive_x280_intr, - BLIS_PACKM_KER, BLIS_DOUBLE, bli_dpackm_sifive_x280_intr, - BLIS_PACKM_KER, BLIS_SCOMPLEX, bli_cpackm_sifive_x280_intr, - BLIS_PACKM_KER, BLIS_DCOMPLEX, bli_zpackm_sifive_x280_intr, + BLIS_PACKM_KER, BLIS_FLOAT, bli_spackm_sifive_rvv_intr, + BLIS_PACKM_KER, BLIS_DOUBLE, bli_dpackm_sifive_rvv_intr, + BLIS_PACKM_KER, BLIS_SCOMPLEX, bli_cpackm_sifive_rvv_intr, + BLIS_PACKM_KER, BLIS_DCOMPLEX, bli_zpackm_sifive_rvv_intr, // Level 3 - BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_sifive_x280_intr, - BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_sifive_x280_intr, - BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_sifive_x280_intr, - BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_sifive_x280_intr, - - BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsm_l_sifive_x280_intr, - BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_sifive_x280_intr, - BLIS_GEMMTRSM_L_UKR, BLIS_SCOMPLEX, bli_cgemmtrsm_l_sifive_x280_intr, - BLIS_GEMMTRSM_L_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_l_sifive_x280_intr, - BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsm_u_sifive_x280_intr, - BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_sifive_x280_intr, - BLIS_GEMMTRSM_U_UKR, BLIS_SCOMPLEX, bli_cgemmtrsm_u_sifive_x280_intr, - BLIS_GEMMTRSM_U_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_u_sifive_x280_intr, + BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_sifive_rvv_intr, + BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_sifive_rvv_intr, + BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_sifive_rvv_intr, + BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_sifive_rvv_intr, + + BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsm_l_sifive_rvv_intr, + BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, 
bli_dgemmtrsm_l_sifive_rvv_intr, + BLIS_GEMMTRSM_L_UKR, BLIS_SCOMPLEX, bli_cgemmtrsm_l_sifive_rvv_intr, + BLIS_GEMMTRSM_L_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_l_sifive_rvv_intr, + BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsm_u_sifive_rvv_intr, + BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_sifive_rvv_intr, + BLIS_GEMMTRSM_U_UKR, BLIS_SCOMPLEX, bli_cgemmtrsm_u_sifive_rvv_intr, + BLIS_GEMMTRSM_U_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_u_sifive_rvv_intr, BLIS_VA_END ); diff --git a/config/sifive_x280/make_defs.mk b/config/sifive_x280/make_defs.mk index 31b31e387a..5f19e4e442 100644 --- a/config/sifive_x280/make_defs.mk +++ b/config/sifive_x280/make_defs.mk @@ -47,8 +47,10 @@ THIS_CONFIG := sifive_x280 # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CMISCFLAGS_SIFIVE := -mcmodel=medany -march=rv64gcv_zba_zbb_zvl512b -mabi=lp64d +CMISCFLAGS_SIFIVE_OTHER := CPPROCFLAGS := -CMISCFLAGS := $(CMISCFLAGS_SIFIVE) -fdata-sections -ffunction-sections \ +CMISCFLAGS := $(CMISCFLAGS_SIFIVE) $(CMISCFLAGS_SIFIVE_OTHER) \ + -fdata-sections -ffunction-sections \ -fdiagnostics-color=always -fno-rtti -fno-exceptions CPICFLAGS := -fPIC CWARNFLAGS := -Wall -Wextra -Wno-unused-function -Wno-unused-parameter \ diff --git a/config_registry b/config_registry index 8c1f6f2542..8154393487 100644 --- a/config_registry +++ b/config_registry @@ -62,7 +62,8 @@ rv32iv: rv32iv/rviv rv64iv: rv64iv/rviv # SiFive architectures. -sifive_x280: sifive_x280 +sifive_rvv: sifive_rvv +sifive_x280: sifive_x280/sifive_rvv # Generic architectures. generic: generic diff --git a/frame/base/bli_arch.c b/frame/base/bli_arch.c index 135d410635..53d9bdefdd 100644 --- a/frame/base/bli_arch.c +++ b/frame/base/bli_arch.c @@ -287,6 +287,9 @@ arch_t bli_arch_query_id_impl( void ) #endif // SiFive microarchitectures. 
+ #ifdef BLIS_FAMILY_SIFIVE_RVV + id = BLIS_ARCH_SIFIVE_RVV; + #endif #ifdef BLIS_FAMILY_SIFIVE_X280 id = BLIS_ARCH_SIFIVE_X280; #endif @@ -356,6 +359,7 @@ static const char* config_name[ BLIS_NUM_ARCHS ] = "rv32iv", "rv64iv", + "sifive_rvv", "sifive_x280", "generic" diff --git a/frame/include/bli_arch_config.h b/frame/include/bli_arch_config.h index a35bb7746b..49a8943024 100644 --- a/frame/include/bli_arch_config.h +++ b/frame/include/bli_arch_config.h @@ -180,6 +180,9 @@ INSERT_GENTCONF // -- SiFive families -- +#ifdef BLIS_FAMILY_SIFIVE_RVV +#include "bli_family_sifive_rvv.h" +#endif #ifdef BLIS_FAMILY_SIFIVE_X280 #include "bli_family_sifive_x280.h" #endif @@ -277,6 +280,9 @@ INSERT_GENTCONF // -- SiFive RISC-V architectures -- +#ifdef BLIS_KERNELS_SIFIVE_RVV +#include "bli_kernels_sifive_rvv.h" +#endif #ifdef BLIS_KERNELS_SIFIVE_X280 #include "bli_kernels_sifive_x280.h" #endif diff --git a/frame/include/bli_gentconf_macro_defs.h b/frame/include/bli_gentconf_macro_defs.h index 70414fb475..f6f3af20e8 100644 --- a/frame/include/bli_gentconf_macro_defs.h +++ b/frame/include/bli_gentconf_macro_defs.h @@ -222,6 +222,11 @@ // -- SiFive architectures ---------------------------------------------------- +#ifdef BLIS_CONFIG_SIFIVE_RVV +#define INSERT_GENTCONF_SIFIVE_RVV GENTCONF( SIFIVE_RVV, sifive_rvv ) +#else +#define INSERT_GENTCONF_SIFIVE_RVV +#endif #ifdef BLIS_CONFIG_SIFIVE_X280 #define INSERT_GENTCONF_SIFIVE_X280 GENTCONF( SIFIVE_X280, sifive_x280 ) #else @@ -280,6 +285,7 @@ INSERT_GENTCONF_RV64I \ INSERT_GENTCONF_RV32IV \ INSERT_GENTCONF_RV64IV \ \ +INSERT_GENTCONF_SIFIVE_RVV \ INSERT_GENTCONF_SIFIVE_X280 \ \ INSERT_GENTCONF_GENERIC diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index df0b2a4252..4b9675cb3c 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -1003,6 +1003,7 @@ typedef enum BLIS_ARCH_RV64IV, // SiFive + BLIS_ARCH_SIFIVE_RVV, BLIS_ARCH_SIFIVE_X280, // Generic architecture/configuration diff 
--git a/kernels/sifive_x280/1/bli_addv_sifive_x280_intr/bli_addv_sifive_x280_intr.c b/kernels/sifive_rvv/1/bli_addv_sifive_rvv_intr/bli_addv_sifive_rvv_intr.c similarity index 92% rename from kernels/sifive_x280/1/bli_addv_sifive_x280_intr/bli_addv_sifive_x280_intr.c rename to kernels/sifive_rvv/1/bli_addv_sifive_rvv_intr/bli_addv_sifive_rvv_intr.c index 2b7ad6fe7d..c917390f9c 100644 --- a/kernels/sifive_x280/1/bli_addv_sifive_x280_intr/bli_addv_sifive_x280_intr.c +++ b/kernels/sifive_rvv/1/bli_addv_sifive_rvv_intr/bli_addv_sifive_rvv_intr.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,7 +40,7 @@ #include "../../riscv_overloaded_intrinsics.h" -#define ADDV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##addv_sifive_x280_intr(\ +#define ADDV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##addv_sifive_rvv_intr(\ conj_t conjx, \ dim_t n, \ const T* restrict x_, inc_t incx, \ @@ -57,7 +57,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(float) -#include "./bli_addv_sifive_x280_intr_real.c" +#include "./bli_addv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -72,7 +72,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(double) -#include "./bli_addv_sifive_x280_intr_real.c" +#include "./bli_addv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -88,7 +88,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(float) -#include "./bli_addv_sifive_x280_intr_complex.c" +#include "./bli_addv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -105,7 +105,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(double) -#include "./bli_addv_sifive_x280_intr_complex.c" +#include "./bli_addv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT diff --git 
a/kernels/sifive_x280/1/bli_addv_sifive_x280_intr/bli_addv_sifive_x280_intr_complex.c b/kernels/sifive_rvv/1/bli_addv_sifive_rvv_intr/bli_addv_sifive_rvv_intr_complex.c similarity index 98% rename from kernels/sifive_x280/1/bli_addv_sifive_x280_intr/bli_addv_sifive_x280_intr_complex.c rename to kernels/sifive_rvv/1/bli_addv_sifive_rvv_intr/bli_addv_sifive_rvv_intr_complex.c index d5343befe0..ae4ff39b97 100644 --- a/kernels/sifive_x280/1/bli_addv_sifive_x280_intr/bli_addv_sifive_x280_intr_complex.c +++ b/kernels/sifive_rvv/1/bli_addv_sifive_rvv_intr/bli_addv_sifive_rvv_intr_complex.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/1/bli_addv_sifive_x280_intr/bli_addv_sifive_x280_intr_real.c b/kernels/sifive_rvv/1/bli_addv_sifive_rvv_intr/bli_addv_sifive_rvv_intr_real.c similarity index 98% rename from kernels/sifive_x280/1/bli_addv_sifive_x280_intr/bli_addv_sifive_x280_intr_real.c rename to kernels/sifive_rvv/1/bli_addv_sifive_rvv_intr/bli_addv_sifive_rvv_intr_real.c index d4e7d4a45e..bc928a5e6b 100644 --- a/kernels/sifive_x280/1/bli_addv_sifive_x280_intr/bli_addv_sifive_x280_intr_real.c +++ b/kernels/sifive_rvv/1/bli_addv_sifive_rvv_intr/bli_addv_sifive_rvv_intr_real.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/1/bli_amaxv_sifive_x280_intr/bli_amaxv_sifive_x280_intr.c b/kernels/sifive_rvv/1/bli_amaxv_sifive_rvv_intr/bli_amaxv_sifive_rvv_intr.c similarity index 86% rename from kernels/sifive_x280/1/bli_amaxv_sifive_x280_intr/bli_amaxv_sifive_x280_intr.c rename to kernels/sifive_rvv/1/bli_amaxv_sifive_rvv_intr/bli_amaxv_sifive_rvv_intr.c index 4f7d546304..6014b860b6 100644 --- a/kernels/sifive_x280/1/bli_amaxv_sifive_x280_intr/bli_amaxv_sifive_x280_intr.c +++ b/kernels/sifive_rvv/1/bli_amaxv_sifive_rvv_intr/bli_amaxv_sifive_rvv_intr.c @@ -40,7 +40,7 @@ #include #include -#define AMAXV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##amaxv_sifive_x280_intr(\ +#define AMAXV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##amaxv_sifive_rvv_intr(\ dim_t n, \ const T* restrict x_, inc_t incx, \ dim_t* index, \ @@ -52,20 +52,20 @@ // BLIS defines integers to be 32 or 64 bits according to BLIS_INT_TYPE_SIZE. // If BLIS_INT_TYPE_SIZE is any other value, integers are defined to be longs. #if BLIS_INT_TYPE_SIZE == 32 || BLIS_INT_TYPE_SIZE == 64 -#define AMAXV_SIFIVE_X280_INT_SIZE BLIS_INT_TYPE_SIZE +#define AMAXV_SIFIVE_RVV_INT_SIZE BLIS_INT_TYPE_SIZE #elif LONG_MAX == INT32_MAX -#define AMAXV_SIFIVE_X280_INT_SIZE 32 +#define AMAXV_SIFIVE_RVV_INT_SIZE 32 #elif LONG_MAX == INT64_MAX -#define AMAXV_SIFIVE_X280_INT_SIZE 64 +#define AMAXV_SIFIVE_RVV_INT_SIZE 64 #else -#error "Integers must be 32- or 64-bits for bli_?amaxv_sifive_x280_intr." +#error "Integers must be 32- or 64-bits for bli_?amaxv_sifive_rvv_intr." 
#endif // Single precision real #define DATATYPE float #define PRECISION_CHAR s #define PREC_X 32 -#define PREC_I AMAXV_SIFIVE_X280_INT_SIZE +#define PREC_I AMAXV_SIFIVE_RVV_INT_SIZE #if PREC_I == 32 #define LMUL_X m4 #define LMUL_I m4 @@ -77,7 +77,7 @@ #endif #define FLT_SIZE sizeof(float) -#include "./bli_amaxv_sifive_x280_intr_real.c" +#include "./bli_amaxv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -92,7 +92,7 @@ #define DATATYPE double #define PRECISION_CHAR d #define PREC_X 64 -#define PREC_I AMAXV_SIFIVE_X280_INT_SIZE +#define PREC_I AMAXV_SIFIVE_RVV_INT_SIZE #if PREC_I == 32 #define LMUL_X m8 #define LMUL_I m4 @@ -104,7 +104,7 @@ #endif #define FLT_SIZE sizeof(double) -#include "./bli_amaxv_sifive_x280_intr_real.c" +#include "./bli_amaxv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -120,7 +120,7 @@ #define BASE_DT float #define PRECISION_CHAR c #define PREC_X 32 -#define PREC_I AMAXV_SIFIVE_X280_INT_SIZE +#define PREC_I AMAXV_SIFIVE_RVV_INT_SIZE #if PREC_I == 32 #define LMUL_X m4 #define LMUL_I m4 @@ -132,7 +132,7 @@ #endif #define FLT_SIZE sizeof(float) -#include "./bli_amaxv_sifive_x280_intr_complex.c" +#include "./bli_amaxv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -149,7 +149,7 @@ #define BASE_DT double #define PRECISION_CHAR z #define PREC_X 64 -#define PREC_I AMAXV_SIFIVE_X280_INT_SIZE +#define PREC_I AMAXV_SIFIVE_RVV_INT_SIZE #if PREC_I == 32 #define LMUL_X m8 #define LMUL_I m4 @@ -161,7 +161,7 @@ #endif #define FLT_SIZE sizeof(double) -#include "./bli_amaxv_sifive_x280_intr_complex.c" +#include "./bli_amaxv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -173,7 +173,7 @@ #undef RATIO #undef FLT_SIZE -#undef AMAXV_SIFIVE_X280_INT_SIZE +#undef AMAXV_SIFIVE_RVV_INT_SIZE #undef AMAXV #undef AMAXV_ diff --git a/kernels/sifive_x280/1/bli_amaxv_sifive_x280_intr/bli_amaxv_sifive_x280_intr_complex.c b/kernels/sifive_rvv/1/bli_amaxv_sifive_rvv_intr/bli_amaxv_sifive_rvv_intr_complex.c 
similarity index 100% rename from kernels/sifive_x280/1/bli_amaxv_sifive_x280_intr/bli_amaxv_sifive_x280_intr_complex.c rename to kernels/sifive_rvv/1/bli_amaxv_sifive_rvv_intr/bli_amaxv_sifive_rvv_intr_complex.c diff --git a/kernels/sifive_x280/1/bli_amaxv_sifive_x280_intr/bli_amaxv_sifive_x280_intr_real.c b/kernels/sifive_rvv/1/bli_amaxv_sifive_rvv_intr/bli_amaxv_sifive_rvv_intr_real.c similarity index 100% rename from kernels/sifive_x280/1/bli_amaxv_sifive_x280_intr/bli_amaxv_sifive_x280_intr_real.c rename to kernels/sifive_rvv/1/bli_amaxv_sifive_rvv_intr/bli_amaxv_sifive_rvv_intr_real.c diff --git a/kernels/sifive_x280/1/bli_axpbyv_sifive_x280_intr/bli_axpbyv_sifive_x280_intr.c b/kernels/sifive_rvv/1/bli_axpbyv_sifive_rvv_intr/bli_axpbyv_sifive_rvv_intr.c similarity index 92% rename from kernels/sifive_x280/1/bli_axpbyv_sifive_x280_intr/bli_axpbyv_sifive_x280_intr.c rename to kernels/sifive_rvv/1/bli_axpbyv_sifive_rvv_intr/bli_axpbyv_sifive_rvv_intr.c index 389292f90f..94e3272bc4 100644 --- a/kernels/sifive_x280/1/bli_axpbyv_sifive_x280_intr/bli_axpbyv_sifive_x280_intr.c +++ b/kernels/sifive_rvv/1/bli_axpbyv_sifive_rvv_intr/bli_axpbyv_sifive_rvv_intr.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,7 +40,7 @@ #include "../../riscv_overloaded_intrinsics.h" -#define AXPBYV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##axpbyv_sifive_x280_intr(\ +#define AXPBYV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##axpbyv_sifive_rvv_intr(\ conj_t conjx, \ dim_t n, \ const T* restrict alpha_, \ @@ -52,11 +52,11 @@ #define AXPBYV(...) 
AXPBYV_(__VA_ARGS__) -#define SETV_(PRECISION_CHAR) bli_##PRECISION_CHAR##setv_sifive_x280_intr +#define SETV_(PRECISION_CHAR) bli_##PRECISION_CHAR##setv_sifive_rvv_intr #define SETV(PRECISION_CHAR) SETV_(PRECISION_CHAR) -#define SCALV_(PRECISION_CHAR) bli_##PRECISION_CHAR##scalv_sifive_x280_intr +#define SCALV_(PRECISION_CHAR) bli_##PRECISION_CHAR##scalv_sifive_rvv_intr #define SCALV(PRECISION_CHAR) SCALV_(PRECISION_CHAR) -#define SCAL2V_(PRECISION_CHAR) bli_##PRECISION_CHAR##scal2v_sifive_x280_intr +#define SCAL2V_(PRECISION_CHAR) bli_##PRECISION_CHAR##scal2v_sifive_rvv_intr #define SCAL2V(PRECISION_CHAR) SCAL2V_(PRECISION_CHAR) // Single precision real @@ -66,7 +66,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(float) -#include "./bli_axpbyv_sifive_x280_intr_real.c" +#include "./bli_axpbyv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -81,7 +81,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(double) -#include "./bli_axpbyv_sifive_x280_intr_real.c" +#include "./bli_axpbyv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -97,7 +97,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(float) -#include "./bli_axpbyv_sifive_x280_intr_complex.c" +#include "./bli_axpbyv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -114,7 +114,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(double) -#include "./bli_axpbyv_sifive_x280_intr_complex.c" +#include "./bli_axpbyv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT diff --git a/kernels/sifive_x280/1/bli_axpbyv_sifive_x280_intr/bli_axpbyv_sifive_x280_intr_complex.c b/kernels/sifive_rvv/1/bli_axpbyv_sifive_rvv_intr/bli_axpbyv_sifive_rvv_intr_complex.c similarity index 99% rename from kernels/sifive_x280/1/bli_axpbyv_sifive_x280_intr/bli_axpbyv_sifive_x280_intr_complex.c rename to kernels/sifive_rvv/1/bli_axpbyv_sifive_rvv_intr/bli_axpbyv_sifive_rvv_intr_complex.c index 31fc584b97..af034824e1 100644 --- a/kernels/sifive_x280/1/bli_axpbyv_sifive_x280_intr/bli_axpbyv_sifive_x280_intr_complex.c 
+++ b/kernels/sifive_rvv/1/bli_axpbyv_sifive_rvv_intr/bli_axpbyv_sifive_rvv_intr_complex.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/1/bli_axpbyv_sifive_x280_intr/bli_axpbyv_sifive_x280_intr_real.c b/kernels/sifive_rvv/1/bli_axpbyv_sifive_rvv_intr/bli_axpbyv_sifive_rvv_intr_real.c similarity index 98% rename from kernels/sifive_x280/1/bli_axpbyv_sifive_x280_intr/bli_axpbyv_sifive_x280_intr_real.c rename to kernels/sifive_rvv/1/bli_axpbyv_sifive_rvv_intr/bli_axpbyv_sifive_rvv_intr_real.c index 33eafc5d12..b482189028 100644 --- a/kernels/sifive_x280/1/bli_axpbyv_sifive_x280_intr/bli_axpbyv_sifive_x280_intr_real.c +++ b/kernels/sifive_rvv/1/bli_axpbyv_sifive_rvv_intr/bli_axpbyv_sifive_rvv_intr_real.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/1/bli_axpyv_sifive_x280_intr/bli_axpyv_sifive_x280_intr.c b/kernels/sifive_rvv/1/bli_axpyv_sifive_rvv_intr/bli_axpyv_sifive_rvv_intr.c similarity index 92% rename from kernels/sifive_x280/1/bli_axpyv_sifive_x280_intr/bli_axpyv_sifive_x280_intr.c rename to kernels/sifive_rvv/1/bli_axpyv_sifive_rvv_intr/bli_axpyv_sifive_rvv_intr.c index 3f9ebd3b04..07dc6a416b 100644 --- a/kernels/sifive_x280/1/bli_axpyv_sifive_x280_intr/bli_axpyv_sifive_x280_intr.c +++ b/kernels/sifive_rvv/1/bli_axpyv_sifive_rvv_intr/bli_axpyv_sifive_rvv_intr.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. 
+ Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,7 +40,7 @@ #include "../../riscv_overloaded_intrinsics.h" -#define AXPYV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##axpyv_sifive_x280_intr(\ +#define AXPYV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##axpyv_sifive_rvv_intr(\ conj_t conjx, \ dim_t n, \ const T* restrict alpha_, \ @@ -58,7 +58,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(float) -#include "./bli_axpyv_sifive_x280_intr_real.c" +#include "./bli_axpyv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -73,7 +73,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(double) -#include "./bli_axpyv_sifive_x280_intr_real.c" +#include "./bli_axpyv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -89,7 +89,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(float) -#include "./bli_axpyv_sifive_x280_intr_complex.c" +#include "./bli_axpyv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -106,7 +106,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(double) -#include "./bli_axpyv_sifive_x280_intr_complex.c" +#include "./bli_axpyv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT diff --git a/kernels/sifive_x280/1/bli_axpyv_sifive_x280_intr/bli_axpyv_sifive_x280_intr_complex.c b/kernels/sifive_rvv/1/bli_axpyv_sifive_rvv_intr/bli_axpyv_sifive_rvv_intr_complex.c similarity index 99% rename from kernels/sifive_x280/1/bli_axpyv_sifive_x280_intr/bli_axpyv_sifive_x280_intr_complex.c rename to kernels/sifive_rvv/1/bli_axpyv_sifive_rvv_intr/bli_axpyv_sifive_rvv_intr_complex.c index dc520d2125..1b88f7d260 100644 --- a/kernels/sifive_x280/1/bli_axpyv_sifive_x280_intr/bli_axpyv_sifive_x280_intr_complex.c +++ b/kernels/sifive_rvv/1/bli_axpyv_sifive_rvv_intr/bli_axpyv_sifive_rvv_intr_complex.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. 
- Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/1/bli_axpyv_sifive_x280_intr/bli_axpyv_sifive_x280_intr_real.c b/kernels/sifive_rvv/1/bli_axpyv_sifive_rvv_intr/bli_axpyv_sifive_rvv_intr_real.c similarity index 98% rename from kernels/sifive_x280/1/bli_axpyv_sifive_x280_intr/bli_axpyv_sifive_x280_intr_real.c rename to kernels/sifive_rvv/1/bli_axpyv_sifive_rvv_intr/bli_axpyv_sifive_rvv_intr_real.c index 0c2cda842f..8ad0ac3fb6 100644 --- a/kernels/sifive_x280/1/bli_axpyv_sifive_x280_intr/bli_axpyv_sifive_x280_intr_real.c +++ b/kernels/sifive_rvv/1/bli_axpyv_sifive_rvv_intr/bli_axpyv_sifive_rvv_intr_real.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/1/bli_copyv_sifive_x280_intr/bli_copyv_sifive_x280_intr.c b/kernels/sifive_rvv/1/bli_copyv_sifive_rvv_intr/bli_copyv_sifive_rvv_intr.c similarity index 93% rename from kernels/sifive_x280/1/bli_copyv_sifive_x280_intr/bli_copyv_sifive_x280_intr.c rename to kernels/sifive_rvv/1/bli_copyv_sifive_rvv_intr/bli_copyv_sifive_rvv_intr.c index e030d85ff3..ab9cf0f346 100644 --- a/kernels/sifive_x280/1/bli_copyv_sifive_x280_intr/bli_copyv_sifive_x280_intr.c +++ b/kernels/sifive_rvv/1/bli_copyv_sifive_rvv_intr/bli_copyv_sifive_rvv_intr.c @@ -38,7 +38,7 @@ #include #include -#define COPYV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##copyv_sifive_x280_intr(\ +#define COPYV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##copyv_sifive_rvv_intr(\ conj_t conjx, \ dim_t n, \ const T* restrict x_, inc_t incx, \ @@ -55,7 +55,7 @@ #define LMUL m8 #define FLT_SIZE 
sizeof(float) -#include "./bli_copyv_sifive_x280_intr_real.c" +#include "./bli_copyv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -70,7 +70,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(double) -#include "./bli_copyv_sifive_x280_intr_real.c" +#include "./bli_copyv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -86,7 +86,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(float) -#include "./bli_copyv_sifive_x280_intr_complex.c" +#include "./bli_copyv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -103,7 +103,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(double) -#include "./bli_copyv_sifive_x280_intr_complex.c" +#include "./bli_copyv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT diff --git a/kernels/sifive_x280/1/bli_copyv_sifive_x280_intr/bli_copyv_sifive_x280_intr_complex.c b/kernels/sifive_rvv/1/bli_copyv_sifive_rvv_intr/bli_copyv_sifive_rvv_intr_complex.c similarity index 100% rename from kernels/sifive_x280/1/bli_copyv_sifive_x280_intr/bli_copyv_sifive_x280_intr_complex.c rename to kernels/sifive_rvv/1/bli_copyv_sifive_rvv_intr/bli_copyv_sifive_rvv_intr_complex.c diff --git a/kernels/sifive_x280/1/bli_copyv_sifive_x280_intr/bli_copyv_sifive_x280_intr_real.c b/kernels/sifive_rvv/1/bli_copyv_sifive_rvv_intr/bli_copyv_sifive_rvv_intr_real.c similarity index 100% rename from kernels/sifive_x280/1/bli_copyv_sifive_x280_intr/bli_copyv_sifive_x280_intr_real.c rename to kernels/sifive_rvv/1/bli_copyv_sifive_rvv_intr/bli_copyv_sifive_rvv_intr_real.c diff --git a/kernels/sifive_x280/1/bli_dotv_sifive_x280_intr/bli_dotv_sifive_x280_intr.c b/kernels/sifive_rvv/1/bli_dotv_sifive_rvv_intr/bli_dotv_sifive_rvv_intr.c similarity index 92% rename from kernels/sifive_x280/1/bli_dotv_sifive_x280_intr/bli_dotv_sifive_x280_intr.c rename to kernels/sifive_rvv/1/bli_dotv_sifive_rvv_intr/bli_dotv_sifive_rvv_intr.c index 0dc8565400..31ae4cc26b 100644 --- a/kernels/sifive_x280/1/bli_dotv_sifive_x280_intr/bli_dotv_sifive_x280_intr.c 
+++ b/kernels/sifive_rvv/1/bli_dotv_sifive_rvv_intr/bli_dotv_sifive_rvv_intr.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,7 +40,7 @@ #include "../../riscv_overloaded_intrinsics.h" -#define DOTV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##dotv_sifive_x280_intr(\ +#define DOTV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##dotv_sifive_rvv_intr(\ conj_t conjxt, \ conj_t conjy, \ dim_t n, \ @@ -59,7 +59,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(float) -#include "./bli_dotv_sifive_x280_intr_real.c" +#include "./bli_dotv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -74,7 +74,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(double) -#include "./bli_dotv_sifive_x280_intr_real.c" +#include "./bli_dotv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -90,7 +90,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(float) -#include "./bli_dotv_sifive_x280_intr_complex.c" +#include "./bli_dotv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -107,7 +107,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(double) -#include "./bli_dotv_sifive_x280_intr_complex.c" +#include "./bli_dotv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT diff --git a/kernels/sifive_x280/1/bli_dotv_sifive_x280_intr/bli_dotv_sifive_x280_intr_complex.c b/kernels/sifive_rvv/1/bli_dotv_sifive_rvv_intr/bli_dotv_sifive_rvv_intr_complex.c similarity index 99% rename from kernels/sifive_x280/1/bli_dotv_sifive_x280_intr/bli_dotv_sifive_x280_intr_complex.c rename to kernels/sifive_rvv/1/bli_dotv_sifive_rvv_intr/bli_dotv_sifive_rvv_intr_complex.c index 250fab46e6..14dbfc4e9a 100644 --- a/kernels/sifive_x280/1/bli_dotv_sifive_x280_intr/bli_dotv_sifive_x280_intr_complex.c +++ 
b/kernels/sifive_rvv/1/bli_dotv_sifive_rvv_intr/bli_dotv_sifive_rvv_intr_complex.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/1/bli_dotv_sifive_x280_intr/bli_dotv_sifive_x280_intr_real.c b/kernels/sifive_rvv/1/bli_dotv_sifive_rvv_intr/bli_dotv_sifive_rvv_intr_real.c similarity index 98% rename from kernels/sifive_x280/1/bli_dotv_sifive_x280_intr/bli_dotv_sifive_x280_intr_real.c rename to kernels/sifive_rvv/1/bli_dotv_sifive_rvv_intr/bli_dotv_sifive_rvv_intr_real.c index 0ec8e6328a..b7aec00fd1 100644 --- a/kernels/sifive_x280/1/bli_dotv_sifive_x280_intr/bli_dotv_sifive_x280_intr_real.c +++ b/kernels/sifive_rvv/1/bli_dotv_sifive_rvv_intr/bli_dotv_sifive_rvv_intr_real.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/1/bli_dotxv_sifive_x280_intr/bli_dotxv_sifive_x280_intr.c b/kernels/sifive_rvv/1/bli_dotxv_sifive_rvv_intr/bli_dotxv_sifive_rvv_intr.c similarity index 93% rename from kernels/sifive_x280/1/bli_dotxv_sifive_x280_intr/bli_dotxv_sifive_x280_intr.c rename to kernels/sifive_rvv/1/bli_dotxv_sifive_rvv_intr/bli_dotxv_sifive_rvv_intr.c index 048f8d2983..ad405979ca 100644 --- a/kernels/sifive_x280/1/bli_dotxv_sifive_x280_intr/bli_dotxv_sifive_x280_intr.c +++ b/kernels/sifive_rvv/1/bli_dotxv_sifive_rvv_intr/bli_dotxv_sifive_rvv_intr.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. 
+ Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,7 +40,7 @@ #include "../../riscv_overloaded_intrinsics.h" -#define DOTXV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##dotxv_sifive_x280_intr(\ +#define DOTXV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##dotxv_sifive_rvv_intr(\ conj_t conjxt, \ conj_t conjy, \ dim_t n, \ @@ -62,7 +62,7 @@ #define FLT_SIZE sizeof(float) #define FMA fmaf -#include "./bli_dotxv_sifive_x280_intr_real.c" +#include "./bli_dotxv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -79,7 +79,7 @@ #define FLT_SIZE sizeof(double) #define FMA fma -#include "./bli_dotxv_sifive_x280_intr_real.c" +#include "./bli_dotxv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -97,7 +97,7 @@ #define FLT_SIZE sizeof(float) #define FMA fmaf -#include "./bli_dotxv_sifive_x280_intr_complex.c" +#include "./bli_dotxv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -116,7 +116,7 @@ #define FLT_SIZE sizeof(double) #define FMA fma -#include "./bli_dotxv_sifive_x280_intr_complex.c" +#include "./bli_dotxv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT diff --git a/kernels/sifive_x280/1/bli_dotxv_sifive_x280_intr/bli_dotxv_sifive_x280_intr_complex.c b/kernels/sifive_rvv/1/bli_dotxv_sifive_rvv_intr/bli_dotxv_sifive_rvv_intr_complex.c similarity index 99% rename from kernels/sifive_x280/1/bli_dotxv_sifive_x280_intr/bli_dotxv_sifive_x280_intr_complex.c rename to kernels/sifive_rvv/1/bli_dotxv_sifive_rvv_intr/bli_dotxv_sifive_rvv_intr_complex.c index 8245e8e057..1c6d3d8f7a 100644 --- a/kernels/sifive_x280/1/bli_dotxv_sifive_x280_intr/bli_dotxv_sifive_x280_intr_complex.c +++ b/kernels/sifive_rvv/1/bli_dotxv_sifive_rvv_intr/bli_dotxv_sifive_rvv_intr_complex.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. 
+ Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/1/bli_dotxv_sifive_x280_intr/bli_dotxv_sifive_x280_intr_real.c b/kernels/sifive_rvv/1/bli_dotxv_sifive_rvv_intr/bli_dotxv_sifive_rvv_intr_real.c similarity index 98% rename from kernels/sifive_x280/1/bli_dotxv_sifive_x280_intr/bli_dotxv_sifive_x280_intr_real.c rename to kernels/sifive_rvv/1/bli_dotxv_sifive_rvv_intr/bli_dotxv_sifive_rvv_intr_real.c index f9d9346973..1f84ae610f 100644 --- a/kernels/sifive_x280/1/bli_dotxv_sifive_x280_intr/bli_dotxv_sifive_x280_intr_real.c +++ b/kernels/sifive_rvv/1/bli_dotxv_sifive_rvv_intr/bli_dotxv_sifive_rvv_intr_real.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/1/bli_invertv_sifive_x280_intr/bli_invertv_sifive_x280_intr.c b/kernels/sifive_rvv/1/bli_invertv_sifive_rvv_intr/bli_invertv_sifive_rvv_intr.c similarity index 93% rename from kernels/sifive_x280/1/bli_invertv_sifive_x280_intr/bli_invertv_sifive_x280_intr.c rename to kernels/sifive_rvv/1/bli_invertv_sifive_rvv_intr/bli_invertv_sifive_rvv_intr.c index fc8f8a76d7..7f4443479b 100644 --- a/kernels/sifive_x280/1/bli_invertv_sifive_x280_intr/bli_invertv_sifive_x280_intr.c +++ b/kernels/sifive_rvv/1/bli_invertv_sifive_rvv_intr/bli_invertv_sifive_rvv_intr.c @@ -38,7 +38,7 @@ #include #include -#define INVERTV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##invertv_sifive_x280_intr(\ +#define INVERTV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##invertv_sifive_rvv_intr(\ dim_t n, \ T* restrict x_, inc_t incx, \ const cntx_t* cntx \ @@ -53,7 +53,7 @@ #define LMUL m8 #define FLT_SIZE 
sizeof(float) -#include "./bli_invertv_sifive_x280_intr_real.c" +#include "./bli_invertv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -68,7 +68,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(double) -#include "./bli_invertv_sifive_x280_intr_real.c" +#include "./bli_invertv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -85,7 +85,7 @@ #define RATIO 8 #define FLT_SIZE sizeof(float) -#include "./bli_invertv_sifive_x280_intr_complex.c" +#include "./bli_invertv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -104,7 +104,7 @@ #define RATIO 16 #define FLT_SIZE sizeof(double) -#include "./bli_invertv_sifive_x280_intr_complex.c" +#include "./bli_invertv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT diff --git a/kernels/sifive_x280/1/bli_invertv_sifive_x280_intr/bli_invertv_sifive_x280_intr_complex.c b/kernels/sifive_rvv/1/bli_invertv_sifive_rvv_intr/bli_invertv_sifive_rvv_intr_complex.c similarity index 100% rename from kernels/sifive_x280/1/bli_invertv_sifive_x280_intr/bli_invertv_sifive_x280_intr_complex.c rename to kernels/sifive_rvv/1/bli_invertv_sifive_rvv_intr/bli_invertv_sifive_rvv_intr_complex.c diff --git a/kernels/sifive_x280/1/bli_invertv_sifive_x280_intr/bli_invertv_sifive_x280_intr_real.c b/kernels/sifive_rvv/1/bli_invertv_sifive_rvv_intr/bli_invertv_sifive_rvv_intr_real.c similarity index 100% rename from kernels/sifive_x280/1/bli_invertv_sifive_x280_intr/bli_invertv_sifive_x280_intr_real.c rename to kernels/sifive_rvv/1/bli_invertv_sifive_rvv_intr/bli_invertv_sifive_rvv_intr_real.c diff --git a/kernels/sifive_x280/1/bli_invscalv_sifive_x280_intr/bli_invscalv_sifive_x280_intr.c b/kernels/sifive_rvv/1/bli_invscalv_sifive_rvv_intr/bli_invscalv_sifive_rvv_intr.c similarity index 93% rename from kernels/sifive_x280/1/bli_invscalv_sifive_x280_intr/bli_invscalv_sifive_x280_intr.c rename to kernels/sifive_rvv/1/bli_invscalv_sifive_rvv_intr/bli_invscalv_sifive_rvv_intr.c index a5c7561bd8..0dc9c01aba 100644 
--- a/kernels/sifive_x280/1/bli_invscalv_sifive_x280_intr/bli_invscalv_sifive_x280_intr.c +++ b/kernels/sifive_rvv/1/bli_invscalv_sifive_rvv_intr/bli_invscalv_sifive_rvv_intr.c @@ -39,7 +39,7 @@ #include #include -#define INVSCALV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##invscalv_sifive_x280_intr(\ +#define INVSCALV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##invscalv_sifive_rvv_intr(\ conj_t conjalpha, \ dim_t n, \ const T* restrict alpha_, \ @@ -56,7 +56,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(float) -#include "./bli_invscalv_sifive_x280_intr_real.c" +#include "./bli_invscalv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -71,7 +71,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(double) -#include "./bli_invscalv_sifive_x280_intr_real.c" +#include "./bli_invscalv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -87,7 +87,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(float) -#include "./bli_invscalv_sifive_x280_intr_complex.c" +#include "./bli_invscalv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -104,7 +104,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(double) -#include "./bli_invscalv_sifive_x280_intr_complex.c" +#include "./bli_invscalv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT diff --git a/kernels/sifive_x280/1/bli_invscalv_sifive_x280_intr/bli_invscalv_sifive_x280_intr_complex.c b/kernels/sifive_rvv/1/bli_invscalv_sifive_rvv_intr/bli_invscalv_sifive_rvv_intr_complex.c similarity index 100% rename from kernels/sifive_x280/1/bli_invscalv_sifive_x280_intr/bli_invscalv_sifive_x280_intr_complex.c rename to kernels/sifive_rvv/1/bli_invscalv_sifive_rvv_intr/bli_invscalv_sifive_rvv_intr_complex.c diff --git a/kernels/sifive_x280/1/bli_invscalv_sifive_x280_intr/bli_invscalv_sifive_x280_intr_real.c b/kernels/sifive_rvv/1/bli_invscalv_sifive_rvv_intr/bli_invscalv_sifive_rvv_intr_real.c similarity index 100% rename from 
kernels/sifive_x280/1/bli_invscalv_sifive_x280_intr/bli_invscalv_sifive_x280_intr_real.c rename to kernels/sifive_rvv/1/bli_invscalv_sifive_rvv_intr/bli_invscalv_sifive_rvv_intr_real.c diff --git a/kernels/sifive_x280/1/bli_scal2v_sifive_x280_intr/bli_scal2v_sifive_x280_intr.c b/kernels/sifive_rvv/1/bli_scal2v_sifive_rvv_intr/bli_scal2v_sifive_rvv_intr.c similarity index 92% rename from kernels/sifive_x280/1/bli_scal2v_sifive_x280_intr/bli_scal2v_sifive_x280_intr.c rename to kernels/sifive_rvv/1/bli_scal2v_sifive_rvv_intr/bli_scal2v_sifive_rvv_intr.c index 4cae8257c3..b434f751eb 100644 --- a/kernels/sifive_x280/1/bli_scal2v_sifive_x280_intr/bli_scal2v_sifive_x280_intr.c +++ b/kernels/sifive_rvv/1/bli_scal2v_sifive_rvv_intr/bli_scal2v_sifive_rvv_intr.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,7 +40,7 @@ #include "../../riscv_overloaded_intrinsics.h" -#define SCAL2V_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##scal2v_sifive_x280_intr(\ +#define SCAL2V_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##scal2v_sifive_rvv_intr(\ conj_t conjx, \ dim_t n, \ const T* restrict alpha_, \ @@ -51,9 +51,9 @@ #define SCAL2V(...) 
SCAL2V_(__VA_ARGS__) -#define COPYV_(PRECISION_CHAR) bli_##PRECISION_CHAR##copyv_sifive_x280_intr +#define COPYV_(PRECISION_CHAR) bli_##PRECISION_CHAR##copyv_sifive_rvv_intr #define COPYV(PRECISION_CHAR) COPYV_(PRECISION_CHAR) -#define SETV_(PRECISION_CHAR) bli_##PRECISION_CHAR##setv_sifive_x280_intr +#define SETV_(PRECISION_CHAR) bli_##PRECISION_CHAR##setv_sifive_rvv_intr #define SETV(PRECISION_CHAR) SETV_(PRECISION_CHAR) // Single precision real @@ -63,7 +63,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(float) -#include "./bli_scal2v_sifive_x280_intr_real.c" +#include "./bli_scal2v_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -78,7 +78,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(double) -#include "./bli_scal2v_sifive_x280_intr_real.c" +#include "./bli_scal2v_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -94,7 +94,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(float) -#include "./bli_scal2v_sifive_x280_intr_complex.c" +#include "./bli_scal2v_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -111,7 +111,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(double) -#include "./bli_scal2v_sifive_x280_intr_complex.c" +#include "./bli_scal2v_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT diff --git a/kernels/sifive_x280/1/bli_scal2v_sifive_x280_intr/bli_scal2v_sifive_x280_intr_complex.c b/kernels/sifive_rvv/1/bli_scal2v_sifive_rvv_intr/bli_scal2v_sifive_rvv_intr_complex.c similarity index 99% rename from kernels/sifive_x280/1/bli_scal2v_sifive_x280_intr/bli_scal2v_sifive_x280_intr_complex.c rename to kernels/sifive_rvv/1/bli_scal2v_sifive_rvv_intr/bli_scal2v_sifive_rvv_intr_complex.c index 2e946a2a4c..c2272ae3bb 100644 --- a/kernels/sifive_x280/1/bli_scal2v_sifive_x280_intr/bli_scal2v_sifive_x280_intr_complex.c +++ b/kernels/sifive_rvv/1/bli_scal2v_sifive_rvv_intr/bli_scal2v_sifive_rvv_intr_complex.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. 
- Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/1/bli_scal2v_sifive_x280_intr/bli_scal2v_sifive_x280_intr_real.c b/kernels/sifive_rvv/1/bli_scal2v_sifive_rvv_intr/bli_scal2v_sifive_rvv_intr_real.c similarity index 98% rename from kernels/sifive_x280/1/bli_scal2v_sifive_x280_intr/bli_scal2v_sifive_x280_intr_real.c rename to kernels/sifive_rvv/1/bli_scal2v_sifive_rvv_intr/bli_scal2v_sifive_rvv_intr_real.c index 7084e15cf5..7b80882028 100644 --- a/kernels/sifive_x280/1/bli_scal2v_sifive_x280_intr/bli_scal2v_sifive_x280_intr_real.c +++ b/kernels/sifive_rvv/1/bli_scal2v_sifive_rvv_intr/bli_scal2v_sifive_rvv_intr_real.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/1/bli_scalv_sifive_x280_intr/bli_scalv_sifive_x280_intr.c b/kernels/sifive_rvv/1/bli_scalv_sifive_rvv_intr/bli_scalv_sifive_rvv_intr.c similarity index 92% rename from kernels/sifive_x280/1/bli_scalv_sifive_x280_intr/bli_scalv_sifive_x280_intr.c rename to kernels/sifive_rvv/1/bli_scalv_sifive_rvv_intr/bli_scalv_sifive_rvv_intr.c index d1fb9940eb..c6b19ea00b 100644 --- a/kernels/sifive_x280/1/bli_scalv_sifive_x280_intr/bli_scalv_sifive_x280_intr.c +++ b/kernels/sifive_rvv/1/bli_scalv_sifive_rvv_intr/bli_scalv_sifive_rvv_intr.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -39,7 +39,7 @@ #include "blis.h" #include "../../riscv_overloaded_intrinsics.h" -#define SCALV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##scalv_sifive_x280_intr(\ +#define SCALV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##scalv_sifive_rvv_intr(\ conj_t conjalpha, \ dim_t n, \ const T* restrict alpha_, \ @@ -49,7 +49,7 @@ #define SCALV(...) SCALV_(__VA_ARGS__) -#define SETV_(PRECISION_CHAR) bli_##PRECISION_CHAR##setv_sifive_x280_intr +#define SETV_(PRECISION_CHAR) bli_##PRECISION_CHAR##setv_sifive_rvv_intr #define SETV(PRECISION_CHAR) SETV_(PRECISION_CHAR) // Single precision real @@ -59,7 +59,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(float) -#include "./bli_scalv_sifive_x280_intr_real.c" +#include "./bli_scalv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -74,7 +74,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(double) -#include "./bli_scalv_sifive_x280_intr_real.c" +#include "./bli_scalv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -90,7 +90,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(float) -#include "./bli_scalv_sifive_x280_intr_complex.c" +#include "./bli_scalv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -107,7 +107,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(double) -#include "./bli_scalv_sifive_x280_intr_complex.c" +#include "./bli_scalv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT diff --git a/kernels/sifive_x280/1/bli_scalv_sifive_x280_intr/bli_scalv_sifive_x280_intr_complex.c b/kernels/sifive_rvv/1/bli_scalv_sifive_rvv_intr/bli_scalv_sifive_rvv_intr_complex.c similarity index 98% rename from kernels/sifive_x280/1/bli_scalv_sifive_x280_intr/bli_scalv_sifive_x280_intr_complex.c rename to kernels/sifive_rvv/1/bli_scalv_sifive_rvv_intr/bli_scalv_sifive_rvv_intr_complex.c index c6803c9676..20f49ebdf6 100644 --- 
a/kernels/sifive_x280/1/bli_scalv_sifive_x280_intr/bli_scalv_sifive_x280_intr_complex.c +++ b/kernels/sifive_rvv/1/bli_scalv_sifive_rvv_intr/bli_scalv_sifive_rvv_intr_complex.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/1/bli_scalv_sifive_x280_intr/bli_scalv_sifive_x280_intr_real.c b/kernels/sifive_rvv/1/bli_scalv_sifive_rvv_intr/bli_scalv_sifive_rvv_intr_real.c similarity index 98% rename from kernels/sifive_x280/1/bli_scalv_sifive_x280_intr/bli_scalv_sifive_x280_intr_real.c rename to kernels/sifive_rvv/1/bli_scalv_sifive_rvv_intr/bli_scalv_sifive_rvv_intr_real.c index 2b4e31d359..7cc2dd6b64 100644 --- a/kernels/sifive_x280/1/bli_scalv_sifive_x280_intr/bli_scalv_sifive_x280_intr_real.c +++ b/kernels/sifive_rvv/1/bli_scalv_sifive_rvv_intr/bli_scalv_sifive_rvv_intr_real.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/1/bli_setv_sifive_x280_intr/bli_setv_sifive_x280_intr.c b/kernels/sifive_rvv/1/bli_setv_sifive_rvv_intr/bli_setv_sifive_rvv_intr.c similarity index 93% rename from kernels/sifive_x280/1/bli_setv_sifive_x280_intr/bli_setv_sifive_x280_intr.c rename to kernels/sifive_rvv/1/bli_setv_sifive_rvv_intr/bli_setv_sifive_rvv_intr.c index 8c2ba7c72a..33cfb4a573 100644 --- a/kernels/sifive_x280/1/bli_setv_sifive_x280_intr/bli_setv_sifive_x280_intr.c +++ b/kernels/sifive_rvv/1/bli_setv_sifive_rvv_intr/bli_setv_sifive_rvv_intr.c @@ -38,7 +38,7 @@ #include #include -#define SETV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##setv_sifive_x280_intr(\ +#define SETV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##setv_sifive_rvv_intr(\ conj_t conjalpha, \ dim_t n, \ const T* restrict alpha_, \ @@ -55,7 +55,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(float) -#include "./bli_setv_sifive_x280_intr_real.c" +#include "./bli_setv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -70,7 +70,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(double) -#include "./bli_setv_sifive_x280_intr_real.c" +#include "./bli_setv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -86,7 +86,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(float) -#include "./bli_setv_sifive_x280_intr_complex.c" +#include "./bli_setv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -103,7 +103,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(double) -#include "./bli_setv_sifive_x280_intr_complex.c" +#include "./bli_setv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT diff --git a/kernels/sifive_x280/1/bli_setv_sifive_x280_intr/bli_setv_sifive_x280_intr_complex.c b/kernels/sifive_rvv/1/bli_setv_sifive_rvv_intr/bli_setv_sifive_rvv_intr_complex.c similarity index 100% rename from 
kernels/sifive_x280/1/bli_setv_sifive_x280_intr/bli_setv_sifive_x280_intr_complex.c rename to kernels/sifive_rvv/1/bli_setv_sifive_rvv_intr/bli_setv_sifive_rvv_intr_complex.c diff --git a/kernels/sifive_x280/1/bli_setv_sifive_x280_intr/bli_setv_sifive_x280_intr_real.c b/kernels/sifive_rvv/1/bli_setv_sifive_rvv_intr/bli_setv_sifive_rvv_intr_real.c similarity index 100% rename from kernels/sifive_x280/1/bli_setv_sifive_x280_intr/bli_setv_sifive_x280_intr_real.c rename to kernels/sifive_rvv/1/bli_setv_sifive_rvv_intr/bli_setv_sifive_rvv_intr_real.c diff --git a/kernels/sifive_x280/1/bli_subv_sifive_x280_intr/bli_subv_sifive_x280_intr.c b/kernels/sifive_rvv/1/bli_subv_sifive_rvv_intr/bli_subv_sifive_rvv_intr.c similarity index 92% rename from kernels/sifive_x280/1/bli_subv_sifive_x280_intr/bli_subv_sifive_x280_intr.c rename to kernels/sifive_rvv/1/bli_subv_sifive_rvv_intr/bli_subv_sifive_rvv_intr.c index e6b483a3f8..0ba7c53041 100644 --- a/kernels/sifive_x280/1/bli_subv_sifive_x280_intr/bli_subv_sifive_x280_intr.c +++ b/kernels/sifive_rvv/1/bli_subv_sifive_rvv_intr/bli_subv_sifive_rvv_intr.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,7 +40,7 @@ #include "../../riscv_overloaded_intrinsics.h" -#define SUBV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##subv_sifive_x280_intr(\ +#define SUBV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##subv_sifive_rvv_intr(\ conj_t conjx, \ dim_t n, \ const T* restrict x_, inc_t incx, \ @@ -57,7 +57,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(float) -#include "./bli_subv_sifive_x280_intr_real.c" +#include "./bli_subv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -72,7 +72,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(double) -#include "./bli_subv_sifive_x280_intr_real.c" +#include "./bli_subv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -88,7 +88,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(float) -#include "./bli_subv_sifive_x280_intr_complex.c" +#include "./bli_subv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -105,7 +105,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(double) -#include "./bli_subv_sifive_x280_intr_complex.c" +#include "./bli_subv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT diff --git a/kernels/sifive_x280/1/bli_subv_sifive_x280_intr/bli_subv_sifive_x280_intr_complex.c b/kernels/sifive_rvv/1/bli_subv_sifive_rvv_intr/bli_subv_sifive_rvv_intr_complex.c similarity index 98% rename from kernels/sifive_x280/1/bli_subv_sifive_x280_intr/bli_subv_sifive_x280_intr_complex.c rename to kernels/sifive_rvv/1/bli_subv_sifive_rvv_intr/bli_subv_sifive_rvv_intr_complex.c index 2d4a1a017f..62eab516d4 100644 --- a/kernels/sifive_x280/1/bli_subv_sifive_x280_intr/bli_subv_sifive_x280_intr_complex.c +++ b/kernels/sifive_rvv/1/bli_subv_sifive_rvv_intr/bli_subv_sifive_rvv_intr_complex.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/1/bli_subv_sifive_x280_intr/bli_subv_sifive_x280_intr_real.c b/kernels/sifive_rvv/1/bli_subv_sifive_rvv_intr/bli_subv_sifive_rvv_intr_real.c similarity index 98% rename from kernels/sifive_x280/1/bli_subv_sifive_x280_intr/bli_subv_sifive_x280_intr_real.c rename to kernels/sifive_rvv/1/bli_subv_sifive_rvv_intr/bli_subv_sifive_rvv_intr_real.c index b158594319..5488007b2b 100644 --- a/kernels/sifive_x280/1/bli_subv_sifive_x280_intr/bli_subv_sifive_x280_intr_real.c +++ b/kernels/sifive_rvv/1/bli_subv_sifive_rvv_intr/bli_subv_sifive_rvv_intr_real.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/1/bli_swapv_sifive_x280_intr/bli_swapv_sifive_x280_intr.c b/kernels/sifive_rvv/1/bli_swapv_sifive_rvv_intr/bli_swapv_sifive_rvv_intr.c similarity index 93% rename from kernels/sifive_x280/1/bli_swapv_sifive_x280_intr/bli_swapv_sifive_x280_intr.c rename to kernels/sifive_rvv/1/bli_swapv_sifive_rvv_intr/bli_swapv_sifive_rvv_intr.c index baf685d35f..ec14df9cb6 100644 --- a/kernels/sifive_x280/1/bli_swapv_sifive_x280_intr/bli_swapv_sifive_x280_intr.c +++ b/kernels/sifive_rvv/1/bli_swapv_sifive_rvv_intr/bli_swapv_sifive_rvv_intr.c @@ -38,7 +38,7 @@ #include #include -#define SWAPV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##swapv_sifive_x280_intr(\ +#define SWAPV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##swapv_sifive_rvv_intr(\ dim_t n, \ T* restrict x_, inc_t incx, \ T* restrict y_, inc_t incy, \ @@ -54,7 +54,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(float) -#include "./bli_swapv_sifive_x280_intr_real.c" +#include 
"./bli_swapv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -69,7 +69,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(double) -#include "./bli_swapv_sifive_x280_intr_real.c" +#include "./bli_swapv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -85,7 +85,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(float) -#include "./bli_swapv_sifive_x280_intr_complex.c" +#include "./bli_swapv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -102,7 +102,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(double) -#include "./bli_swapv_sifive_x280_intr_complex.c" +#include "./bli_swapv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT diff --git a/kernels/sifive_x280/1/bli_swapv_sifive_x280_intr/bli_swapv_sifive_x280_intr_complex.c b/kernels/sifive_rvv/1/bli_swapv_sifive_rvv_intr/bli_swapv_sifive_rvv_intr_complex.c similarity index 100% rename from kernels/sifive_x280/1/bli_swapv_sifive_x280_intr/bli_swapv_sifive_x280_intr_complex.c rename to kernels/sifive_rvv/1/bli_swapv_sifive_rvv_intr/bli_swapv_sifive_rvv_intr_complex.c diff --git a/kernels/sifive_x280/1/bli_swapv_sifive_x280_intr/bli_swapv_sifive_x280_intr_real.c b/kernels/sifive_rvv/1/bli_swapv_sifive_rvv_intr/bli_swapv_sifive_rvv_intr_real.c similarity index 100% rename from kernels/sifive_x280/1/bli_swapv_sifive_x280_intr/bli_swapv_sifive_x280_intr_real.c rename to kernels/sifive_rvv/1/bli_swapv_sifive_rvv_intr/bli_swapv_sifive_rvv_intr_real.c diff --git a/kernels/sifive_x280/1/bli_xpbyv_sifive_x280_intr/bli_xpbyv_sifive_x280_intr.c b/kernels/sifive_rvv/1/bli_xpbyv_sifive_rvv_intr/bli_xpbyv_sifive_rvv_intr.c similarity index 92% rename from kernels/sifive_x280/1/bli_xpbyv_sifive_x280_intr/bli_xpbyv_sifive_x280_intr.c rename to kernels/sifive_rvv/1/bli_xpbyv_sifive_rvv_intr/bli_xpbyv_sifive_rvv_intr.c index da688851d0..0f6a6d550b 100644 --- a/kernels/sifive_x280/1/bli_xpbyv_sifive_x280_intr/bli_xpbyv_sifive_x280_intr.c +++ 
b/kernels/sifive_rvv/1/bli_xpbyv_sifive_rvv_intr/bli_xpbyv_sifive_rvv_intr.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,7 +40,7 @@ #include "../../riscv_overloaded_intrinsics.h" -#define XPBYV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##xpbyv_sifive_x280_intr(\ +#define XPBYV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##xpbyv_sifive_rvv_intr(\ conj_t conjx, \ dim_t n, \ const T* restrict x_, inc_t incx, \ @@ -51,7 +51,7 @@ #define XPBYV(...) XPBYV_(__VA_ARGS__) -#define COPYV_(PRECISION_CHAR) bli_##PRECISION_CHAR##copyv_sifive_x280_intr +#define COPYV_(PRECISION_CHAR) bli_##PRECISION_CHAR##copyv_sifive_rvv_intr #define COPYV(PRECISION_CHAR) COPYV_(PRECISION_CHAR) // Single precision real @@ -61,7 +61,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(float) -#include "./bli_xpbyv_sifive_x280_intr_real.c" +#include "./bli_xpbyv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -76,7 +76,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(double) -#include "./bli_xpbyv_sifive_x280_intr_real.c" +#include "./bli_xpbyv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -92,7 +92,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(float) -#include "./bli_xpbyv_sifive_x280_intr_complex.c" +#include "./bli_xpbyv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -109,7 +109,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(double) -#include "./bli_xpbyv_sifive_x280_intr_complex.c" +#include "./bli_xpbyv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT diff --git a/kernels/sifive_x280/1/bli_xpbyv_sifive_x280_intr/bli_xpbyv_sifive_x280_intr_complex.c b/kernels/sifive_rvv/1/bli_xpbyv_sifive_rvv_intr/bli_xpbyv_sifive_rvv_intr_complex.c similarity index 99% rename from 
kernels/sifive_x280/1/bli_xpbyv_sifive_x280_intr/bli_xpbyv_sifive_x280_intr_complex.c rename to kernels/sifive_rvv/1/bli_xpbyv_sifive_rvv_intr/bli_xpbyv_sifive_rvv_intr_complex.c index 4c86e8b36a..1eb2fff234 100644 --- a/kernels/sifive_x280/1/bli_xpbyv_sifive_x280_intr/bli_xpbyv_sifive_x280_intr_complex.c +++ b/kernels/sifive_rvv/1/bli_xpbyv_sifive_rvv_intr/bli_xpbyv_sifive_rvv_intr_complex.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/1/bli_xpbyv_sifive_x280_intr/bli_xpbyv_sifive_x280_intr_real.c b/kernels/sifive_rvv/1/bli_xpbyv_sifive_rvv_intr/bli_xpbyv_sifive_rvv_intr_real.c similarity index 98% rename from kernels/sifive_x280/1/bli_xpbyv_sifive_x280_intr/bli_xpbyv_sifive_x280_intr_real.c rename to kernels/sifive_rvv/1/bli_xpbyv_sifive_rvv_intr/bli_xpbyv_sifive_rvv_intr_real.c index b23272fea4..f4a8aa72eb 100644 --- a/kernels/sifive_x280/1/bli_xpbyv_sifive_x280_intr/bli_xpbyv_sifive_x280_intr_real.c +++ b/kernels/sifive_rvv/1/bli_xpbyv_sifive_rvv_intr/bli_xpbyv_sifive_rvv_intr_real.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/1f/bli_axpy2v_sifive_x280_intr/bli_axpy2v_sifive_x280_intr.c b/kernels/sifive_rvv/1f/bli_axpy2v_sifive_rvv_intr/bli_axpy2v_sifive_rvv_intr.c similarity index 92% rename from kernels/sifive_x280/1f/bli_axpy2v_sifive_x280_intr/bli_axpy2v_sifive_x280_intr.c rename to kernels/sifive_rvv/1f/bli_axpy2v_sifive_rvv_intr/bli_axpy2v_sifive_rvv_intr.c index 1b5ce3b962..e9d4a8b5f5 100644 --- a/kernels/sifive_x280/1f/bli_axpy2v_sifive_x280_intr/bli_axpy2v_sifive_x280_intr.c +++ b/kernels/sifive_rvv/1f/bli_axpy2v_sifive_rvv_intr/bli_axpy2v_sifive_rvv_intr.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,7 +40,7 @@ #include "../../riscv_overloaded_intrinsics.h" -#define AXPY2V_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##axpy2v_sifive_x280_intr(\ +#define AXPY2V_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##axpy2v_sifive_rvv_intr(\ conj_t conjx, \ conj_t conjy, \ dim_t n, \ @@ -61,7 +61,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(float) -#include "./bli_axpy2v_sifive_x280_intr_real.c" +#include "./bli_axpy2v_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -76,7 +76,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(double) -#include "./bli_axpy2v_sifive_x280_intr_real.c" +#include "./bli_axpy2v_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -92,7 +92,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(float) -#include "./bli_axpy2v_sifive_x280_intr_complex.c" +#include "./bli_axpy2v_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -109,7 +109,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(double) -#include 
"./bli_axpy2v_sifive_x280_intr_complex.c" +#include "./bli_axpy2v_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT diff --git a/kernels/sifive_x280/1f/bli_axpy2v_sifive_x280_intr/bli_axpy2v_sifive_x280_intr_complex.c b/kernels/sifive_rvv/1f/bli_axpy2v_sifive_rvv_intr/bli_axpy2v_sifive_rvv_intr_complex.c similarity index 99% rename from kernels/sifive_x280/1f/bli_axpy2v_sifive_x280_intr/bli_axpy2v_sifive_x280_intr_complex.c rename to kernels/sifive_rvv/1f/bli_axpy2v_sifive_rvv_intr/bli_axpy2v_sifive_rvv_intr_complex.c index 9b57198272..de753d2249 100644 --- a/kernels/sifive_x280/1f/bli_axpy2v_sifive_x280_intr/bli_axpy2v_sifive_x280_intr_complex.c +++ b/kernels/sifive_rvv/1f/bli_axpy2v_sifive_rvv_intr/bli_axpy2v_sifive_rvv_intr_complex.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/1f/bli_axpy2v_sifive_x280_intr/bli_axpy2v_sifive_x280_intr_real.c b/kernels/sifive_rvv/1f/bli_axpy2v_sifive_rvv_intr/bli_axpy2v_sifive_rvv_intr_real.c similarity index 98% rename from kernels/sifive_x280/1f/bli_axpy2v_sifive_x280_intr/bli_axpy2v_sifive_x280_intr_real.c rename to kernels/sifive_rvv/1f/bli_axpy2v_sifive_rvv_intr/bli_axpy2v_sifive_rvv_intr_real.c index cebb159973..b2e42155c8 100644 --- a/kernels/sifive_x280/1f/bli_axpy2v_sifive_x280_intr/bli_axpy2v_sifive_x280_intr_real.c +++ b/kernels/sifive_rvv/1f/bli_axpy2v_sifive_rvv_intr/bli_axpy2v_sifive_rvv_intr_real.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/1f/bli_axpyf_sifive_x280_intr/bli_axpyf_sifive_x280_intr.c b/kernels/sifive_rvv/1f/bli_axpyf_sifive_rvv_intr/bli_axpyf_sifive_rvv_intr.c similarity index 94% rename from kernels/sifive_x280/1f/bli_axpyf_sifive_x280_intr/bli_axpyf_sifive_x280_intr.c rename to kernels/sifive_rvv/1f/bli_axpyf_sifive_rvv_intr/bli_axpyf_sifive_rvv_intr.c index a5e0268467..ace31d7a8b 100644 --- a/kernels/sifive_x280/1f/bli_axpyf_sifive_x280_intr/bli_axpyf_sifive_x280_intr.c +++ b/kernels/sifive_rvv/1f/bli_axpyf_sifive_rvv_intr/bli_axpyf_sifive_rvv_intr.c @@ -39,7 +39,7 @@ #include #include -#define AXPYF_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##axpyf_sifive_x280_intr(\ +#define AXPYF_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##axpyf_sifive_rvv_intr(\ conj_t conja, \ conj_t conjx, \ dim_t m, \ @@ -60,7 +60,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(float) -#include "./bli_axpyf_sifive_x280_intr_real.c" +#include "./bli_axpyf_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -75,7 +75,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(double) -#include "./bli_axpyf_sifive_x280_intr_real.c" +#include "./bli_axpyf_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -91,7 +91,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(float) -#include "./bli_axpyf_sifive_x280_intr_complex.c" +#include "./bli_axpyf_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -108,7 +108,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(double) -#include "./bli_axpyf_sifive_x280_intr_complex.c" +#include "./bli_axpyf_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT diff --git a/kernels/sifive_x280/1f/bli_axpyf_sifive_x280_intr/bli_axpyf_sifive_x280_intr_complex.c b/kernels/sifive_rvv/1f/bli_axpyf_sifive_rvv_intr/bli_axpyf_sifive_rvv_intr_complex.c similarity index 100% rename from 
kernels/sifive_x280/1f/bli_axpyf_sifive_x280_intr/bli_axpyf_sifive_x280_intr_complex.c rename to kernels/sifive_rvv/1f/bli_axpyf_sifive_rvv_intr/bli_axpyf_sifive_rvv_intr_complex.c diff --git a/kernels/sifive_x280/1f/bli_axpyf_sifive_x280_intr/bli_axpyf_sifive_x280_intr_real.c b/kernels/sifive_rvv/1f/bli_axpyf_sifive_rvv_intr/bli_axpyf_sifive_rvv_intr_real.c similarity index 100% rename from kernels/sifive_x280/1f/bli_axpyf_sifive_x280_intr/bli_axpyf_sifive_x280_intr_real.c rename to kernels/sifive_rvv/1f/bli_axpyf_sifive_rvv_intr/bli_axpyf_sifive_rvv_intr_real.c diff --git a/kernels/sifive_x280/1f/bli_dotaxpyv_sifive_x280_intr/bli_dotaxpyv_sifive_x280_intr.c b/kernels/sifive_rvv/1f/bli_dotaxpyv_sifive_rvv_intr/bli_dotaxpyv_sifive_rvv_intr.c similarity index 92% rename from kernels/sifive_x280/1f/bli_dotaxpyv_sifive_x280_intr/bli_dotaxpyv_sifive_x280_intr.c rename to kernels/sifive_rvv/1f/bli_dotaxpyv_sifive_rvv_intr/bli_dotaxpyv_sifive_rvv_intr.c index 9cd1071d7a..7d46f52b07 100644 --- a/kernels/sifive_x280/1f/bli_dotaxpyv_sifive_x280_intr/bli_dotaxpyv_sifive_x280_intr.c +++ b/kernels/sifive_rvv/1f/bli_dotaxpyv_sifive_rvv_intr/bli_dotaxpyv_sifive_rvv_intr.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -39,7 +39,7 @@ #include "blis.h" #include "../../riscv_overloaded_intrinsics.h" -#define DOTAXPYV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##dotaxpyv_sifive_x280_intr(\ +#define DOTAXPYV_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##dotaxpyv_sifive_rvv_intr(\ conj_t conjxt, \ conj_t conjx, \ conj_t conjy, \ @@ -61,7 +61,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(float) -#include "./bli_dotaxpyv_sifive_x280_intr_real.c" +#include "./bli_dotaxpyv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -76,7 +76,7 @@ #define LMUL m8 #define FLT_SIZE sizeof(double) -#include "./bli_dotaxpyv_sifive_x280_intr_real.c" +#include "./bli_dotaxpyv_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -92,7 +92,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(float) -#include "./bli_dotaxpyv_sifive_x280_intr_complex.c" +#include "./bli_dotaxpyv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -109,7 +109,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(double) -#include "./bli_dotaxpyv_sifive_x280_intr_complex.c" +#include "./bli_dotaxpyv_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT diff --git a/kernels/sifive_x280/1f/bli_dotaxpyv_sifive_x280_intr/bli_dotaxpyv_sifive_x280_intr_complex.c b/kernels/sifive_rvv/1f/bli_dotaxpyv_sifive_rvv_intr/bli_dotaxpyv_sifive_rvv_intr_complex.c similarity index 99% rename from kernels/sifive_x280/1f/bli_dotaxpyv_sifive_x280_intr/bli_dotaxpyv_sifive_x280_intr_complex.c rename to kernels/sifive_rvv/1f/bli_dotaxpyv_sifive_rvv_intr/bli_dotaxpyv_sifive_rvv_intr_complex.c index c3cd06c523..7529fb7584 100644 --- a/kernels/sifive_x280/1f/bli_dotaxpyv_sifive_x280_intr/bli_dotaxpyv_sifive_x280_intr_complex.c +++ b/kernels/sifive_rvv/1f/bli_dotaxpyv_sifive_rvv_intr/bli_dotaxpyv_sifive_rvv_intr_complex.c @@ -4,7 +4,7 @@ An object-based framework for developing 
high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/1f/bli_dotaxpyv_sifive_x280_intr/bli_dotaxpyv_sifive_x280_intr_real.c b/kernels/sifive_rvv/1f/bli_dotaxpyv_sifive_rvv_intr/bli_dotaxpyv_sifive_rvv_intr_real.c similarity index 99% rename from kernels/sifive_x280/1f/bli_dotaxpyv_sifive_x280_intr/bli_dotaxpyv_sifive_x280_intr_real.c rename to kernels/sifive_rvv/1f/bli_dotaxpyv_sifive_rvv_intr/bli_dotaxpyv_sifive_rvv_intr_real.c index adaf3610b0..0b6b7e0164 100644 --- a/kernels/sifive_x280/1f/bli_dotaxpyv_sifive_x280_intr/bli_dotaxpyv_sifive_x280_intr_real.c +++ b/kernels/sifive_rvv/1f/bli_dotaxpyv_sifive_rvv_intr/bli_dotaxpyv_sifive_rvv_intr_real.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/1f/bli_dotxaxpyf_sifive_x280_intr/bli_dotxaxpyf_sifive_x280_intr.c b/kernels/sifive_rvv/1f/bli_dotxaxpyf_sifive_rvv_intr/bli_dotxaxpyf_sifive_rvv_intr.c similarity index 93% rename from kernels/sifive_x280/1f/bli_dotxaxpyf_sifive_x280_intr/bli_dotxaxpyf_sifive_x280_intr.c rename to kernels/sifive_rvv/1f/bli_dotxaxpyf_sifive_rvv_intr/bli_dotxaxpyf_sifive_rvv_intr.c index dc1bca9f6a..24052dd5c2 100644 --- a/kernels/sifive_x280/1f/bli_dotxaxpyf_sifive_x280_intr/bli_dotxaxpyf_sifive_x280_intr.c +++ b/kernels/sifive_rvv/1f/bli_dotxaxpyf_sifive_rvv_intr/bli_dotxaxpyf_sifive_rvv_intr.c @@ -40,7 +40,7 @@ #include #include -#define DOTXAXPYF_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##dotxaxpyf_sifive_x280_intr(\ +#define DOTXAXPYF_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##dotxaxpyf_sifive_rvv_intr(\ conj_t conjat, \ conj_t conja, \ conj_t conjw, \ @@ -59,9 +59,9 @@ #define DOTXAXPYF(...) 
DOTXAXPYF_(__VA_ARGS__) -#define SETV_(PRECISION_CHAR) bli_##PRECISION_CHAR##setv_sifive_x280_intr +#define SETV_(PRECISION_CHAR) bli_##PRECISION_CHAR##setv_sifive_rvv_intr #define SETV(PRECISION_CHAR) SETV_(PRECISION_CHAR) -#define SCALV_(PRECISION_CHAR) bli_##PRECISION_CHAR##scalv_sifive_x280_intr +#define SCALV_(PRECISION_CHAR) bli_##PRECISION_CHAR##scalv_sifive_rvv_intr #define SCALV(PRECISION_CHAR) SCALV_(PRECISION_CHAR) // Single precision real @@ -71,7 +71,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(float) -#include "./bli_dotxaxpyf_sifive_x280_intr_real.c" +#include "./bli_dotxaxpyf_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -86,7 +86,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(double) -#include "./bli_dotxaxpyf_sifive_x280_intr_real.c" +#include "./bli_dotxaxpyf_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -102,7 +102,7 @@ #define LMUL m2 #define FLT_SIZE sizeof(float) -#include "./bli_dotxaxpyf_sifive_x280_intr_complex.c" +#include "./bli_dotxaxpyf_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -119,7 +119,7 @@ #define LMUL m2 #define FLT_SIZE sizeof(double) -#include "./bli_dotxaxpyf_sifive_x280_intr_complex.c" +#include "./bli_dotxaxpyf_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT diff --git a/kernels/sifive_x280/1f/bli_dotxaxpyf_sifive_x280_intr/bli_dotxaxpyf_sifive_x280_intr_complex.c b/kernels/sifive_rvv/1f/bli_dotxaxpyf_sifive_rvv_intr/bli_dotxaxpyf_sifive_rvv_intr_complex.c similarity index 76% rename from kernels/sifive_x280/1f/bli_dotxaxpyf_sifive_x280_intr/bli_dotxaxpyf_sifive_x280_intr_complex.c rename to kernels/sifive_rvv/1f/bli_dotxaxpyf_sifive_rvv_intr/bli_dotxaxpyf_sifive_rvv_intr_complex.c index d8a984064d..67edd9db3e 100644 --- a/kernels/sifive_x280/1f/bli_dotxaxpyf_sifive_x280_intr/bli_dotxaxpyf_sifive_x280_intr_complex.c +++ b/kernels/sifive_rvv/1f/bli_dotxaxpyf_sifive_rvv_intr/bli_dotxaxpyf_sifive_rvv_intr_complex.c @@ -35,89 +35,89 @@ // clang-format off #ifdef 
DOTXAXPYF -#define DOTXAXPYF_SIFIVE_X280_LOAD_ACOL(i) \ +#define DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL(i) \ do { \ acol_vec = VLSEG2_V_F(PREC, LMUL, 2)((BASE_DT*) (a_tmp + i * lda), vl); \ acol_vec_r = VGET_V_F(PREC, LMUL, 2)(acol_vec, 0); \ acol_vec_i = VGET_V_F(PREC, LMUL, 2)(acol_vec, 1); \ } while (0) -#define DOTXAXPYF_SIFIVE_X280_LOAD_ACOL_STRIDED(i) \ +#define DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL_STRIDED(i) \ do { \ acol_vec = VLSSEG2_V_F(PREC, LMUL, 2)((BASE_DT*) (a_tmp + i * lda), 2 * FLT_SIZE * inca, vl); \ acol_vec_r = VGET_V_F(PREC, LMUL, 2)(acol_vec, 0); \ acol_vec_i = VGET_V_F(PREC, LMUL, 2)(acol_vec, 1); \ } while (0) -#define DOTXAXPYF_SIFIVE_X280_LOOP_BODY_FIRST(LOAD_SUF, DF_CONJ_SUF, AF_CONJ_SUF) \ +#define DOTXAXPYF_SIFIVE_RVV_LOOP_BODY_FIRST(LOAD_SUF, DF_CONJ_SUF, AF_CONJ_SUF) \ do { \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(0); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(0); \ VCMUL_VV##DF_CONJ_SUF(PREC, LMUL, yacc0_r, yacc0_i, acol_vec_r, acol_vec_i, wvec_r, wvec_i, vl); \ VCMUL_VF##AF_CONJ_SUF(PREC, LMUL, zacc_r, zacc_i, acol_vec_r, acol_vec_i, x[0 * incx].real, x[0 * incx].imag, vl); \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(1); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(1); \ VCMUL_VV##DF_CONJ_SUF(PREC, LMUL, yacc1_r, yacc1_i, acol_vec_r, acol_vec_i, wvec_r, wvec_i, vl); \ VCMACC_VF##AF_CONJ_SUF(PREC, LMUL, zacc_r, zacc_i, x[1 * incx].real, x[1 * incx].imag, acol_vec_r, acol_vec_i, vl); \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(2); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(2); \ VCMUL_VV##DF_CONJ_SUF(PREC, LMUL, yacc2_r, yacc2_i, acol_vec_r, acol_vec_i, wvec_r, wvec_i, vl); \ VCMACC_VF##AF_CONJ_SUF(PREC, LMUL, zacc_r, zacc_i, x[2 * incx].real, x[2 * incx].imag, acol_vec_r, acol_vec_i, vl); \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(3); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(3); \ VCMUL_VV##DF_CONJ_SUF(PREC, LMUL, yacc3_r, yacc3_i, acol_vec_r, acol_vec_i, wvec_r, wvec_i, vl); \ VCMACC_VF##AF_CONJ_SUF(PREC, LMUL, zacc_r, zacc_i, 
x[3 * incx].real, x[3 * incx].imag, acol_vec_r, acol_vec_i, vl); \ } while (0) -#define DOTXAXPYF_SIFIVE_X280_LOOP_BODY(LOAD_SUF, DF_CONJ_SUF, AF_CONJ_SUF) \ +#define DOTXAXPYF_SIFIVE_RVV_LOOP_BODY(LOAD_SUF, DF_CONJ_SUF, AF_CONJ_SUF) \ do { \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(0); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(0); \ VCMACC_VV##DF_CONJ_SUF##_TU(PREC, LMUL, yacc0_r, yacc0_i, wvec_r, wvec_i, acol_vec_r, acol_vec_i, vl); \ VCMUL_VF##AF_CONJ_SUF(PREC, LMUL, zacc_r, zacc_i, acol_vec_r, acol_vec_i, x[0 * incx].real, x[0 * incx].imag, vl); \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(1); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(1); \ VCMACC_VV##DF_CONJ_SUF##_TU(PREC, LMUL, yacc1_r, yacc1_i, wvec_r, wvec_i, acol_vec_r, acol_vec_i, vl); \ VCMACC_VF##AF_CONJ_SUF(PREC, LMUL, zacc_r, zacc_i, x[1 * incx].real, x[1 * incx].imag, acol_vec_r, acol_vec_i, vl); \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(2); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(2); \ VCMACC_VV##DF_CONJ_SUF##_TU(PREC, LMUL, yacc2_r, yacc2_i, wvec_r, wvec_i, acol_vec_r, acol_vec_i, vl); \ VCMACC_VF##AF_CONJ_SUF(PREC, LMUL, zacc_r, zacc_i, x[2 * incx].real, x[2 * incx].imag, acol_vec_r, acol_vec_i, vl); \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(3); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(3); \ VCMACC_VV##DF_CONJ_SUF##_TU(PREC, LMUL, yacc3_r, yacc3_i, wvec_r, wvec_i, acol_vec_r, acol_vec_i, vl); \ VCMACC_VF##AF_CONJ_SUF(PREC, LMUL, zacc_r, zacc_i, x[3 * incx].real, x[3 * incx].imag, acol_vec_r, acol_vec_i, vl); \ } while (0) -#define DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY_FIRST(LOAD_SUF, DF_CONJ_SUF, AF_CONJ_SUF) \ +#define DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY_FIRST(LOAD_SUF, DF_CONJ_SUF, AF_CONJ_SUF) \ do { \ switch (b) { \ case 3: \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(2); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(2); \ VCMUL_VV##DF_CONJ_SUF(PREC, LMUL, yacc2_r, yacc2_i, acol_vec_r, acol_vec_i, wvec_r, wvec_i, vl); \ VCMACC_VF##AF_CONJ_SUF(PREC, LMUL, zacc_r, zacc_i, 
x[2 * incx].real, x[2 * incx].imag, acol_vec_r, acol_vec_i, vl); \ case 2: \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(1); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(1); \ VCMUL_VV##DF_CONJ_SUF(PREC, LMUL, yacc1_r, yacc1_i, acol_vec_r, acol_vec_i, wvec_r, wvec_i, vl); \ VCMACC_VF##AF_CONJ_SUF(PREC, LMUL, zacc_r, zacc_i, x[1 * incx].real, x[1 * incx].imag, acol_vec_r, acol_vec_i, vl); \ case 1: \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(0); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(0); \ VCMUL_VV##DF_CONJ_SUF(PREC, LMUL, yacc0_r, yacc0_i, acol_vec_r, acol_vec_i, wvec_r, wvec_i, vl); \ VCMACC_VF##AF_CONJ_SUF(PREC, LMUL, zacc_r, zacc_i, x[0 * incx].real, x[0 * incx].imag, acol_vec_r, acol_vec_i, vl); \ } \ } while (0) -#define DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY(LOAD_SUF, DF_CONJ_SUF, AF_CONJ_SUF) \ +#define DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY(LOAD_SUF, DF_CONJ_SUF, AF_CONJ_SUF) \ do { \ switch (b) { \ case 3: \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(2); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(2); \ VCMACC_VV##DF_CONJ_SUF##_TU(PREC, LMUL, yacc2_r, yacc2_i, wvec_r, wvec_i, acol_vec_r, acol_vec_i, vl); \ VCMACC_VF##AF_CONJ_SUF(PREC, LMUL, zacc_r, zacc_i, x[2 * incx].real, x[2 * incx].imag, acol_vec_r, acol_vec_i, vl); \ case 2: \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(1); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(1); \ VCMACC_VV##DF_CONJ_SUF##_TU(PREC, LMUL, yacc1_r, yacc1_i, wvec_r, wvec_i, acol_vec_r, acol_vec_i, vl); \ VCMACC_VF##AF_CONJ_SUF(PREC, LMUL, zacc_r, zacc_i, x[1 * incx].real, x[1 * incx].imag, acol_vec_r, acol_vec_i, vl); \ case 1: \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(0); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(0); \ VCMACC_VV##DF_CONJ_SUF##_TU(PREC, LMUL, yacc0_r, yacc0_i, wvec_r, wvec_i, acol_vec_r, acol_vec_i, vl); \ VCMACC_VF##AF_CONJ_SUF(PREC, LMUL, zacc_r, zacc_i, x[0 * incx].real, x[0 * incx].imag, acol_vec_r, acol_vec_i, vl); \ } \ } while (0) -#define DOTXAXPYF_SIFIVE_X280_REDUCE(i) \ +#define 
DOTXAXPYF_SIFIVE_RVV_REDUCE(i) \ do { \ RVV_TYPE_F(PREC, m1) dot##i##_r = VFMV_S_F(PREC, m1)(0., 1); \ RVV_TYPE_F(PREC, m1) dot##i##_i = VFMV_S_F(PREC, m1)(0., 1); \ @@ -205,29 +205,29 @@ DOTXAXPYF(PRECISION_CHAR, void) if (bli_is_conj(conjat)) { if (bli_is_conj(conja)) { if (inca == 1) - DOTXAXPYF_SIFIVE_X280_LOOP_BODY_FIRST( , _CONJ, _CONJ); + DOTXAXPYF_SIFIVE_RVV_LOOP_BODY_FIRST( , _CONJ, _CONJ); else - DOTXAXPYF_SIFIVE_X280_LOOP_BODY_FIRST(_STRIDED, _CONJ, _CONJ); + DOTXAXPYF_SIFIVE_RVV_LOOP_BODY_FIRST(_STRIDED, _CONJ, _CONJ); } else { if (inca == 1) - DOTXAXPYF_SIFIVE_X280_LOOP_BODY_FIRST( , _CONJ, ); + DOTXAXPYF_SIFIVE_RVV_LOOP_BODY_FIRST( , _CONJ, ); else - DOTXAXPYF_SIFIVE_X280_LOOP_BODY_FIRST(_STRIDED, _CONJ, ); + DOTXAXPYF_SIFIVE_RVV_LOOP_BODY_FIRST(_STRIDED, _CONJ, ); } } else { if (bli_is_conj(conja)) { if (inca == 1) - DOTXAXPYF_SIFIVE_X280_LOOP_BODY_FIRST( , , _CONJ); + DOTXAXPYF_SIFIVE_RVV_LOOP_BODY_FIRST( , , _CONJ); else - DOTXAXPYF_SIFIVE_X280_LOOP_BODY_FIRST(_STRIDED, , _CONJ); + DOTXAXPYF_SIFIVE_RVV_LOOP_BODY_FIRST(_STRIDED, , _CONJ); } else { if (inca == 1) - DOTXAXPYF_SIFIVE_X280_LOOP_BODY_FIRST( , , ); + DOTXAXPYF_SIFIVE_RVV_LOOP_BODY_FIRST( , , ); else - DOTXAXPYF_SIFIVE_X280_LOOP_BODY_FIRST(_STRIDED, , ); + DOTXAXPYF_SIFIVE_RVV_LOOP_BODY_FIRST(_STRIDED, , ); } } first = false; @@ -236,29 +236,29 @@ DOTXAXPYF(PRECISION_CHAR, void) if (bli_is_conj(conjat)) { if (bli_is_conj(conja)) { if (inca == 1) - DOTXAXPYF_SIFIVE_X280_LOOP_BODY( , _CONJ, _CONJ); + DOTXAXPYF_SIFIVE_RVV_LOOP_BODY( , _CONJ, _CONJ); else - DOTXAXPYF_SIFIVE_X280_LOOP_BODY(_STRIDED, _CONJ, _CONJ); + DOTXAXPYF_SIFIVE_RVV_LOOP_BODY(_STRIDED, _CONJ, _CONJ); } else { if (inca == 1) - DOTXAXPYF_SIFIVE_X280_LOOP_BODY( , _CONJ, ); + DOTXAXPYF_SIFIVE_RVV_LOOP_BODY( , _CONJ, ); else - DOTXAXPYF_SIFIVE_X280_LOOP_BODY(_STRIDED, _CONJ, ); + DOTXAXPYF_SIFIVE_RVV_LOOP_BODY(_STRIDED, _CONJ, ); } } else { if (bli_is_conj(conja)) { if (inca == 1) - DOTXAXPYF_SIFIVE_X280_LOOP_BODY( , , _CONJ); + 
DOTXAXPYF_SIFIVE_RVV_LOOP_BODY( , , _CONJ); else - DOTXAXPYF_SIFIVE_X280_LOOP_BODY(_STRIDED, , _CONJ); + DOTXAXPYF_SIFIVE_RVV_LOOP_BODY(_STRIDED, , _CONJ); } else { if (inca == 1) - DOTXAXPYF_SIFIVE_X280_LOOP_BODY( , , ); + DOTXAXPYF_SIFIVE_RVV_LOOP_BODY( , , ); else - DOTXAXPYF_SIFIVE_X280_LOOP_BODY(_STRIDED, , ); + DOTXAXPYF_SIFIVE_RVV_LOOP_BODY(_STRIDED, , ); } } } @@ -287,10 +287,10 @@ DOTXAXPYF(PRECISION_CHAR, void) avl -= vl; } - DOTXAXPYF_SIFIVE_X280_REDUCE(0); - DOTXAXPYF_SIFIVE_X280_REDUCE(1); - DOTXAXPYF_SIFIVE_X280_REDUCE(2); - DOTXAXPYF_SIFIVE_X280_REDUCE(3); + DOTXAXPYF_SIFIVE_RVV_REDUCE(0); + DOTXAXPYF_SIFIVE_RVV_REDUCE(1); + DOTXAXPYF_SIFIVE_RVV_REDUCE(2); + DOTXAXPYF_SIFIVE_RVV_REDUCE(3); a += 4 * lda; x += 4 * incx; @@ -322,29 +322,29 @@ DOTXAXPYF(PRECISION_CHAR, void) if (bli_is_conj(conjat)) { if (bli_is_conj(conja)) { if (inca == 1) - DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY_FIRST( , _CONJ, _CONJ); + DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY_FIRST( , _CONJ, _CONJ); else - DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY_FIRST(_STRIDED, _CONJ, _CONJ); + DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY_FIRST(_STRIDED, _CONJ, _CONJ); } else { if (inca == 1) - DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY_FIRST( , _CONJ, ); + DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY_FIRST( , _CONJ, ); else - DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY_FIRST(_STRIDED, _CONJ, ); + DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY_FIRST(_STRIDED, _CONJ, ); } } else { if (bli_is_conj(conja)) { if (inca == 1) - DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY_FIRST( , , _CONJ); + DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY_FIRST( , , _CONJ); else - DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY_FIRST(_STRIDED, , _CONJ); + DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY_FIRST(_STRIDED, , _CONJ); } else { if (inca == 1) - DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY_FIRST( , , ); + DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY_FIRST( , , ); else - DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY_FIRST(_STRIDED, , ); + DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY_FIRST(_STRIDED, , ); } } first = false; @@ -353,29 +353,29 @@ DOTXAXPYF(PRECISION_CHAR, 
void) if (bli_is_conj(conjat)) { if (bli_is_conj(conja)) { if (inca == 1) - DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY( , _CONJ, _CONJ); + DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY( , _CONJ, _CONJ); else - DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY(_STRIDED, _CONJ, _CONJ); + DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY(_STRIDED, _CONJ, _CONJ); } else { if (inca == 1) - DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY( , _CONJ, ); + DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY( , _CONJ, ); else - DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY(_STRIDED, _CONJ, ); + DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY(_STRIDED, _CONJ, ); } } else { if (bli_is_conj(conja)) { if (inca == 1) - DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY( , , _CONJ); + DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY( , , _CONJ); else - DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY(_STRIDED, , _CONJ); + DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY(_STRIDED, , _CONJ); } else { if (inca == 1) - DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY( , , ); + DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY( , , ); else - DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY(_STRIDED, , ); + DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY(_STRIDED, , ); } } } @@ -406,22 +406,22 @@ DOTXAXPYF(PRECISION_CHAR, void) switch (b) { case 3: - DOTXAXPYF_SIFIVE_X280_REDUCE(2); + DOTXAXPYF_SIFIVE_RVV_REDUCE(2); case 2: - DOTXAXPYF_SIFIVE_X280_REDUCE(1); + DOTXAXPYF_SIFIVE_RVV_REDUCE(1); case 1: - DOTXAXPYF_SIFIVE_X280_REDUCE(0); + DOTXAXPYF_SIFIVE_RVV_REDUCE(0); } } return; } -#undef DOTXAXPYF_SIFIVE_X280_LOAD_ACOL -#undef DOTXAXPYF_SIFIVE_X280_LOAD_ACOL_STRIDED -#undef DOTXAXPYF_SIFIVE_X280_LOOP_BODY_FIRST -#undef DOTXAXPYF_SIFIVE_X280_LOOP_BODY -#undef DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY_FIRST -#undef DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY -#undef DOTXAXPYF_SIFIVE_X280_REDUCE +#undef DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL +#undef DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL_STRIDED +#undef DOTXAXPYF_SIFIVE_RVV_LOOP_BODY_FIRST +#undef DOTXAXPYF_SIFIVE_RVV_LOOP_BODY +#undef DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY_FIRST +#undef DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY +#undef DOTXAXPYF_SIFIVE_RVV_REDUCE #endif // DOTXAXPYF diff --git 
a/kernels/sifive_x280/1f/bli_dotxaxpyf_sifive_x280_intr/bli_dotxaxpyf_sifive_x280_intr_real.c b/kernels/sifive_rvv/1f/bli_dotxaxpyf_sifive_rvv_intr/bli_dotxaxpyf_sifive_rvv_intr_real.c similarity index 79% rename from kernels/sifive_x280/1f/bli_dotxaxpyf_sifive_x280_intr/bli_dotxaxpyf_sifive_x280_intr_real.c rename to kernels/sifive_rvv/1f/bli_dotxaxpyf_sifive_rvv_intr/bli_dotxaxpyf_sifive_rvv_intr_real.c index 57ef4f7447..7143d3a974 100644 --- a/kernels/sifive_x280/1f/bli_dotxaxpyf_sifive_x280_intr/bli_dotxaxpyf_sifive_x280_intr_real.c +++ b/kernels/sifive_rvv/1f/bli_dotxaxpyf_sifive_rvv_intr/bli_dotxaxpyf_sifive_rvv_intr_real.c @@ -35,85 +35,85 @@ // clang-format off #ifdef DOTXAXPYF -#define DOTXAXPYF_SIFIVE_X280_LOAD_ACOL(i) \ +#define DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL(i) \ do { \ acol_vec = VLE_V_F(PREC, LMUL)(a_tmp + i * lda, vl); \ } while (0) -#define DOTXAXPYF_SIFIVE_X280_LOAD_ACOL_STRIDED(i) \ +#define DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL_STRIDED(i) \ do { \ acol_vec = VLSE_V_F(PREC, LMUL)(a_tmp + i * lda, FLT_SIZE * inca, vl); \ } while (0) -#define DOTXAXPYF_SIFIVE_X280_LOOP_BODY_FIRST(LOAD_SUF) \ +#define DOTXAXPYF_SIFIVE_RVV_LOOP_BODY_FIRST(LOAD_SUF) \ do { \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(0); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(0); \ yacc0 = VFMUL_VV(PREC, LMUL)(acol_vec, wvec, vl); \ zacc = VFMUL_VF(PREC, LMUL)(acol_vec, x[0 * incx], vl); \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(1); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(1); \ yacc1 = VFMUL_VV(PREC, LMUL)(acol_vec, wvec, vl); \ zacc = VFMACC_VF(PREC, LMUL)(zacc, x[1 * incx], acol_vec, vl); \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(2); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(2); \ yacc2 = VFMUL_VV(PREC, LMUL)(acol_vec, wvec, vl); \ zacc = VFMACC_VF(PREC, LMUL)(zacc, x[2 * incx], acol_vec, vl); \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(3); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(3); \ yacc3 = VFMUL_VV(PREC, LMUL)(acol_vec, wvec, vl); \ zacc = VFMACC_VF(PREC, 
LMUL)(zacc, x[3 * incx], acol_vec, vl); \ } while (0) -#define DOTXAXPYF_SIFIVE_X280_LOOP_BODY(LOAD_SUF) \ +#define DOTXAXPYF_SIFIVE_RVV_LOOP_BODY(LOAD_SUF) \ do { \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(0); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(0); \ yacc0 = VFMACC_VV_TU(PREC, LMUL)(yacc0, acol_vec, wvec, vl); \ zacc = VFMUL_VF(PREC, LMUL)(acol_vec, x[0 * incx], vl); \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(1); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(1); \ yacc1 = VFMACC_VV_TU(PREC, LMUL)(yacc1, acol_vec, wvec, vl); \ zacc = VFMACC_VF(PREC, LMUL)(zacc, x[1 * incx], acol_vec, vl); \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(2); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(2); \ yacc2 = VFMACC_VV_TU(PREC, LMUL)(yacc2, acol_vec, wvec, vl); \ zacc = VFMACC_VF(PREC, LMUL)(zacc, x[2 * incx], acol_vec, vl); \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(3); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(3); \ yacc3 = VFMACC_VV_TU(PREC, LMUL)(yacc3, acol_vec, wvec, vl); \ zacc = VFMACC_VF(PREC, LMUL)(zacc, x[3 * incx], acol_vec, vl); \ } while (0) -#define DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY_FIRST(LOAD_SUF) \ +#define DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY_FIRST(LOAD_SUF) \ do { \ switch (b) { \ case 3: \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(2); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(2); \ yacc2 = VFMUL_VV(PREC, LMUL)(acol_vec, wvec, vl); \ zacc = VFMACC_VF(PREC, LMUL)(zacc, x[2 * incx], acol_vec, vl); \ case 2: \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(1); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(1); \ yacc1 = VFMUL_VV(PREC, LMUL)(acol_vec, wvec, vl); \ zacc = VFMACC_VF(PREC, LMUL)(zacc, x[1 * incx], acol_vec, vl); \ case 1: \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(0); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(0); \ yacc0 = VFMUL_VV(PREC, LMUL)(acol_vec, wvec, vl); \ zacc = VFMACC_VF(PREC, LMUL)(zacc, x[0 * incx], acol_vec, vl); \ } \ } while (0) -#define DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY(LOAD_SUF) \ +#define 
DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY(LOAD_SUF) \ do { \ switch (b) { \ case 3: \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(2); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(2); \ yacc2 = VFMACC_VV_TU(PREC, LMUL)(yacc2, acol_vec, wvec, vl); \ zacc = VFMACC_VF(PREC, LMUL)(zacc, x[2 * incx], acol_vec, vl); \ case 2: \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(1); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(1); \ yacc1 = VFMACC_VV_TU(PREC, LMUL)(yacc1, acol_vec, wvec, vl); \ zacc = VFMACC_VF(PREC, LMUL)(zacc, x[1 * incx], acol_vec, vl); \ case 1: \ - DOTXAXPYF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(0); \ + DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(0); \ yacc0 = VFMACC_VV_TU(PREC, LMUL)(yacc0, acol_vec, wvec, vl); \ zacc = VFMACC_VF(PREC, LMUL)(zacc, x[0 * incx], acol_vec, vl); \ } \ } while (0) -#define DOTXAXPYF_SIFIVE_X280_REDUCE(i) \ +#define DOTXAXPYF_SIFIVE_RVV_REDUCE(i) \ do { \ RVV_TYPE_F(PREC, m1) dot##i = VFMV_S_F(PREC, m1)(0., 1); \ dot##i = VF_REDUSUM_VS(PREC, LMUL)(yacc##i, dot##i, m); \ @@ -174,16 +174,16 @@ DOTXAXPYF(PRECISION_CHAR, void) wvec = VLSE_V_F(PREC, LMUL)(w_tmp, FLT_SIZE * incw, vl); if (first) { if (inca == 1) - DOTXAXPYF_SIFIVE_X280_LOOP_BODY_FIRST( ); + DOTXAXPYF_SIFIVE_RVV_LOOP_BODY_FIRST( ); else - DOTXAXPYF_SIFIVE_X280_LOOP_BODY_FIRST(_STRIDED); + DOTXAXPYF_SIFIVE_RVV_LOOP_BODY_FIRST(_STRIDED); first = false; } else { if (inca == 1) - DOTXAXPYF_SIFIVE_X280_LOOP_BODY( ); + DOTXAXPYF_SIFIVE_RVV_LOOP_BODY( ); else - DOTXAXPYF_SIFIVE_X280_LOOP_BODY(_STRIDED); + DOTXAXPYF_SIFIVE_RVV_LOOP_BODY(_STRIDED); } RVV_TYPE_F(PREC, LMUL) zvec; @@ -203,10 +203,10 @@ DOTXAXPYF(PRECISION_CHAR, void) avl -= vl; } - DOTXAXPYF_SIFIVE_X280_REDUCE(0); - DOTXAXPYF_SIFIVE_X280_REDUCE(1); - DOTXAXPYF_SIFIVE_X280_REDUCE(2); - DOTXAXPYF_SIFIVE_X280_REDUCE(3); + DOTXAXPYF_SIFIVE_RVV_REDUCE(0); + DOTXAXPYF_SIFIVE_RVV_REDUCE(1); + DOTXAXPYF_SIFIVE_RVV_REDUCE(2); + DOTXAXPYF_SIFIVE_RVV_REDUCE(3); a += 4 * lda; x += 4 * incx; @@ -231,16 +231,16 @@ DOTXAXPYF(PRECISION_CHAR, 
void) wvec = VLSE_V_F(PREC, LMUL)(w_tmp, FLT_SIZE * incw, vl); if (first) { if (inca == 1) - DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY_FIRST( ); + DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY_FIRST( ); else - DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY_FIRST(_STRIDED); + DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY_FIRST(_STRIDED); first = false; } else { if (inca == 1) - DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY( ); + DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY( ); else - DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY(_STRIDED); + DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY(_STRIDED); } RVV_TYPE_F(PREC, LMUL) zvec; @@ -262,22 +262,22 @@ DOTXAXPYF(PRECISION_CHAR, void) switch (b) { case 3: - DOTXAXPYF_SIFIVE_X280_REDUCE(2); + DOTXAXPYF_SIFIVE_RVV_REDUCE(2); case 2: - DOTXAXPYF_SIFIVE_X280_REDUCE(1); + DOTXAXPYF_SIFIVE_RVV_REDUCE(1); case 1: - DOTXAXPYF_SIFIVE_X280_REDUCE(0); + DOTXAXPYF_SIFIVE_RVV_REDUCE(0); } } return; } -#undef DOTXAXPYF_SIFIVE_X280_LOAD_ACOL -#undef DOTXAXPYF_SIFIVE_X280_LOAD_ACOL_STRIDED -#undef DOTXAXPYF_SIFIVE_X280_LOOP_BODY_FIRST -#undef DOTXAXPYF_SIFIVE_X280_LOOP_BODY -#undef DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY_FIRST -#undef DOTXAXPYF_SIFIVE_X280_CLEANUP_BODY -#undef DOTXAXPYF_SIFIVE_X280_REDUCE +#undef DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL +#undef DOTXAXPYF_SIFIVE_RVV_LOAD_ACOL_STRIDED +#undef DOTXAXPYF_SIFIVE_RVV_LOOP_BODY_FIRST +#undef DOTXAXPYF_SIFIVE_RVV_LOOP_BODY +#undef DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY_FIRST +#undef DOTXAXPYF_SIFIVE_RVV_CLEANUP_BODY +#undef DOTXAXPYF_SIFIVE_RVV_REDUCE #endif // DOTXAXPYF diff --git a/kernels/sifive_x280/1f/bli_dotxf_sifive_x280_intr/bli_dotxf_sifive_x280_intr.c b/kernels/sifive_rvv/1f/bli_dotxf_sifive_rvv_intr/bli_dotxf_sifive_rvv_intr.c similarity index 94% rename from kernels/sifive_x280/1f/bli_dotxf_sifive_x280_intr/bli_dotxf_sifive_x280_intr.c rename to kernels/sifive_rvv/1f/bli_dotxf_sifive_rvv_intr/bli_dotxf_sifive_rvv_intr.c index 9396515b30..e65f0637dd 100644 --- a/kernels/sifive_x280/1f/bli_dotxf_sifive_x280_intr/bli_dotxf_sifive_x280_intr.c +++ 
b/kernels/sifive_rvv/1f/bli_dotxf_sifive_rvv_intr/bli_dotxf_sifive_rvv_intr.c @@ -39,7 +39,7 @@ #include #include -#define DOTXF_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##dotxf_sifive_x280_intr(\ +#define DOTXF_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##dotxf_sifive_rvv_intr(\ conj_t conjat, \ conj_t conjx, \ dim_t m, \ @@ -54,9 +54,9 @@ #define DOTXF(...) DOTXF_(__VA_ARGS__) -#define SETV_(PRECISION_CHAR) bli_##PRECISION_CHAR##setv_sifive_x280_intr +#define SETV_(PRECISION_CHAR) bli_##PRECISION_CHAR##setv_sifive_rvv_intr #define SETV(PRECISION_CHAR) SETV_(PRECISION_CHAR) -#define SCALV_(PRECISION_CHAR) bli_##PRECISION_CHAR##scalv_sifive_x280_intr +#define SCALV_(PRECISION_CHAR) bli_##PRECISION_CHAR##scalv_sifive_rvv_intr #define SCALV(PRECISION_CHAR) SCALV_(PRECISION_CHAR) // Single precision real @@ -66,7 +66,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(float) -#include "./bli_dotxf_sifive_x280_intr_real.c" +#include "./bli_dotxf_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -81,7 +81,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(double) -#include "./bli_dotxf_sifive_x280_intr_real.c" +#include "./bli_dotxf_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -97,7 +97,7 @@ #define LMUL m2 #define FLT_SIZE sizeof(float) -#include "./bli_dotxf_sifive_x280_intr_complex.c" +#include "./bli_dotxf_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -114,7 +114,7 @@ #define LMUL m2 #define FLT_SIZE sizeof(double) -#include "./bli_dotxf_sifive_x280_intr_complex.c" +#include "./bli_dotxf_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT diff --git a/kernels/sifive_x280/1f/bli_dotxf_sifive_x280_intr/bli_dotxf_sifive_x280_intr_complex.c b/kernels/sifive_rvv/1f/bli_dotxf_sifive_rvv_intr/bli_dotxf_sifive_rvv_intr_complex.c similarity index 74% rename from kernels/sifive_x280/1f/bli_dotxf_sifive_x280_intr/bli_dotxf_sifive_x280_intr_complex.c rename to 
kernels/sifive_rvv/1f/bli_dotxf_sifive_rvv_intr/bli_dotxf_sifive_rvv_intr_complex.c index 463a111f07..8cdc4b76e7 100644 --- a/kernels/sifive_x280/1f/bli_dotxf_sifive_x280_intr/bli_dotxf_sifive_x280_intr_complex.c +++ b/kernels/sifive_rvv/1f/bli_dotxf_sifive_rvv_intr/bli_dotxf_sifive_rvv_intr_complex.c @@ -35,95 +35,95 @@ // clang-format off #ifdef DOTXF -#define DOTXF_SIFIVE_X280_LOAD_ACOL(i) \ +#define DOTXF_SIFIVE_RVV_LOAD_ACOL(i) \ do { \ acol_vec = VLSEG2_V_F(PREC, LMUL, 2)((BASE_DT*) (a_tmp + i * lda), vl); \ acol_vec_r = VGET_V_F(PREC, LMUL, 2)(acol_vec, 0); \ acol_vec_i = VGET_V_F(PREC, LMUL, 2)(acol_vec, 1); \ } while (0) -#define DOTXF_SIFIVE_X280_LOAD_ACOL_STRIDED(i) \ +#define DOTXF_SIFIVE_RVV_LOAD_ACOL_STRIDED(i) \ do { \ acol_vec = VLSSEG2_V_F(PREC, LMUL, 2)((BASE_DT*) (a_tmp + i * lda), 2 * FLT_SIZE * inca, vl); \ acol_vec_r = VGET_V_F(PREC, LMUL, 2)(acol_vec, 0); \ acol_vec_i = VGET_V_F(PREC, LMUL, 2)(acol_vec, 1); \ } while (0) -#define DOTXF_SIFIVE_X280_LOOP_BODY_FIRST(LOAD_SUF, CONJ_SUF) \ +#define DOTXF_SIFIVE_RVV_LOOP_BODY_FIRST(LOAD_SUF, CONJ_SUF) \ do { \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(0); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(0); \ VCMUL_VV##CONJ_SUF(PREC, LMUL, acc0_r, acc0_i, acol_vec_r, acol_vec_i, xvec_r, xvec_i, vl); \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(1); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(1); \ VCMUL_VV##CONJ_SUF(PREC, LMUL, acc1_r, acc1_i, acol_vec_r, acol_vec_i, xvec_r, xvec_i, vl); \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(2); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(2); \ VCMUL_VV##CONJ_SUF(PREC, LMUL, acc2_r, acc2_i, acol_vec_r, acol_vec_i, xvec_r, xvec_i, vl); \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(3); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(3); \ VCMUL_VV##CONJ_SUF(PREC, LMUL, acc3_r, acc3_i, acol_vec_r, acol_vec_i, xvec_r, xvec_i, vl); \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(4); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(4); \ VCMUL_VV##CONJ_SUF(PREC, LMUL, acc4_r, acc4_i, acol_vec_r, acol_vec_i, 
xvec_r, xvec_i, vl); \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(5); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(5); \ VCMUL_VV##CONJ_SUF(PREC, LMUL, acc5_r, acc5_i, acol_vec_r, acol_vec_i, xvec_r, xvec_i, vl); \ } while (0) -#define DOTXF_SIFIVE_X280_LOOP_BODY(LOAD_SUF, CONJ_SUF) \ +#define DOTXF_SIFIVE_RVV_LOOP_BODY(LOAD_SUF, CONJ_SUF) \ do { \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(0); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(0); \ VCMACC_VV##CONJ_SUF##_TU(PREC, LMUL, acc0_r, acc0_i, xvec_r, xvec_i, acol_vec_r, acol_vec_i, vl); \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(1); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(1); \ VCMACC_VV##CONJ_SUF##_TU(PREC, LMUL, acc1_r, acc1_i, xvec_r, xvec_i, acol_vec_r, acol_vec_i, vl); \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(2); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(2); \ VCMACC_VV##CONJ_SUF##_TU(PREC, LMUL, acc2_r, acc2_i, xvec_r, xvec_i, acol_vec_r, acol_vec_i, vl); \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(3); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(3); \ VCMACC_VV##CONJ_SUF##_TU(PREC, LMUL, acc3_r, acc3_i, xvec_r, xvec_i, acol_vec_r, acol_vec_i, vl); \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(4); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(4); \ VCMACC_VV##CONJ_SUF##_TU(PREC, LMUL, acc4_r, acc4_i, xvec_r, xvec_i, acol_vec_r, acol_vec_i, vl); \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(5); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(5); \ VCMACC_VV##CONJ_SUF##_TU(PREC, LMUL, acc5_r, acc5_i, xvec_r, xvec_i, acol_vec_r, acol_vec_i, vl); \ } while (0) -#define DOTXF_SIFIVE_X280_CLEANUP_BODY_FIRST(LOAD_SUF, CONJ_SUF) \ +#define DOTXF_SIFIVE_RVV_CLEANUP_BODY_FIRST(LOAD_SUF, CONJ_SUF) \ do { \ switch (b) { \ case 5: \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(4); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(4); \ VCMUL_VV##CONJ_SUF(PREC, LMUL, acc4_r, acc4_i, acol_vec_r, acol_vec_i, xvec_r, xvec_i, vl); \ case 4: \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(3); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(3); \ VCMUL_VV##CONJ_SUF(PREC, LMUL, acc3_r, 
acc3_i, acol_vec_r, acol_vec_i, xvec_r, xvec_i, vl); \ case 3: \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(2); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(2); \ VCMUL_VV##CONJ_SUF(PREC, LMUL, acc2_r, acc2_i, acol_vec_r, acol_vec_i, xvec_r, xvec_i, vl); \ case 2: \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(1); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(1); \ VCMUL_VV##CONJ_SUF(PREC, LMUL, acc1_r, acc1_i, acol_vec_r, acol_vec_i, xvec_r, xvec_i, vl); \ case 1: \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(0); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(0); \ VCMUL_VV##CONJ_SUF(PREC, LMUL, acc0_r, acc0_i, acol_vec_r, acol_vec_i, xvec_r, xvec_i, vl); \ } \ } while (0) -#define DOTXF_SIFIVE_X280_CLEANUP_BODY(LOAD_SUF, CONJ_SUF) \ +#define DOTXF_SIFIVE_RVV_CLEANUP_BODY(LOAD_SUF, CONJ_SUF) \ do { \ switch (b) { \ case 5: \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(4); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(4); \ VCMACC_VV##CONJ_SUF##_TU(PREC, LMUL, acc4_r, acc4_i, xvec_r, xvec_i, acol_vec_r, acol_vec_i, vl); \ case 4: \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(3); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(3); \ VCMACC_VV##CONJ_SUF##_TU(PREC, LMUL, acc3_r, acc3_i, xvec_r, xvec_i, acol_vec_r, acol_vec_i, vl); \ case 3: \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(2); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(2); \ VCMACC_VV##CONJ_SUF##_TU(PREC, LMUL, acc2_r, acc2_i, xvec_r, xvec_i, acol_vec_r, acol_vec_i, vl); \ case 2: \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(1); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(1); \ VCMACC_VV##CONJ_SUF##_TU(PREC, LMUL, acc1_r, acc1_i, xvec_r, xvec_i, acol_vec_r, acol_vec_i, vl); \ case 1: \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(0); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(0); \ VCMACC_VV##CONJ_SUF##_TU(PREC, LMUL, acc0_r, acc0_i, xvec_r, xvec_i, acol_vec_r, acol_vec_i, vl); \ } \ } while (0) -#define DOTXF_SIFIVE_X280_REDUCE(i) \ +#define DOTXF_SIFIVE_RVV_REDUCE(i) \ do { \ RVV_TYPE_F(PREC, m1) dot##i##_r = VFMV_S_F(PREC, m1)(0., 1); \ RVV_TYPE_F(PREC, m1) 
dot##i##_i = VFMV_S_F(PREC, m1)(0., 1); \ @@ -200,30 +200,30 @@ DOTXF(PRECISION_CHAR, void) if (first) { if (bli_is_conj(conjat)) { if (inca == 1) - DOTXF_SIFIVE_X280_LOOP_BODY_FIRST( , _CONJ); + DOTXF_SIFIVE_RVV_LOOP_BODY_FIRST( , _CONJ); else - DOTXF_SIFIVE_X280_LOOP_BODY_FIRST(_STRIDED, _CONJ); + DOTXF_SIFIVE_RVV_LOOP_BODY_FIRST(_STRIDED, _CONJ); } else { if (inca == 1) - DOTXF_SIFIVE_X280_LOOP_BODY_FIRST( , ); + DOTXF_SIFIVE_RVV_LOOP_BODY_FIRST( , ); else - DOTXF_SIFIVE_X280_LOOP_BODY_FIRST(_STRIDED, ); + DOTXF_SIFIVE_RVV_LOOP_BODY_FIRST(_STRIDED, ); } first = false; } else { if (bli_is_conj(conjat)) { if (inca == 1) - DOTXF_SIFIVE_X280_LOOP_BODY( , _CONJ); + DOTXF_SIFIVE_RVV_LOOP_BODY( , _CONJ); else - DOTXF_SIFIVE_X280_LOOP_BODY(_STRIDED, _CONJ); + DOTXF_SIFIVE_RVV_LOOP_BODY(_STRIDED, _CONJ); } else { if (inca == 1) - DOTXF_SIFIVE_X280_LOOP_BODY( , ); + DOTXF_SIFIVE_RVV_LOOP_BODY( , ); else - DOTXF_SIFIVE_X280_LOOP_BODY(_STRIDED, ); + DOTXF_SIFIVE_RVV_LOOP_BODY(_STRIDED, ); } } @@ -232,12 +232,12 @@ DOTXF(PRECISION_CHAR, void) avl -= vl; } - DOTXF_SIFIVE_X280_REDUCE(0); - DOTXF_SIFIVE_X280_REDUCE(1); - DOTXF_SIFIVE_X280_REDUCE(2); - DOTXF_SIFIVE_X280_REDUCE(3); - DOTXF_SIFIVE_X280_REDUCE(4); - DOTXF_SIFIVE_X280_REDUCE(5); + DOTXF_SIFIVE_RVV_REDUCE(0); + DOTXF_SIFIVE_RVV_REDUCE(1); + DOTXF_SIFIVE_RVV_REDUCE(2); + DOTXF_SIFIVE_RVV_REDUCE(3); + DOTXF_SIFIVE_RVV_REDUCE(4); + DOTXF_SIFIVE_RVV_REDUCE(5); a += 6 * lda; y += 6 * incy; @@ -265,30 +265,30 @@ DOTXF(PRECISION_CHAR, void) if (first) { if (bli_is_conj(conjat)) { if (inca == 1) - DOTXF_SIFIVE_X280_CLEANUP_BODY_FIRST( , _CONJ); + DOTXF_SIFIVE_RVV_CLEANUP_BODY_FIRST( , _CONJ); else - DOTXF_SIFIVE_X280_CLEANUP_BODY_FIRST(_STRIDED, _CONJ); + DOTXF_SIFIVE_RVV_CLEANUP_BODY_FIRST(_STRIDED, _CONJ); } else { if (inca == 1) - DOTXF_SIFIVE_X280_CLEANUP_BODY_FIRST( , ); + DOTXF_SIFIVE_RVV_CLEANUP_BODY_FIRST( , ); else - DOTXF_SIFIVE_X280_CLEANUP_BODY_FIRST(_STRIDED, ); + DOTXF_SIFIVE_RVV_CLEANUP_BODY_FIRST(_STRIDED, ); 
} first = false; } else { if (bli_is_conj(conjat)) { if (inca == 1) - DOTXF_SIFIVE_X280_CLEANUP_BODY( , _CONJ); + DOTXF_SIFIVE_RVV_CLEANUP_BODY( , _CONJ); else - DOTXF_SIFIVE_X280_CLEANUP_BODY(_STRIDED, _CONJ); + DOTXF_SIFIVE_RVV_CLEANUP_BODY(_STRIDED, _CONJ); } else { if (inca == 1) - DOTXF_SIFIVE_X280_CLEANUP_BODY( , ); + DOTXF_SIFIVE_RVV_CLEANUP_BODY( , ); else - DOTXF_SIFIVE_X280_CLEANUP_BODY(_STRIDED, ); + DOTXF_SIFIVE_RVV_CLEANUP_BODY(_STRIDED, ); } } @@ -299,26 +299,26 @@ DOTXF(PRECISION_CHAR, void) switch (b) { case 5: - DOTXF_SIFIVE_X280_REDUCE(4); + DOTXF_SIFIVE_RVV_REDUCE(4); case 4: - DOTXF_SIFIVE_X280_REDUCE(3); + DOTXF_SIFIVE_RVV_REDUCE(3); case 3: - DOTXF_SIFIVE_X280_REDUCE(2); + DOTXF_SIFIVE_RVV_REDUCE(2); case 2: - DOTXF_SIFIVE_X280_REDUCE(1); + DOTXF_SIFIVE_RVV_REDUCE(1); case 1: - DOTXF_SIFIVE_X280_REDUCE(0); + DOTXF_SIFIVE_RVV_REDUCE(0); } } return; } -#undef DOTXF_SIFIVE_X280_LOAD_ACOL -#undef DOTXF_SIFIVE_X280_LOAD_ACOL_STRIDED -#undef DOTXF_SIFIVE_X280_LOOP_BODY_FIRST -#undef DOTXF_SIFIVE_X280_LOOP_BODY -#undef DOTXF_SIFIVE_X280_CLEANUP_BODY_FIRST -#undef DOTXF_SIFIVE_X280_CLEANUP_BODY -#undef DOTXF_SIFIVE_X280_REDUCE +#undef DOTXF_SIFIVE_RVV_LOAD_ACOL +#undef DOTXF_SIFIVE_RVV_LOAD_ACOL_STRIDED +#undef DOTXF_SIFIVE_RVV_LOOP_BODY_FIRST +#undef DOTXF_SIFIVE_RVV_LOOP_BODY +#undef DOTXF_SIFIVE_RVV_CLEANUP_BODY_FIRST +#undef DOTXF_SIFIVE_RVV_CLEANUP_BODY +#undef DOTXF_SIFIVE_RVV_REDUCE #endif // DOTXF diff --git a/kernels/sifive_x280/1f/bli_dotxf_sifive_x280_intr/bli_dotxf_sifive_x280_intr_real.c b/kernels/sifive_rvv/1f/bli_dotxf_sifive_rvv_intr/bli_dotxf_sifive_rvv_intr_real.c similarity index 72% rename from kernels/sifive_x280/1f/bli_dotxf_sifive_x280_intr/bli_dotxf_sifive_x280_intr_real.c rename to kernels/sifive_rvv/1f/bli_dotxf_sifive_rvv_intr/bli_dotxf_sifive_rvv_intr_real.c index 8286e2476f..cdc8f259e0 100644 --- a/kernels/sifive_x280/1f/bli_dotxf_sifive_x280_intr/bli_dotxf_sifive_x280_intr_real.c +++ 
b/kernels/sifive_rvv/1f/bli_dotxf_sifive_rvv_intr/bli_dotxf_sifive_rvv_intr_real.c @@ -35,91 +35,91 @@ // clang-format off #ifdef DOTXF -#define DOTXF_SIFIVE_X280_LOAD_ACOL(i) \ +#define DOTXF_SIFIVE_RVV_LOAD_ACOL(i) \ do { \ acol_vec = VLE_V_F(PREC, LMUL)(a_tmp + i * lda, vl); \ } while (0) -#define DOTXF_SIFIVE_X280_LOAD_ACOL_STRIDED(i) \ +#define DOTXF_SIFIVE_RVV_LOAD_ACOL_STRIDED(i) \ do { \ acol_vec = VLSE_V_F(PREC, LMUL)(a_tmp + i * lda, FLT_SIZE * inca, vl); \ } while (0) -#define DOTXF_SIFIVE_X280_LOOP_BODY_FIRST(LOAD_SUF) \ +#define DOTXF_SIFIVE_RVV_LOOP_BODY_FIRST(LOAD_SUF) \ do { \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(0); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(0); \ acc0 = VFMUL_VV(PREC, LMUL)(acol_vec, xvec, vl); \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(1); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(1); \ acc1 = VFMUL_VV(PREC, LMUL)(acol_vec, xvec, vl); \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(2); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(2); \ acc2 = VFMUL_VV(PREC, LMUL)(acol_vec, xvec, vl); \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(3); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(3); \ acc3 = VFMUL_VV(PREC, LMUL)(acol_vec, xvec, vl); \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(4); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(4); \ acc4 = VFMUL_VV(PREC, LMUL)(acol_vec, xvec, vl); \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(5); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(5); \ acc5 = VFMUL_VV(PREC, LMUL)(acol_vec, xvec, vl); \ } while (0) -#define DOTXF_SIFIVE_X280_LOOP_BODY(LOAD_SUF) \ +#define DOTXF_SIFIVE_RVV_LOOP_BODY(LOAD_SUF) \ do { \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(0); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(0); \ acc0 = VFMACC_VV_TU(PREC, LMUL)(acc0, acol_vec, xvec, vl); \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(1); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(1); \ acc1 = VFMACC_VV_TU(PREC, LMUL)(acc1, acol_vec, xvec, vl); \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(2); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(2); \ acc2 = VFMACC_VV_TU(PREC, 
LMUL)(acc2, acol_vec, xvec, vl); \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(3); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(3); \ acc3 = VFMACC_VV_TU(PREC, LMUL)(acc3, acol_vec, xvec, vl); \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(4); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(4); \ acc4 = VFMACC_VV_TU(PREC, LMUL)(acc4, acol_vec, xvec, vl); \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(5); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(5); \ acc5 = VFMACC_VV_TU(PREC, LMUL)(acc5, acol_vec, xvec, vl); \ } while (0) -#define DOTXF_SIFIVE_X280_CLEANUP_BODY_FIRST(LOAD_SUF) \ +#define DOTXF_SIFIVE_RVV_CLEANUP_BODY_FIRST(LOAD_SUF) \ do { \ switch (b) { \ case 5: \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(4); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(4); \ acc4 = VFMUL_VV(PREC, LMUL)(acol_vec, xvec, vl); \ case 4: \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(3); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(3); \ acc3 = VFMUL_VV(PREC, LMUL)(acol_vec, xvec, vl); \ case 3: \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(2); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(2); \ acc2 = VFMUL_VV(PREC, LMUL)(acol_vec, xvec, vl); \ case 2: \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(1); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(1); \ acc1 = VFMUL_VV(PREC, LMUL)(acol_vec, xvec, vl); \ case 1: \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(0); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(0); \ acc0 = VFMUL_VV(PREC, LMUL)(acol_vec, xvec, vl); \ } \ } while (0) -#define DOTXF_SIFIVE_X280_CLEANUP_BODY(LOAD_SUF) \ +#define DOTXF_SIFIVE_RVV_CLEANUP_BODY(LOAD_SUF) \ do { \ switch (b) { \ case 5: \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(4); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(4); \ acc4 = VFMACC_VV_TU(PREC, LMUL)(acc4, acol_vec, xvec, vl); \ case 4: \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(3); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(3); \ acc3 = VFMACC_VV_TU(PREC, LMUL)(acc3, acol_vec, xvec, vl); \ case 3: \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(2); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(2); \ acc2 = VFMACC_VV_TU(PREC, 
LMUL)(acc2, acol_vec, xvec, vl); \ case 2: \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(1); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(1); \ acc1 = VFMACC_VV_TU(PREC, LMUL)(acc1, acol_vec, xvec, vl); \ case 1: \ - DOTXF_SIFIVE_X280_LOAD_ACOL##LOAD_SUF(0); \ + DOTXF_SIFIVE_RVV_LOAD_ACOL##LOAD_SUF(0); \ acc0 = VFMACC_VV_TU(PREC, LMUL)(acc0, acol_vec, xvec, vl); \ } \ } while (0) -#define DOTXF_SIFIVE_X280_REDUCE(i) \ +#define DOTXF_SIFIVE_RVV_REDUCE(i) \ do { \ RVV_TYPE_F(PREC, m1) dot##i = VFMV_S_F(PREC, m1)(0., 1); \ dot##i = VF_REDUSUM_VS(PREC, LMUL)(acc##i, dot##i, m); \ @@ -173,16 +173,16 @@ DOTXF(PRECISION_CHAR, void) xvec = VLSE_V_F(PREC, LMUL)(x_tmp, FLT_SIZE * incx, vl); if (first) { if (inca == 1) - DOTXF_SIFIVE_X280_LOOP_BODY_FIRST(); + DOTXF_SIFIVE_RVV_LOOP_BODY_FIRST(); else - DOTXF_SIFIVE_X280_LOOP_BODY_FIRST(_STRIDED); + DOTXF_SIFIVE_RVV_LOOP_BODY_FIRST(_STRIDED); first = false; } else { if (inca == 1) - DOTXF_SIFIVE_X280_LOOP_BODY(); + DOTXF_SIFIVE_RVV_LOOP_BODY(); else - DOTXF_SIFIVE_X280_LOOP_BODY(_STRIDED); + DOTXF_SIFIVE_RVV_LOOP_BODY(_STRIDED); } a_tmp += vl * inca; @@ -190,12 +190,12 @@ DOTXF(PRECISION_CHAR, void) avl -= vl; } - DOTXF_SIFIVE_X280_REDUCE(0); - DOTXF_SIFIVE_X280_REDUCE(1); - DOTXF_SIFIVE_X280_REDUCE(2); - DOTXF_SIFIVE_X280_REDUCE(3); - DOTXF_SIFIVE_X280_REDUCE(4); - DOTXF_SIFIVE_X280_REDUCE(5); + DOTXF_SIFIVE_RVV_REDUCE(0); + DOTXF_SIFIVE_RVV_REDUCE(1); + DOTXF_SIFIVE_RVV_REDUCE(2); + DOTXF_SIFIVE_RVV_REDUCE(3); + DOTXF_SIFIVE_RVV_REDUCE(4); + DOTXF_SIFIVE_RVV_REDUCE(5); a += 6 * lda; y += 6 * incy; @@ -217,16 +217,16 @@ DOTXF(PRECISION_CHAR, void) xvec = VLSE_V_F(PREC, LMUL)(x_tmp, FLT_SIZE * incx, vl); if (first) { if (inca == 1) - DOTXF_SIFIVE_X280_CLEANUP_BODY_FIRST(); + DOTXF_SIFIVE_RVV_CLEANUP_BODY_FIRST(); else - DOTXF_SIFIVE_X280_CLEANUP_BODY_FIRST(_STRIDED); + DOTXF_SIFIVE_RVV_CLEANUP_BODY_FIRST(_STRIDED); first = false; } else { if (inca == 1) - DOTXF_SIFIVE_X280_CLEANUP_BODY(); + DOTXF_SIFIVE_RVV_CLEANUP_BODY(); else - 
DOTXF_SIFIVE_X280_CLEANUP_BODY(_STRIDED); + DOTXF_SIFIVE_RVV_CLEANUP_BODY(_STRIDED); } a_tmp += vl * inca; @@ -236,27 +236,27 @@ DOTXF(PRECISION_CHAR, void) switch (b) { case 5: - DOTXF_SIFIVE_X280_REDUCE(4); + DOTXF_SIFIVE_RVV_REDUCE(4); case 4: - DOTXF_SIFIVE_X280_REDUCE(3); + DOTXF_SIFIVE_RVV_REDUCE(3); case 3: - DOTXF_SIFIVE_X280_REDUCE(2); + DOTXF_SIFIVE_RVV_REDUCE(2); case 2: - DOTXF_SIFIVE_X280_REDUCE(1); + DOTXF_SIFIVE_RVV_REDUCE(1); case 1: - DOTXF_SIFIVE_X280_REDUCE(0); + DOTXF_SIFIVE_RVV_REDUCE(0); } } return; } -#undef DOTXF_SIFIVE_X280_LOAD_ACOL -#undef DOTXF_SIFIVE_X280_LOAD_ACOL_STRIDED -#undef DOTXF_SIFIVE_X280_LOOP_BODY_FIRST -#undef DOTXF_SIFIVE_X280_LOOP_BODY -#undef DOTXF_SIFIVE_X280_CLEANUP_BODY_FIRST -#undef DOTXF_SIFIVE_X280_CLEANUP_BODY -#undef DOTXF_SIFIVE_X280_REDUCE +#undef DOTXF_SIFIVE_RVV_LOAD_ACOL +#undef DOTXF_SIFIVE_RVV_LOAD_ACOL_STRIDED +#undef DOTXF_SIFIVE_RVV_LOOP_BODY_FIRST +#undef DOTXF_SIFIVE_RVV_LOOP_BODY +#undef DOTXF_SIFIVE_RVV_CLEANUP_BODY_FIRST +#undef DOTXF_SIFIVE_RVV_CLEANUP_BODY +#undef DOTXF_SIFIVE_RVV_REDUCE #endif // DOTXF diff --git a/kernels/sifive_x280/1m/bli_packm_sifive_x280_intr/bli_packm_sifive_x280_intr.c b/kernels/sifive_rvv/1m/bli_packm_sifive_rvv_intr/bli_packm_sifive_rvv_intr.c similarity index 86% rename from kernels/sifive_x280/1m/bli_packm_sifive_x280_intr/bli_packm_sifive_x280_intr.c rename to kernels/sifive_rvv/1m/bli_packm_sifive_rvv_intr/bli_packm_sifive_rvv_intr.c index 119872197a..a0a4eb3c3d 100644 --- a/kernels/sifive_x280/1m/bli_packm_sifive_x280_intr/bli_packm_sifive_x280_intr.c +++ b/kernels/sifive_rvv/1m/bli_packm_sifive_rvv_intr/bli_packm_sifive_rvv_intr.c @@ -40,7 +40,7 @@ #include #include -#define PACKM_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##packm_sifive_x280_intr(\ +#define PACKM_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##packm_sifive_rvv_intr(\ conj_t conja, \ pack_t schema, \ dim_t cdim, \ @@ -57,8 +57,11 @@ #define PACKM(...) 
PACKM_(__VA_ARGS__) -#define REF_KERNEL_(PRECISION_CHAR) bli_##PRECISION_CHAR##PRECISION_CHAR##packm_sifive_x280_ref -#define REF_KERNEL(PRECISION_CHAR) REF_KERNEL_(PRECISION_CHAR) +#define BLI_SCAL2BBS_MXN_(PRECISION_CHAR) bli_##PRECISION_CHAR##scal2bbs_mxn +#define BLI_SCAL2BBS_MXN(PRECISION_CHAR) BLI_SCAL2BBS_MXN_(PRECISION_CHAR) + +#define BLI_SET0S_EDGE_(PRECISION_CHAR) bli_##PRECISION_CHAR##set0s_edge +#define BLI_SET0S_EDGE(PRECISION_CHAR) BLI_SET0S_EDGE_(PRECISION_CHAR) // LMUL is the LMUL used when a is "row major" (lda == 1). Since we use // segment stores with more than 4 fields, this is usually m1. @@ -74,9 +77,9 @@ #define LMUL_NR m4 #define FLT_SIZE sizeof(float) #define MR 7 -#define NR 64 +#define NR ( 4 * __riscv_v_min_vlen / 32 ) -#include "./bli_packm_sifive_x280_intr_real.c" +#include "./bli_packm_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -97,9 +100,9 @@ #define LMUL_NR m4 #define FLT_SIZE sizeof(double) #define MR 7 -#define NR 32 +#define NR ( 4 * __riscv_v_min_vlen / 64 ) -#include "./bli_packm_sifive_x280_intr_real.c" +#include "./bli_packm_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -121,9 +124,9 @@ #define LMUL_NR m2 #define FLT_SIZE sizeof(float) #define MR 6 -#define NR 32 +#define NR ( 2 * __riscv_v_min_vlen / 32 ) -#include "./bli_packm_sifive_x280_intr_complex.c" +#include "./bli_packm_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -146,9 +149,9 @@ #define LMUL_NR m2 #define FLT_SIZE sizeof(double) #define MR 6 -#define NR 16 +#define NR ( 2 * __riscv_v_min_vlen / 64 ) -#include "./bli_packm_sifive_x280_intr_complex.c" +#include "./bli_packm_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT diff --git a/kernels/sifive_x280/1m/bli_packm_sifive_x280_intr/bli_packm_sifive_x280_intr_complex.c b/kernels/sifive_rvv/1m/bli_packm_sifive_rvv_intr/bli_packm_sifive_rvv_intr_complex.c similarity index 99% rename from 
kernels/sifive_x280/1m/bli_packm_sifive_x280_intr/bli_packm_sifive_x280_intr_complex.c rename to kernels/sifive_rvv/1m/bli_packm_sifive_rvv_intr/bli_packm_sifive_rvv_intr_complex.c index ee49090dc9..2173be3a74 100644 --- a/kernels/sifive_x280/1m/bli_packm_sifive_x280_intr/bli_packm_sifive_x280_intr_complex.c +++ b/kernels/sifive_rvv/1m/bli_packm_sifive_rvv_intr/bli_packm_sifive_rvv_intr_complex.c @@ -522,20 +522,21 @@ PACKM(PRECISION_CHAR, void) // generic kernel else { - REF_KERNEL(PRECISION_CHAR) + BLI_SCAL2BBS_MXN(PRECISION_CHAR) ( conja, - schema, cdim, - cdim_max, - cdim_bcast, n, - n_max, kappa, - a, inca, lda, - p, ldp, - params, - cntx + a, inca, lda, + p, cdim_bcast, ldp + ); + + BLI_SET0S_EDGE(PRECISION_CHAR) + ( + cdim*cdim_bcast, cdim_max*cdim_bcast, + n, n_max, + p, ldp ); } diff --git a/kernels/sifive_x280/1m/bli_packm_sifive_x280_intr/bli_packm_sifive_x280_intr_real.c b/kernels/sifive_rvv/1m/bli_packm_sifive_rvv_intr/bli_packm_sifive_rvv_intr_real.c similarity index 98% rename from kernels/sifive_x280/1m/bli_packm_sifive_x280_intr/bli_packm_sifive_x280_intr_real.c rename to kernels/sifive_rvv/1m/bli_packm_sifive_rvv_intr/bli_packm_sifive_rvv_intr_real.c index 741714d60a..c853765a2f 100644 --- a/kernels/sifive_x280/1m/bli_packm_sifive_x280_intr/bli_packm_sifive_x280_intr_real.c +++ b/kernels/sifive_rvv/1m/bli_packm_sifive_rvv_intr/bli_packm_sifive_rvv_intr_real.c @@ -37,8 +37,7 @@ PACKM(PRECISION_CHAR, void) { - (void) conja; // Suppress unused parameter warnings - (void) schema; + (void) schema; // Suppress unused parameter warnings (void) params; (void) cntx; const DATATYPE* restrict kappa = kappa_; @@ -341,20 +340,21 @@ PACKM(PRECISION_CHAR, void) // generic kernel else { - REF_KERNEL(PRECISION_CHAR) + BLI_SCAL2BBS_MXN(PRECISION_CHAR) ( conja, - schema, cdim, - cdim_max, - cdim_bcast, n, - n_max, kappa, - a, inca, lda, - p, ldp, - params, - cntx + a, inca, lda, + p, cdim_bcast, ldp + ); + + BLI_SET0S_EDGE(PRECISION_CHAR) + ( + cdim*cdim_bcast, 
cdim_max*cdim_bcast, + n, n_max, + p, ldp ); } diff --git a/kernels/sifive_x280/3/bli_gemm_sifive_x280_intr/bli_gemm_sifive_x280_intr.c b/kernels/sifive_rvv/3/bli_gemm_sifive_rvv_intr/bli_gemm_sifive_rvv_intr.c similarity index 90% rename from kernels/sifive_x280/3/bli_gemm_sifive_x280_intr/bli_gemm_sifive_x280_intr.c rename to kernels/sifive_rvv/3/bli_gemm_sifive_rvv_intr/bli_gemm_sifive_rvv_intr.c index 664d4616f3..564ce25a19 100644 --- a/kernels/sifive_x280/3/bli_gemm_sifive_x280_intr/bli_gemm_sifive_x280_intr.c +++ b/kernels/sifive_rvv/3/bli_gemm_sifive_rvv_intr/bli_gemm_sifive_rvv_intr.c @@ -39,7 +39,7 @@ #include #include -#define GEMM_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##gemm_sifive_x280_intr(\ +#define GEMM_(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##gemm_sifive_rvv_intr(\ dim_t m, \ dim_t n, \ dim_t k, \ @@ -61,9 +61,9 @@ #define LMUL m4 #define FLT_SIZE sizeof(float) #define PACKMR 8 -#define PACKNR 64 +#define PACKNR ( 4 * __riscv_v_min_vlen / 32 ) -#include "./bli_gemm_sifive_x280_intr_real.c" +#include "./bli_gemm_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -80,9 +80,9 @@ #define LMUL m4 #define FLT_SIZE sizeof(double) #define PACKMR 8 -#define PACKNR 32 +#define PACKNR ( 4 * __riscv_v_min_vlen / 64 ) -#include "./bli_gemm_sifive_x280_intr_real.c" +#include "./bli_gemm_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -100,9 +100,9 @@ #define LMUL m2 #define FLT_SIZE sizeof(float) #define PACKMR 8 -#define PACKNR 32 +#define PACKNR ( 2 * __riscv_v_min_vlen / 32 ) -#include "./bli_gemm_sifive_x280_intr_complex.c" +#include "./bli_gemm_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -121,9 +121,9 @@ #define LMUL m2 #define FLT_SIZE sizeof(double) #define PACKMR 8 -#define PACKNR 16 +#define PACKNR ( 2 * __riscv_v_min_vlen / 64 ) -#include "./bli_gemm_sifive_x280_intr_complex.c" +#include "./bli_gemm_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT diff --git 
a/kernels/sifive_x280/3/bli_gemm_sifive_x280_intr/bli_gemm_sifive_x280_intr_complex.c b/kernels/sifive_rvv/3/bli_gemm_sifive_rvv_intr/bli_gemm_sifive_rvv_intr_complex.c similarity index 100% rename from kernels/sifive_x280/3/bli_gemm_sifive_x280_intr/bli_gemm_sifive_x280_intr_complex.c rename to kernels/sifive_rvv/3/bli_gemm_sifive_rvv_intr/bli_gemm_sifive_rvv_intr_complex.c diff --git a/kernels/sifive_x280/3/bli_gemm_sifive_x280_intr/bli_gemm_sifive_x280_intr_real.c b/kernels/sifive_rvv/3/bli_gemm_sifive_rvv_intr/bli_gemm_sifive_rvv_intr_real.c similarity index 100% rename from kernels/sifive_x280/3/bli_gemm_sifive_x280_intr/bli_gemm_sifive_x280_intr_real.c rename to kernels/sifive_rvv/3/bli_gemm_sifive_rvv_intr/bli_gemm_sifive_rvv_intr_real.c diff --git a/kernels/sifive_x280/3/bli_gemmtrsm_sifive_x280_intr/bli_gemmtrsm_sifive_x280_intr.c b/kernels/sifive_rvv/3/bli_gemmtrsm_sifive_rvv_intr/bli_gemmtrsm_sifive_rvv_intr.c similarity index 89% rename from kernels/sifive_x280/3/bli_gemmtrsm_sifive_x280_intr/bli_gemmtrsm_sifive_x280_intr.c rename to kernels/sifive_rvv/3/bli_gemmtrsm_sifive_rvv_intr/bli_gemmtrsm_sifive_rvv_intr.c index 687abec185..9b2b4968f3 100644 --- a/kernels/sifive_x280/3/bli_gemmtrsm_sifive_x280_intr/bli_gemmtrsm_sifive_x280_intr.c +++ b/kernels/sifive_rvv/3/bli_gemmtrsm_sifive_rvv_intr/bli_gemmtrsm_sifive_rvv_intr.c @@ -35,11 +35,11 @@ // clang-format off #include "blis.h" #include "../../riscv_cmul_macros_intr.h" -#include "../../bli_kernels_sifive_x280.h" +#include "../../bli_kernels_sifive_rvv.h" #include #include -#define GEMMTRSM_L(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##gemmtrsm_l_sifive_x280_intr(\ +#define GEMMTRSM_L(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##gemmtrsm_l_sifive_rvv_intr(\ dim_t m, \ dim_t n, \ dim_t k, \ @@ -55,7 +55,7 @@ const cntx_t* restrict cntx \ ) -#define GEMMTRSM_U(PRECISION_CHAR, T) void bli_##PRECISION_CHAR##gemmtrsm_u_sifive_x280_intr(\ +#define GEMMTRSM_U(PRECISION_CHAR, T) void 
bli_##PRECISION_CHAR##gemmtrsm_u_sifive_rvv_intr(\ dim_t m, \ dim_t n, \ dim_t k, \ @@ -80,9 +80,9 @@ #define LMUL m4 #define FLT_SIZE sizeof(float) #define PACKMR 8 -#define PACKNR 64 +#define PACKNR ( 4 * __riscv_v_min_vlen / 32 ) -#include "./bli_gemmtrsm_sifive_x280_intr_real.c" +#include "./bli_gemmtrsm_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -99,9 +99,9 @@ #define LMUL m4 #define FLT_SIZE sizeof(double) #define PACKMR 8 -#define PACKNR 32 +#define PACKNR ( 4 * __riscv_v_min_vlen / 64 ) -#include "./bli_gemmtrsm_sifive_x280_intr_real.c" +#include "./bli_gemmtrsm_sifive_rvv_intr_real.c" #undef DATATYPE #undef PRECISION_CHAR @@ -119,9 +119,9 @@ #define LMUL m2 #define FLT_SIZE sizeof(float) #define PACKMR 8 -#define PACKNR 32 +#define PACKNR ( 2 * __riscv_v_min_vlen / 32 ) -#include "./bli_gemmtrsm_sifive_x280_intr_complex.c" +#include "./bli_gemmtrsm_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT @@ -140,9 +140,9 @@ #define LMUL m2 #define FLT_SIZE sizeof(double) #define PACKMR 8 -#define PACKNR 16 +#define PACKNR ( 2 * __riscv_v_min_vlen / 64 ) -#include "./bli_gemmtrsm_sifive_x280_intr_complex.c" +#include "./bli_gemmtrsm_sifive_rvv_intr_complex.c" #undef DATATYPE #undef BASE_DT diff --git a/kernels/sifive_x280/3/bli_gemmtrsm_sifive_x280_intr/bli_gemmtrsm_sifive_x280_intr_complex.c b/kernels/sifive_rvv/3/bli_gemmtrsm_sifive_rvv_intr/bli_gemmtrsm_sifive_rvv_intr_complex.c similarity index 99% rename from kernels/sifive_x280/3/bli_gemmtrsm_sifive_x280_intr/bli_gemmtrsm_sifive_x280_intr_complex.c rename to kernels/sifive_rvv/3/bli_gemmtrsm_sifive_rvv_intr/bli_gemmtrsm_sifive_rvv_intr_complex.c index 88ea04b7a9..7f2fc1c893 100644 --- a/kernels/sifive_x280/3/bli_gemmtrsm_sifive_x280_intr/bli_gemmtrsm_sifive_x280_intr_complex.c +++ b/kernels/sifive_rvv/3/bli_gemmtrsm_sifive_rvv_intr/bli_gemmtrsm_sifive_rvv_intr_complex.c @@ -35,7 +35,7 @@ // clang-format off #ifdef GEMMTRSM -#define GEMMTRSM_IMPL_NAME_(PRECISION_CHAR) 
bli_##PRECISION_CHAR##gemmtrsm_sifive_x280_intr +#define GEMMTRSM_IMPL_NAME_(PRECISION_CHAR) bli_##PRECISION_CHAR##gemmtrsm_sifive_rvv_intr #define GEMMTRSM_IMPL_NAME(PRECISION_CHAR) GEMMTRSM_IMPL_NAME_(PRECISION_CHAR) static void GEMMTRSM_IMPL_NAME(PRECISION_CHAR) diff --git a/kernels/sifive_x280/3/bli_gemmtrsm_sifive_x280_intr/bli_gemmtrsm_sifive_x280_intr_real.c b/kernels/sifive_rvv/3/bli_gemmtrsm_sifive_rvv_intr/bli_gemmtrsm_sifive_rvv_intr_real.c similarity index 99% rename from kernels/sifive_x280/3/bli_gemmtrsm_sifive_x280_intr/bli_gemmtrsm_sifive_x280_intr_real.c rename to kernels/sifive_rvv/3/bli_gemmtrsm_sifive_rvv_intr/bli_gemmtrsm_sifive_rvv_intr_real.c index 7c3c3b8b7b..b628e4cc11 100644 --- a/kernels/sifive_x280/3/bli_gemmtrsm_sifive_x280_intr/bli_gemmtrsm_sifive_x280_intr_real.c +++ b/kernels/sifive_rvv/3/bli_gemmtrsm_sifive_rvv_intr/bli_gemmtrsm_sifive_rvv_intr_real.c @@ -35,7 +35,7 @@ // clang-format off #ifdef GEMMTRSM -#define GEMMTRSM_IMPL_NAME_(PRECISION_CHAR) bli_##PRECISION_CHAR##gemmtrsm_sifive_x280_intr +#define GEMMTRSM_IMPL_NAME_(PRECISION_CHAR) bli_##PRECISION_CHAR##gemmtrsm_sifive_rvv_intr #define GEMMTRSM_IMPL_NAME(PRECISION_CHAR) GEMMTRSM_IMPL_NAME_(PRECISION_CHAR) static void GEMMTRSM_IMPL_NAME(PRECISION_CHAR) diff --git a/kernels/sifive_rvv/bli_kernels_sifive_rvv.h b/kernels/sifive_rvv/bli_kernels_sifive_rvv.h new file mode 100644 index 0000000000..f9f0f8995c --- /dev/null +++ b/kernels/sifive_rvv/bli_kernels_sifive_rvv.h @@ -0,0 +1,162 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2024, SiFive, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +// Level 1 +ADDV_KER_PROT(float, s, addv_sifive_rvv_intr) +ADDV_KER_PROT(double, d, addv_sifive_rvv_intr) +ADDV_KER_PROT(scomplex, c, addv_sifive_rvv_intr) +ADDV_KER_PROT(dcomplex, z, addv_sifive_rvv_intr) + +AMAXV_KER_PROT(float, s, amaxv_sifive_rvv_intr) +AMAXV_KER_PROT(double, d, amaxv_sifive_rvv_intr) +AMAXV_KER_PROT(scomplex, c, amaxv_sifive_rvv_intr) +AMAXV_KER_PROT(dcomplex, z, amaxv_sifive_rvv_intr) + +AXPBYV_KER_PROT(float, s, axpbyv_sifive_rvv_intr) +AXPBYV_KER_PROT(double, d, axpbyv_sifive_rvv_intr) +AXPBYV_KER_PROT(scomplex, c, axpbyv_sifive_rvv_intr) +AXPBYV_KER_PROT(dcomplex, z, axpbyv_sifive_rvv_intr) + +AXPYV_KER_PROT(float, s, axpyv_sifive_rvv_intr) +AXPYV_KER_PROT(double, d, axpyv_sifive_rvv_intr) +AXPYV_KER_PROT(scomplex, c, axpyv_sifive_rvv_intr) +AXPYV_KER_PROT(dcomplex, z, axpyv_sifive_rvv_intr) + +COPYV_KER_PROT(float, s, copyv_sifive_rvv_intr) +COPYV_KER_PROT(double, d, copyv_sifive_rvv_intr) +COPYV_KER_PROT(scomplex, c, copyv_sifive_rvv_intr) +COPYV_KER_PROT(dcomplex, z, copyv_sifive_rvv_intr) + +DOTV_KER_PROT(float, s, dotv_sifive_rvv_intr) +DOTV_KER_PROT(double, d, dotv_sifive_rvv_intr) +DOTV_KER_PROT(scomplex, c, dotv_sifive_rvv_intr) +DOTV_KER_PROT(dcomplex, z, dotv_sifive_rvv_intr) + +DOTXV_KER_PROT(float, s, dotxv_sifive_rvv_intr) +DOTXV_KER_PROT(double, d, dotxv_sifive_rvv_intr) +DOTXV_KER_PROT(scomplex, c, dotxv_sifive_rvv_intr) +DOTXV_KER_PROT(dcomplex, z, dotxv_sifive_rvv_intr) + +INVERTV_KER_PROT(float, s, invertv_sifive_rvv_intr) +INVERTV_KER_PROT(double, d, invertv_sifive_rvv_intr) +INVERTV_KER_PROT(scomplex, c, invertv_sifive_rvv_intr) +INVERTV_KER_PROT(dcomplex, z, invertv_sifive_rvv_intr) + +INVSCALV_KER_PROT(float, s, invscalv_sifive_rvv_intr) +INVSCALV_KER_PROT(double, d, invscalv_sifive_rvv_intr) +INVSCALV_KER_PROT(scomplex, c, invscalv_sifive_rvv_intr) +INVSCALV_KER_PROT(dcomplex, z, invscalv_sifive_rvv_intr) + +SCAL2V_KER_PROT(float, s, scal2v_sifive_rvv_intr) +SCAL2V_KER_PROT(double, d, scal2v_sifive_rvv_intr) 
+SCAL2V_KER_PROT(scomplex, c, scal2v_sifive_rvv_intr) +SCAL2V_KER_PROT(dcomplex, z, scal2v_sifive_rvv_intr) + +SCALV_KER_PROT(float, s, scalv_sifive_rvv_intr) +SCALV_KER_PROT(double, d, scalv_sifive_rvv_intr) +SCALV_KER_PROT(scomplex, c, scalv_sifive_rvv_intr) +SCALV_KER_PROT(dcomplex, z, scalv_sifive_rvv_intr) + +SETV_KER_PROT(float, s, setv_sifive_rvv_intr) +SETV_KER_PROT(double, d, setv_sifive_rvv_intr) +SETV_KER_PROT(scomplex, c, setv_sifive_rvv_intr) +SETV_KER_PROT(dcomplex, z, setv_sifive_rvv_intr) + +SUBV_KER_PROT(float, s, subv_sifive_rvv_intr) +SUBV_KER_PROT(double, d, subv_sifive_rvv_intr) +SUBV_KER_PROT(scomplex, c, subv_sifive_rvv_intr) +SUBV_KER_PROT(dcomplex, z, subv_sifive_rvv_intr) + +SWAPV_KER_PROT(float, s, swapv_sifive_rvv_intr) +SWAPV_KER_PROT(double, d, swapv_sifive_rvv_intr) +SWAPV_KER_PROT(scomplex, c, swapv_sifive_rvv_intr) +SWAPV_KER_PROT(dcomplex, z, swapv_sifive_rvv_intr) + +XPBYV_KER_PROT(float, s, xpbyv_sifive_rvv_intr) +XPBYV_KER_PROT(double, d, xpbyv_sifive_rvv_intr) +XPBYV_KER_PROT(scomplex, c, xpbyv_sifive_rvv_intr) +XPBYV_KER_PROT(dcomplex, z, xpbyv_sifive_rvv_intr) + +// Level 1f +AXPY2V_KER_PROT(float, s, axpy2v_sifive_rvv_intr) +AXPY2V_KER_PROT(double, d, axpy2v_sifive_rvv_intr) +AXPY2V_KER_PROT(scomplex, c, axpy2v_sifive_rvv_intr) +AXPY2V_KER_PROT(dcomplex, z, axpy2v_sifive_rvv_intr) + +AXPYF_KER_PROT(float, s, axpyf_sifive_rvv_intr) +AXPYF_KER_PROT(double, d, axpyf_sifive_rvv_intr) +AXPYF_KER_PROT(scomplex, c, axpyf_sifive_rvv_intr) +AXPYF_KER_PROT(dcomplex, z, axpyf_sifive_rvv_intr) + +DOTXF_KER_PROT(float, s, dotxf_sifive_rvv_intr) +DOTXF_KER_PROT(double, d, dotxf_sifive_rvv_intr) +DOTXF_KER_PROT(scomplex, c, dotxf_sifive_rvv_intr) +DOTXF_KER_PROT(dcomplex, z, dotxf_sifive_rvv_intr) + +DOTAXPYV_KER_PROT(float, s, dotaxpyv_sifive_rvv_intr) +DOTAXPYV_KER_PROT(double, d, dotaxpyv_sifive_rvv_intr) +DOTAXPYV_KER_PROT(scomplex, c, dotaxpyv_sifive_rvv_intr) +DOTAXPYV_KER_PROT(dcomplex, z, dotaxpyv_sifive_rvv_intr) + 
+DOTXAXPYF_KER_PROT(float, s, dotxaxpyf_sifive_rvv_intr) +DOTXAXPYF_KER_PROT(double, d, dotxaxpyf_sifive_rvv_intr) +DOTXAXPYF_KER_PROT(scomplex,c, dotxaxpyf_sifive_rvv_intr) +DOTXAXPYF_KER_PROT(dcomplex,z, dotxaxpyf_sifive_rvv_intr) + +// Level 1m +PACKM_KER_PROT(float, s, packm_sifive_rvv_intr) +PACKM_KER_PROT(double, d, packm_sifive_rvv_intr) +PACKM_KER_PROT(scomplex, c, packm_sifive_rvv_intr) +PACKM_KER_PROT(dcomplex, z, packm_sifive_rvv_intr) + +// Reference 1m +PACKM_KER_PROT(float, ss, packm_sifive_rvv_ref) +PACKM_KER_PROT(double, dd, packm_sifive_rvv_ref) +PACKM_KER_PROT(scomplex, cc, packm_sifive_rvv_ref) +PACKM_KER_PROT(dcomplex, zz, packm_sifive_rvv_ref) + +// Level 3 +GEMM_UKR_PROT(float, s, gemm_sifive_rvv_intr) +GEMM_UKR_PROT(double, d, gemm_sifive_rvv_intr) +GEMM_UKR_PROT(scomplex, c, gemm_sifive_rvv_intr) +GEMM_UKR_PROT(dcomplex, z, gemm_sifive_rvv_intr) + +GEMMTRSM_UKR_PROT(float, s, gemmtrsm_l_sifive_rvv_intr) +GEMMTRSM_UKR_PROT(double, d, gemmtrsm_l_sifive_rvv_intr) +GEMMTRSM_UKR_PROT(scomplex, c, gemmtrsm_l_sifive_rvv_intr) +GEMMTRSM_UKR_PROT(dcomplex, z, gemmtrsm_l_sifive_rvv_intr) +GEMMTRSM_UKR_PROT(float, s, gemmtrsm_u_sifive_rvv_intr) +GEMMTRSM_UKR_PROT(double, d, gemmtrsm_u_sifive_rvv_intr) +GEMMTRSM_UKR_PROT(scomplex, c, gemmtrsm_u_sifive_rvv_intr) +GEMMTRSM_UKR_PROT(dcomplex, z, gemmtrsm_u_sifive_rvv_intr) diff --git a/kernels/sifive_x280/riscv_cmul_macros_intr.h b/kernels/sifive_rvv/riscv_cmul_macros_intr.h similarity index 100% rename from kernels/sifive_x280/riscv_cmul_macros_intr.h rename to kernels/sifive_rvv/riscv_cmul_macros_intr.h diff --git a/kernels/sifive_x280/riscv_overloaded_intrinsics.h b/kernels/sifive_rvv/riscv_overloaded_intrinsics.h similarity index 99% rename from kernels/sifive_x280/riscv_overloaded_intrinsics.h rename to kernels/sifive_rvv/riscv_overloaded_intrinsics.h index 44f70f2727..794c44c092 100644 --- a/kernels/sifive_x280/riscv_overloaded_intrinsics.h +++ b/kernels/sifive_rvv/riscv_overloaded_intrinsics.h @@ 
-4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, SiFive, Inc. + Copyright (C) 2024, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/sifive_x280/bli_kernels_sifive_x280.h b/kernels/sifive_x280/bli_kernels_sifive_x280.h deleted file mode 100644 index ff7b445c47..0000000000 --- a/kernels/sifive_x280/bli_kernels_sifive_x280.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2023, SiFive, Inc. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -// Level 1 -ADDV_KER_PROT(float, s, addv_sifive_x280_intr) -ADDV_KER_PROT(double, d, addv_sifive_x280_intr) -ADDV_KER_PROT(scomplex, c, addv_sifive_x280_intr) -ADDV_KER_PROT(dcomplex, z, addv_sifive_x280_intr) - -AMAXV_KER_PROT(float, s, amaxv_sifive_x280_intr) -AMAXV_KER_PROT(double, d, amaxv_sifive_x280_intr) -AMAXV_KER_PROT(scomplex, c, amaxv_sifive_x280_intr) -AMAXV_KER_PROT(dcomplex, z, amaxv_sifive_x280_intr) - -AXPBYV_KER_PROT(float, s, axpbyv_sifive_x280_intr) -AXPBYV_KER_PROT(double, d, axpbyv_sifive_x280_intr) -AXPBYV_KER_PROT(scomplex, c, axpbyv_sifive_x280_intr) -AXPBYV_KER_PROT(dcomplex, z, axpbyv_sifive_x280_intr) - -AXPYV_KER_PROT(float, s, axpyv_sifive_x280_intr) -AXPYV_KER_PROT(double, d, axpyv_sifive_x280_intr) -AXPYV_KER_PROT(scomplex, c, axpyv_sifive_x280_intr) -AXPYV_KER_PROT(dcomplex, z, axpyv_sifive_x280_intr) - -COPYV_KER_PROT(float, s, copyv_sifive_x280_intr) -COPYV_KER_PROT(double, d, copyv_sifive_x280_intr) -COPYV_KER_PROT(scomplex, c, copyv_sifive_x280_intr) -COPYV_KER_PROT(dcomplex, z, copyv_sifive_x280_intr) - -DOTV_KER_PROT(float, s, dotv_sifive_x280_intr) -DOTV_KER_PROT(double, d, dotv_sifive_x280_intr) -DOTV_KER_PROT(scomplex, c, dotv_sifive_x280_intr) -DOTV_KER_PROT(dcomplex, z, dotv_sifive_x280_intr) - -DOTXV_KER_PROT(float, s, dotxv_sifive_x280_intr) -DOTXV_KER_PROT(double, d, dotxv_sifive_x280_intr) -DOTXV_KER_PROT(scomplex, c, dotxv_sifive_x280_intr) -DOTXV_KER_PROT(dcomplex, z, 
dotxv_sifive_x280_intr) - -INVERTV_KER_PROT(float, s, invertv_sifive_x280_intr) -INVERTV_KER_PROT(double, d, invertv_sifive_x280_intr) -INVERTV_KER_PROT(scomplex, c, invertv_sifive_x280_intr) -INVERTV_KER_PROT(dcomplex, z, invertv_sifive_x280_intr) - -INVSCALV_KER_PROT(float, s, invscalv_sifive_x280_intr) -INVSCALV_KER_PROT(double, d, invscalv_sifive_x280_intr) -INVSCALV_KER_PROT(scomplex, c, invscalv_sifive_x280_intr) -INVSCALV_KER_PROT(dcomplex, z, invscalv_sifive_x280_intr) - -SCAL2V_KER_PROT(float, s, scal2v_sifive_x280_intr) -SCAL2V_KER_PROT(double, d, scal2v_sifive_x280_intr) -SCAL2V_KER_PROT(scomplex, c, scal2v_sifive_x280_intr) -SCAL2V_KER_PROT(dcomplex, z, scal2v_sifive_x280_intr) - -SCALV_KER_PROT(float, s, scalv_sifive_x280_intr) -SCALV_KER_PROT(double, d, scalv_sifive_x280_intr) -SCALV_KER_PROT(scomplex, c, scalv_sifive_x280_intr) -SCALV_KER_PROT(dcomplex, z, scalv_sifive_x280_intr) - -SETV_KER_PROT(float, s, setv_sifive_x280_intr) -SETV_KER_PROT(double, d, setv_sifive_x280_intr) -SETV_KER_PROT(scomplex, c, setv_sifive_x280_intr) -SETV_KER_PROT(dcomplex, z, setv_sifive_x280_intr) - -SUBV_KER_PROT(float, s, subv_sifive_x280_intr) -SUBV_KER_PROT(double, d, subv_sifive_x280_intr) -SUBV_KER_PROT(scomplex, c, subv_sifive_x280_intr) -SUBV_KER_PROT(dcomplex, z, subv_sifive_x280_intr) - -SWAPV_KER_PROT(float, s, swapv_sifive_x280_intr) -SWAPV_KER_PROT(double, d, swapv_sifive_x280_intr) -SWAPV_KER_PROT(scomplex, c, swapv_sifive_x280_intr) -SWAPV_KER_PROT(dcomplex, z, swapv_sifive_x280_intr) - -XPBYV_KER_PROT(float, s, xpbyv_sifive_x280_intr) -XPBYV_KER_PROT(double, d, xpbyv_sifive_x280_intr) -XPBYV_KER_PROT(scomplex, c, xpbyv_sifive_x280_intr) -XPBYV_KER_PROT(dcomplex, z, xpbyv_sifive_x280_intr) - -// Level 1f -AXPY2V_KER_PROT(float, s, axpy2v_sifive_x280_intr) -AXPY2V_KER_PROT(double, d, axpy2v_sifive_x280_intr) -AXPY2V_KER_PROT(scomplex, c, axpy2v_sifive_x280_intr) -AXPY2V_KER_PROT(dcomplex, z, axpy2v_sifive_x280_intr) - -AXPYF_KER_PROT(float, s, 
axpyf_sifive_x280_intr) -AXPYF_KER_PROT(double, d, axpyf_sifive_x280_intr) -AXPYF_KER_PROT(scomplex, c, axpyf_sifive_x280_intr) -AXPYF_KER_PROT(dcomplex, z, axpyf_sifive_x280_intr) - -DOTXF_KER_PROT(float, s, dotxf_sifive_x280_intr) -DOTXF_KER_PROT(double, d, dotxf_sifive_x280_intr) -DOTXF_KER_PROT(scomplex, c, dotxf_sifive_x280_intr) -DOTXF_KER_PROT(dcomplex, z, dotxf_sifive_x280_intr) - -DOTAXPYV_KER_PROT(float, s, dotaxpyv_sifive_x280_intr) -DOTAXPYV_KER_PROT(double, d, dotaxpyv_sifive_x280_intr) -DOTAXPYV_KER_PROT(scomplex, c, dotaxpyv_sifive_x280_intr) -DOTAXPYV_KER_PROT(dcomplex, z, dotaxpyv_sifive_x280_intr) - -DOTXAXPYF_KER_PROT(float, s, dotxaxpyf_sifive_x280_intr) -DOTXAXPYF_KER_PROT(double, d, dotxaxpyf_sifive_x280_intr) -DOTXAXPYF_KER_PROT(scomplex,c, dotxaxpyf_sifive_x280_intr) -DOTXAXPYF_KER_PROT(dcomplex,z, dotxaxpyf_sifive_x280_intr) - -// Level 1m -PACKM_KER_PROT(float, s, packm_sifive_x280_intr) -PACKM_KER_PROT(double, d, packm_sifive_x280_intr) -PACKM_KER_PROT(scomplex, c, packm_sifive_x280_intr) -PACKM_KER_PROT(dcomplex, z, packm_sifive_x280_intr) - -// Reference 1m -PACKM_KER_PROT(float, ss, packm_sifive_x280_ref) -PACKM_KER_PROT(double, dd, packm_sifive_x280_ref) -PACKM_KER_PROT(scomplex, cc, packm_sifive_x280_ref) -PACKM_KER_PROT(dcomplex, zz, packm_sifive_x280_ref) - -// Level 3 -GEMM_UKR_PROT(float, s, gemm_sifive_x280_intr) -GEMM_UKR_PROT(double, d, gemm_sifive_x280_intr) -GEMM_UKR_PROT(scomplex, c, gemm_sifive_x280_intr) -GEMM_UKR_PROT(dcomplex, z, gemm_sifive_x280_intr) - -GEMMTRSM_UKR_PROT(float, s, gemmtrsm_l_sifive_x280_intr) -GEMMTRSM_UKR_PROT(double, d, gemmtrsm_l_sifive_x280_intr) -GEMMTRSM_UKR_PROT(scomplex, c, gemmtrsm_l_sifive_x280_intr) -GEMMTRSM_UKR_PROT(dcomplex, z, gemmtrsm_l_sifive_x280_intr) -GEMMTRSM_UKR_PROT(float, s, gemmtrsm_u_sifive_x280_intr) -GEMMTRSM_UKR_PROT(double, d, gemmtrsm_u_sifive_x280_intr) -GEMMTRSM_UKR_PROT(scomplex, c, gemmtrsm_u_sifive_x280_intr) -GEMMTRSM_UKR_PROT(dcomplex, z, 
gemmtrsm_u_sifive_x280_intr) diff --git a/kernels/sifive_x280/riscv_cmul_macros_asm.h b/kernels/sifive_x280/riscv_cmul_macros_asm.h deleted file mode 100644 index 9c33fd7bc5..0000000000 --- a/kernels/sifive_x280/riscv_cmul_macros_asm.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2023, SiFive, Inc. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -// macros to emit complex multiplication -// caveat: the destination registers cannot overlap the source registers! -// rd = rs1 * rs2 -#define cmul(rd_r, rd_i, rs1_r, rs1_i, rs2_r, rs2_i) \ - \ - __asm__(FMUL#rd_r", "#rs1_r", "#rs2_r);\ - __asm__(FMUL#rd_i", "#rs1_r", "#rs2_i);\ - __asm__(FNMSUB#rd_r", "#rs1_i", "#rs2_i", "#rd_r);\ - __asm__(FMADD#rd_i", "#rs1_i", "#rs2_r", "#rd_i) - -// vd = vs2 * f[rs1] -#define vcmul_vf(vd_r, vd_i, vs2_r, vs2_i, rs1_r, rs1_i) \ - \ - __asm__("vfmul.vf "#vd_r", "#vs2_r", "#rs1_r);\ - __asm__("vfmul.vf "#vd_i", "#vs2_r", "#rs1_i);\ - __asm__("vfnmsac.vf "#vd_r", "#rs1_i", "#vs2_i);\ - __asm__("vfmacc.vf "#vd_i", "#rs1_r", "#vs2_i) - -#define vcmul_vf2(vd_r, vd_i, vs2_r, vs2_i, rs1_r, rs1_i) \ - \ - __asm__("vfmul.vf "#vd_r", "#vs2_r", %0" : : "f"(rs1_r));\ - __asm__("vfmul.vf "#vd_i", "#vs2_r", %0" : : "f"(rs1_i));\ - __asm__("vfnmsac.vf "#vd_r", %0, "#vs2_i : : "f"(rs1_i));\ - __asm__("vfmacc.vf "#vd_i", %0, "#vs2_i : : "f"(rs1_r)) - -// vd = conj(vs2) * f[rs1] -#define vcmul_vf_conj(vd_r, vd_i, vs2_r, vs2_i, rs1_r, rs1_i) \ - \ - __asm__("vfmul.vf "#vd_r", "#vs2_r", "#rs1_r);\ - __asm__("vfmul.vf "#vd_i", "#vs2_r", "#rs1_i);\ - __asm__("vfmacc.vf "#vd_r", "#rs1_i", "#vs2_i);\ - __asm__("vfnmsac.vf "#vd_i", "#rs1_r", "#vs2_i) - -#define vcmul_vf_conj2(vd_r, vd_i, vs2_r, vs2_i, rs1_r, rs1_i) \ - \ - __asm__("vfmul.vf "#vd_r", "#vs2_r", %0" : : "f"(rs1_r));\ - __asm__("vfmul.vf "#vd_i", "#vs2_r", %0" : : "f"(rs1_i));\ - __asm__("vfmacc.vf "#vd_r", %0, "#vs2_i : : "f"(rs1_i));\ - __asm__("vfnmsac.vf "#vd_i", %0, "#vs2_i : : "f"(rs1_r)) - -// vd += vs2 * f[rs1] -#define vcmacc_vf(vd_r, vd_i, rs1_r, rs1_i, vs2_r, vs2_i) \ - \ - __asm__("vfmacc.vf "#vd_r", "#rs1_r", "#vs2_r);\ - __asm__("vfmacc.vf "#vd_i", "#rs1_i", "#vs2_r);\ - __asm__("vfnmsac.vf "#vd_r", "#rs1_i", "#vs2_i);\ - __asm__("vfmacc.vf "#vd_i", "#rs1_r", "#vs2_i) - -#define vcmacc_vf2(vd_r, vd_i, rs1_r, rs1_i, vs2_r, vs2_i) \ - \ - __asm__("vfmacc.vf "#vd_r", %0, 
"#vs2_r : : "f"(rs1_r));\ - __asm__("vfmacc.vf "#vd_i", %0, "#vs2_r : : "f"(rs1_i));\ - __asm__("vfnmsac.vf "#vd_r", %0, "#vs2_i : : "f"(rs1_i));\ - __asm__("vfmacc.vf "#vd_i", %0, "#vs2_i : : "f"(rs1_r)) - -// vd += conj(vs2) * f[rs1] -#define vcmacc_vf_conj(vd_r, vd_i, rs1_r, rs1_i, vs2_r, vs2_i) \ - \ - __asm__("vfmacc.vf "#vd_r", "#rs1_r", "#vs2_r);\ - __asm__("vfmacc.vf "#vd_i", "#rs1_i", "#vs2_r);\ - __asm__("vfmacc.vf "#vd_r", "#rs1_i", "#vs2_i);\ - __asm__("vfnmsac.vf "#vd_i", "#rs1_r", "#vs2_i) - -// vd -= vs2 * f[rs1] -#define vcnmsac_vf(vd_r, vd_i, rs1_r, rs1_i, vs2_r, vs2_i) \ - \ - __asm__("vfnmsac.vf "#vd_r", "#rs1_r", "#vs2_r);\ - __asm__("vfnmsac.vf "#vd_i", "#rs1_i", "#vs2_r);\ - __asm__("vfmacc.vf "#vd_r", "#rs1_i", "#vs2_i);\ - __asm__("vfnmsac.vf "#vd_i", "#rs1_r", "#vs2_i) - -// vd = vs2 * vs1 -#define vcmul_vv(vd_r, vd_i, vs2_r, vs2_i, vs1_r, vs1_i) \ - \ - __asm__("vfmul.vv "#vd_r", "#vs2_r", "#vs1_r);\ - __asm__("vfmul.vv "#vd_i", "#vs2_r", "#vs1_i);\ - __asm__("vfnmsac.vv "#vd_r", "#vs2_i", "#vs1_i);\ - __asm__("vfmacc.vv "#vd_i", "#vs2_i", "#vs1_r) - -// vd = vs2 * conj(vs1) -#define vcmul_vv_conj(vd_r, vd_i, vs2_r, vs2_i, vs1_r, vs1_i) \ - \ - __asm__("vfmul.vv "#vd_r", "#vs2_r", "#vs1_r);\ - __asm__("vfmul.vv "#vd_i", "#vs2_r", "#vs1_i);\ - __asm__("vfmacc.vv "#vd_r", "#vs2_i", "#vs1_i);\ - __asm__("vfmsac.vv "#vd_i", "#vs2_i", "#vs1_r) - -// vd += vs2 * vs1 -#define vcmacc_vv(vd_r, vd_i, vs2_r, vs2_i, vs1_r, vs1_i) \ - \ - __asm__("vfmacc.vv "#vd_r", "#vs2_r", "#vs1_r);\ - __asm__("vfmacc.vv "#vd_i", "#vs2_r", "#vs1_i);\ - __asm__("vfnmsac.vv "#vd_r", "#vs2_i", "#vs1_i);\ - __asm__("vfmacc.vv "#vd_i", "#vs2_i", "#vs1_r) - -// vd += vs2 * conj(vs1) -#define vcmacc_vv_conj(vd_r, vd_i, vs2_r, vs2_i, vs1_r, vs1_i) \ - \ - __asm__("vfmacc.vv "#vd_r", "#vs2_r", "#vs1_r);\ - __asm__("vfnmsac.vv "#vd_i", "#vs2_r", "#vs1_i);\ - __asm__("vfmacc.vv "#vd_r", "#vs2_i", "#vs1_i);\ - __asm__("vfmacc.vv "#vd_i", "#vs2_i", "#vs1_r) - From 
e40da7a2e7f8b1526e9b1d4ce1bc538974331cca Mon Sep 17 00:00:00 2001 From: Michael Yeh Date: Thu, 21 Nov 2024 15:21:44 -0800 Subject: [PATCH 2/3] Fix undefs in packm --- .../bli_packm_sifive_rvv_intr/bli_packm_sifive_rvv_intr.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernels/sifive_rvv/1m/bli_packm_sifive_rvv_intr/bli_packm_sifive_rvv_intr.c b/kernels/sifive_rvv/1m/bli_packm_sifive_rvv_intr/bli_packm_sifive_rvv_intr.c index a0a4eb3c3d..cdd5a4035b 100644 --- a/kernels/sifive_rvv/1m/bli_packm_sifive_rvv_intr/bli_packm_sifive_rvv_intr.c +++ b/kernels/sifive_rvv/1m/bli_packm_sifive_rvv_intr/bli_packm_sifive_rvv_intr.c @@ -164,8 +164,11 @@ #undef MR #undef NR -#undef REF_KERNEL_ -#undef REF_KERNEL +#undef BLI_SCAL2BBS_MXN_ +#undef BLI_SCAL2BBS_MXN + +#undef BLI_SET0S_EDGE_ +#undef BLI_SET0S_EDGE #undef PACKM #undef PACKM_ From 398fda270aeac34ad6bcf8aea76d9bd0567fb3ee Mon Sep 17 00:00:00 2001 From: Michael Yeh Date: Wed, 11 Dec 2024 15:54:29 -0800 Subject: [PATCH 3/3] Determine vlen at runtime --- config/sifive_rvv/bli_cntx_init_sifive_rvv.c | 7 +++++-- config/sifive_rvv/bli_kernel_defs_sifive_rvv.h | 8 ++++---- config/sifive_rvv/make_defs.mk | 2 +- .../bli_packm_sifive_rvv_intr/bli_packm_sifive_rvv_intr.c | 8 ++++---- .../3/bli_gemm_sifive_rvv_intr/bli_gemm_sifive_rvv_intr.c | 8 ++++---- .../bli_gemmtrsm_sifive_rvv_intr.c | 8 ++++---- 6 files changed, 22 insertions(+), 19 deletions(-) diff --git a/config/sifive_rvv/bli_cntx_init_sifive_rvv.c b/config/sifive_rvv/bli_cntx_init_sifive_rvv.c index 222a837434..54f17303fd 100644 --- a/config/sifive_rvv/bli_cntx_init_sifive_rvv.c +++ b/config/sifive_rvv/bli_cntx_init_sifive_rvv.c @@ -33,6 +33,7 @@ */ #include "blis.h" +#include <riscv_vector.h> void bli_cntx_init_sifive_rvv( cntx_t* cntx ) { @@ -43,6 +44,8 @@ void bli_cntx_init_sifive_rvv( cntx_t* cntx ) // ------------------------------------------------------------------------- + unsigned vlenb = __riscv_vlenb(); + // Update the context with optimized native
kernels. bli_cntx_set_ukrs ( @@ -191,9 +194,9 @@ void bli_cntx_init_sifive_rvv( cntx_t* cntx ) // s d c z bli_blksz_init ( &blkszs[ BLIS_MR ], 7, 7, 6, 6, 8, 8, 8, 8 ); - bli_blksz_init_easy( &blkszs[ BLIS_NR ], 4 * __riscv_v_min_vlen / 32, 4 * __riscv_v_min_vlen / 64, 2 * __riscv_v_min_vlen / 32, 2 * __riscv_v_min_vlen / 64 ); + bli_blksz_init_easy( &blkszs[ BLIS_NR ], 4 * vlenb / 4, 4 * vlenb / 8, 2 * vlenb / 4, 2 * vlenb / 8 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 7, 7, 6, 6 ); - bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4 * __riscv_v_min_vlen / 32, 4 * __riscv_v_min_vlen / 64, 2 * __riscv_v_min_vlen / 32, 2 * __riscv_v_min_vlen / 64 ); + bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4 * vlenb / 4, 4 * vlenb / 8, 2 * vlenb / 4, 2 * vlenb / 8 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 64, 64, 64, 64 ); // Default BLIS_BBM_s = 1, but set here to ensure it's correct bli_blksz_init_easy( &blkszs[ BLIS_BBM ], 1, 1, 1, 1 ); diff --git a/config/sifive_rvv/bli_kernel_defs_sifive_rvv.h b/config/sifive_rvv/bli_kernel_defs_sifive_rvv.h index c6db9aceb7..33543db50f 100644 --- a/config/sifive_rvv/bli_kernel_defs_sifive_rvv.h +++ b/config/sifive_rvv/bli_kernel_defs_sifive_rvv.h @@ -47,9 +47,9 @@ #define BLIS_PACKMR_c 8 #define BLIS_PACKMR_z 8 -#define BLIS_NR_s ( 4 * __riscv_v_min_vlen / 32 ) -#define BLIS_NR_d ( 4 * __riscv_v_min_vlen / 64 ) -#define BLIS_NR_c ( 2 * __riscv_v_min_vlen / 32 ) -#define BLIS_NR_z ( 2 * __riscv_v_min_vlen / 64 ) +#define BLIS_NR_s -1 +#define BLIS_NR_d -1 +#define BLIS_NR_c -1 +#define BLIS_NR_z -1 //#endif diff --git a/config/sifive_rvv/make_defs.mk b/config/sifive_rvv/make_defs.mk index 63c2d447fe..a4b3675e15 100644 --- a/config/sifive_rvv/make_defs.mk +++ b/config/sifive_rvv/make_defs.mk @@ -46,7 +46,7 @@ THIS_CONFIG := sifive_rvv # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. 
-CMISCFLAGS_SIFIVE := -mcmodel=medany -march=rv64gcv_zba_zbb_zvl128b -mabi=lp64d +CMISCFLAGS_SIFIVE := -mcmodel=medany -march=rv64gcv_zba_zbb -mabi=lp64d CMISCFLAGS_SIFIVE_OTHER := CPPROCFLAGS := CMISCFLAGS := $(CMISCFLAGS_SIFIVE) $(CMISCFLAGS_SIFIVE_OTHER) \ diff --git a/kernels/sifive_rvv/1m/bli_packm_sifive_rvv_intr/bli_packm_sifive_rvv_intr.c b/kernels/sifive_rvv/1m/bli_packm_sifive_rvv_intr/bli_packm_sifive_rvv_intr.c index cdd5a4035b..567a2a2b5d 100644 --- a/kernels/sifive_rvv/1m/bli_packm_sifive_rvv_intr/bli_packm_sifive_rvv_intr.c +++ b/kernels/sifive_rvv/1m/bli_packm_sifive_rvv_intr/bli_packm_sifive_rvv_intr.c @@ -77,7 +77,7 @@ #define LMUL_NR m4 #define FLT_SIZE sizeof(float) #define MR 7 -#define NR ( 4 * __riscv_v_min_vlen / 32 ) +#define NR ( 4 * __riscv_vlenb() / 4 ) #include "./bli_packm_sifive_rvv_intr_real.c" @@ -100,7 +100,7 @@ #define LMUL_NR m4 #define FLT_SIZE sizeof(double) #define MR 7 -#define NR ( 4 * __riscv_v_min_vlen / 64 ) +#define NR ( 4 * __riscv_vlenb() / 8 ) #include "./bli_packm_sifive_rvv_intr_real.c" @@ -124,7 +124,7 @@ #define LMUL_NR m2 #define FLT_SIZE sizeof(float) #define MR 6 -#define NR ( 2 * __riscv_v_min_vlen / 32 ) +#define NR ( 2 * __riscv_vlenb() / 4 ) #include "./bli_packm_sifive_rvv_intr_complex.c" @@ -149,7 +149,7 @@ #define LMUL_NR m2 #define FLT_SIZE sizeof(double) #define MR 6 -#define NR ( 2 * __riscv_v_min_vlen / 64 ) +#define NR ( 2 * __riscv_vlenb() / 8 ) #include "./bli_packm_sifive_rvv_intr_complex.c" diff --git a/kernels/sifive_rvv/3/bli_gemm_sifive_rvv_intr/bli_gemm_sifive_rvv_intr.c b/kernels/sifive_rvv/3/bli_gemm_sifive_rvv_intr/bli_gemm_sifive_rvv_intr.c index 564ce25a19..97722f13ce 100644 --- a/kernels/sifive_rvv/3/bli_gemm_sifive_rvv_intr/bli_gemm_sifive_rvv_intr.c +++ b/kernels/sifive_rvv/3/bli_gemm_sifive_rvv_intr/bli_gemm_sifive_rvv_intr.c @@ -61,7 +61,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(float) #define PACKMR 8 -#define PACKNR ( 4 * __riscv_v_min_vlen / 32 ) +#define PACKNR ( 4 * 
__riscv_vlenb() / 4 ) #include "./bli_gemm_sifive_rvv_intr_real.c" @@ -80,7 +80,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(double) #define PACKMR 8 -#define PACKNR ( 4 * __riscv_v_min_vlen / 64 ) +#define PACKNR ( 4 * __riscv_vlenb() / 8 ) #include "./bli_gemm_sifive_rvv_intr_real.c" @@ -100,7 +100,7 @@ #define LMUL m2 #define FLT_SIZE sizeof(float) #define PACKMR 8 -#define PACKNR ( 2 * __riscv_v_min_vlen / 32 ) +#define PACKNR ( 2 * __riscv_vlenb() / 4 ) #include "./bli_gemm_sifive_rvv_intr_complex.c" @@ -121,7 +121,7 @@ #define LMUL m2 #define FLT_SIZE sizeof(double) #define PACKMR 8 -#define PACKNR ( 2 * __riscv_v_min_vlen / 64 ) +#define PACKNR ( 2 * __riscv_vlenb() / 8 ) #include "./bli_gemm_sifive_rvv_intr_complex.c" diff --git a/kernels/sifive_rvv/3/bli_gemmtrsm_sifive_rvv_intr/bli_gemmtrsm_sifive_rvv_intr.c b/kernels/sifive_rvv/3/bli_gemmtrsm_sifive_rvv_intr/bli_gemmtrsm_sifive_rvv_intr.c index 9b2b4968f3..fa4ea309dd 100644 --- a/kernels/sifive_rvv/3/bli_gemmtrsm_sifive_rvv_intr/bli_gemmtrsm_sifive_rvv_intr.c +++ b/kernels/sifive_rvv/3/bli_gemmtrsm_sifive_rvv_intr/bli_gemmtrsm_sifive_rvv_intr.c @@ -80,7 +80,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(float) #define PACKMR 8 -#define PACKNR ( 4 * __riscv_v_min_vlen / 32 ) +#define PACKNR ( 4 * __riscv_vlenb() / 4 ) #include "./bli_gemmtrsm_sifive_rvv_intr_real.c" @@ -99,7 +99,7 @@ #define LMUL m4 #define FLT_SIZE sizeof(double) #define PACKMR 8 -#define PACKNR ( 4 * __riscv_v_min_vlen / 64 ) +#define PACKNR ( 4 * __riscv_vlenb() / 8 ) #include "./bli_gemmtrsm_sifive_rvv_intr_real.c" @@ -119,7 +119,7 @@ #define LMUL m2 #define FLT_SIZE sizeof(float) #define PACKMR 8 -#define PACKNR ( 2 * __riscv_v_min_vlen / 32 ) +#define PACKNR ( 2 * __riscv_vlenb() / 4 ) #include "./bli_gemmtrsm_sifive_rvv_intr_complex.c" @@ -140,7 +140,7 @@ #define LMUL m2 #define FLT_SIZE sizeof(double) #define PACKMR 8 -#define PACKNR ( 2 * __riscv_v_min_vlen / 64 ) +#define PACKNR ( 2 * __riscv_vlenb() / 8 ) #include 
"./bli_gemmtrsm_sifive_rvv_intr_complex.c"