From 13224f4bf551593cf979198d390aed25840dfb3a Mon Sep 17 00:00:00 2001 From: Gabriele Ceccolini Date: Mon, 31 Mar 2025 13:30:53 +0200 Subject: [PATCH 1/4] First compiling version of rvv_sg2042 family configuration. Still with the old kernels. --- blis.pc | 11 + config/rvv_sg2042/bli_cntx_init_rvv_sg2042.c | 115 + .../rvv_sg2042/bli_kernel_defs_rvv_sg2042.h | 42 + config/rvv_sg2042/make_defs.mk | 103 + config_registry | 5 +- frame/base/bli_arch.c | 4 + frame/include/bli_arch_config.h | 4 +- frame/include/bli_gentconf_macro_defs.h | 7 +- frame/include/bli_type_defs.h | 1 + include/blis.h | 1 + include/cblas.h | 1 + .../rvv_sg2042/3/bli_cgemm_rvv_sg2042_4vx4.c | 79 + .../3/bli_cgemm_rvv_sg2042_asm_4vx4.S | 45 + .../3/bli_czgemm_rvv_sg2042_asm_4vx4.h | 801 +++ .../rvv_sg2042/3/bli_dgemm_rvv_sg2042_4vx4.c | 79 + .../3/bli_dgemm_rvv_sg2042_asm_4vx4.S | 45 + kernels/rvv_sg2042/3/bli_rvv_sg2042_utils.h | 46 + .../3/bli_sdgemm_rvv_sg2042_asm_4vx4.h | 627 ++ .../rvv_sg2042/3/bli_sgemm_rvv_sg2042_4vx4.c | 80 + .../3/bli_sgemm_rvv_sg2042_asm_4vx4.S | 45 + .../rvv_sg2042/3/bli_zgemm_rvv_sg2042_4vx4.c | 80 + .../3/bli_zgemm_rvv_sg2042_asm_4vx4.S | 44 + .../3/rvv_sg2042_restore_registers.h | 77 + .../rvv_sg2042/3/rvv_sg2042_save_registers.h | 77 + kernels/rvv_sg2042/bli_kernels_rvv_sg2042.h | 38 + share/blis/avx.s | 6 + share/blis/avx512dq.s | 6 + share/blis/avx512f.s | 7 + share/blis/common.mk | 1365 ++++ .../blis/config/a64fx/bli_kernel_defs_a64fx.h | 52 + share/blis/config/a64fx/make_defs.mk | 82 + .../blis/config/altra/bli_kernel_defs_altra.h | 48 + share/blis/config/altra/make_defs.mk | 90 + .../altramax/bli_kernel_defs_altramax.h | 48 + share/blis/config/altramax/make_defs.mk | 90 + share/blis/config/amd64/make_defs.mk | 69 + share/blis/config/amd64_legacy/make_defs.mk | 70 + share/blis/config/arm32/make_defs.mk | 86 + share/blis/config/arm64/make_defs.mk | 90 + .../config/armsve/bli_kernel_defs_armsve.h | 58 + share/blis/config/armsve/make_defs.mk | 82 + 
share/blis/config/bgq/bli_kernel_defs_bgq.h | 48 + share/blis/config/bgq/make_defs.mk | 102 + .../bulldozer/bli_kernel_defs_bulldozer.h | 52 + share/blis/config/bulldozer/make_defs.mk | 90 + .../cortexa15/bli_kernel_defs_cortexa15.h | 48 + share/blis/config/cortexa15/make_defs.mk | 86 + .../cortexa53/bli_kernel_defs_cortexa53.h | 48 + share/blis/config/cortexa53/make_defs.mk | 90 + .../cortexa57/bli_kernel_defs_cortexa57.h | 48 + share/blis/config/cortexa57/make_defs.mk | 90 + .../cortexa9/bli_kernel_defs_cortexa9.h | 48 + share/blis/config/cortexa9/make_defs.mk | 86 + .../excavator/bli_kernel_defs_excavator.h | 52 + share/blis/config/excavator/make_defs.mk | 90 + .../firestorm/bli_kernel_defs_firestorm.h | 48 + share/blis/config/firestorm/make_defs.mk | 82 + .../config/generic/bli_kernel_defs_generic.h | 42 + share/blis/config/generic/make_defs.mk | 98 + .../config/haswell/bli_kernel_defs_haswell.h | 52 + share/blis/config/haswell/make_defs.mk | 100 + share/blis/config/intel64/make_defs.mk | 94 + share/blis/config/knl/bli_kernel_defs_knl.h | 48 + share/blis/config/knl/make_defs.mk | 118 + .../config/penryn/bli_kernel_defs_penryn.h | 48 + share/blis/config/penryn/make_defs.mk | 94 + .../piledriver/bli_kernel_defs_piledriver.h | 52 + share/blis/config/piledriver/make_defs.mk | 90 + share/blis/config/power/make_defs.mk | 82 + .../config/power10/bli_kernel_defs_power10.h | 49 + share/blis/config/power10/make_defs.mk | 83 + .../config/power9/bli_kernel_defs_power9.h | 49 + share/blis/config/power9/make_defs.mk | 84 + .../blis/config/rv32i/bli_kernel_defs_rv32i.h | 43 + share/blis/config/rv32i/make_defs.mk | 102 + .../config/rv32iv/bli_kernel_defs_rv32iv.h | 43 + share/blis/config/rv32iv/make_defs.mk | 104 + .../blis/config/rv64i/bli_kernel_defs_rv64i.h | 43 + share/blis/config/rv64i/make_defs.mk | 102 + .../config/rv64iv/bli_kernel_defs_rv64iv.h | 42 + share/blis/config/rv64iv/make_defs.mk | 103 + .../rvv_sg2042/bli_kernel_defs_rvv_sg2042.h | 42 + 
share/blis/config/rvv_sg2042/make_defs.mk | 103 + .../sandybridge/bli_kernel_defs_sandybridge.h | 52 + share/blis/config/sandybridge/make_defs.mk | 98 + .../sifive_rvv/bli_kernel_defs_sifive_rvv.h | 55 + share/blis/config/sifive_rvv/make_defs.mk | 80 + .../sifive_x280/bli_kernel_defs_sifive_x280.h | 55 + share/blis/config/sifive_x280/make_defs.mk | 80 + share/blis/config/skx/bli_kernel_defs_skx.h | 48 + share/blis/config/skx/make_defs.mk | 126 + .../steamroller/bli_kernel_defs_steamroller.h | 52 + share/blis/config/steamroller/make_defs.mk | 90 + .../thunderx2/bli_kernel_defs_thunderx2.h | 48 + share/blis/config/thunderx2/make_defs.mk | 90 + share/blis/config/x86_64/make_defs.mk | 94 + share/blis/config/zen/bli_kernel_defs_zen.h | 52 + share/blis/config/zen/make_defs.mk | 93 + share/blis/config/zen2/bli_kernel_defs_zen2.h | 52 + share/blis/config/zen2/make_defs.mk | 105 + share/blis/config/zen3/bli_kernel_defs_zen3.h | 52 + share/blis/config/zen3/make_defs.mk | 126 + share/blis/config_registry | 70 + share/blis/configure-plugin | 5551 +++++++++++++++++ share/blis/flatten-headers.py | 556 ++ share/blis/fma3.s | 5 + share/blis/fma4.s | 5 + share/blis/fragment.mk | 78 + share/blis/gen-make-frag.sh | 588 ++ share/blis/ignore_list | 8 + share/blis/mirror-tree.sh | 165 + share/blis/plugin/Makefile | 524 ++ share/blis/plugin/bli_plugin.h.in | 146 + share/blis/plugin/bli_plugin_init_ref.c | 108 + share/blis/plugin/bli_plugin_init_zen3.c | 96 + share/blis/plugin/bli_plugin_register.c | 81 + share/blis/plugin/config.mk.in | 145 + share/blis/plugin/my_kernel_1_ref.c | 56 + share/blis/plugin/my_kernel_1_zen3.c | 63 + share/blis/plugin/my_kernel_2_ref.c | 76 + share/blis/special_list | 2 + share/blis/suffix_list | 6 + share/pkgconfig/blis.pc | 11 + test/output_gemv_blis.m | 0 test/output_ger_blis.m | 0 test/output_hemv_blis.m | 0 126 files changed, 17508 insertions(+), 4 deletions(-) create mode 100644 blis.pc create mode 100644 config/rvv_sg2042/bli_cntx_init_rvv_sg2042.c 
create mode 100644 config/rvv_sg2042/bli_kernel_defs_rvv_sg2042.h create mode 100644 config/rvv_sg2042/make_defs.mk create mode 100644 include/blis.h create mode 100644 include/cblas.h create mode 100644 kernels/rvv_sg2042/3/bli_cgemm_rvv_sg2042_4vx4.c create mode 100644 kernels/rvv_sg2042/3/bli_cgemm_rvv_sg2042_asm_4vx4.S create mode 100644 kernels/rvv_sg2042/3/bli_czgemm_rvv_sg2042_asm_4vx4.h create mode 100644 kernels/rvv_sg2042/3/bli_dgemm_rvv_sg2042_4vx4.c create mode 100644 kernels/rvv_sg2042/3/bli_dgemm_rvv_sg2042_asm_4vx4.S create mode 100644 kernels/rvv_sg2042/3/bli_rvv_sg2042_utils.h create mode 100644 kernels/rvv_sg2042/3/bli_sdgemm_rvv_sg2042_asm_4vx4.h create mode 100644 kernels/rvv_sg2042/3/bli_sgemm_rvv_sg2042_4vx4.c create mode 100644 kernels/rvv_sg2042/3/bli_sgemm_rvv_sg2042_asm_4vx4.S create mode 100644 kernels/rvv_sg2042/3/bli_zgemm_rvv_sg2042_4vx4.c create mode 100644 kernels/rvv_sg2042/3/bli_zgemm_rvv_sg2042_asm_4vx4.S create mode 100644 kernels/rvv_sg2042/3/rvv_sg2042_restore_registers.h create mode 100644 kernels/rvv_sg2042/3/rvv_sg2042_save_registers.h create mode 100644 kernels/rvv_sg2042/bli_kernels_rvv_sg2042.h create mode 100644 share/blis/avx.s create mode 100644 share/blis/avx512dq.s create mode 100644 share/blis/avx512f.s create mode 100644 share/blis/common.mk create mode 100644 share/blis/config/a64fx/bli_kernel_defs_a64fx.h create mode 100644 share/blis/config/a64fx/make_defs.mk create mode 100644 share/blis/config/altra/bli_kernel_defs_altra.h create mode 100644 share/blis/config/altra/make_defs.mk create mode 100644 share/blis/config/altramax/bli_kernel_defs_altramax.h create mode 100644 share/blis/config/altramax/make_defs.mk create mode 100644 share/blis/config/amd64/make_defs.mk create mode 100644 share/blis/config/amd64_legacy/make_defs.mk create mode 100644 share/blis/config/arm32/make_defs.mk create mode 100644 share/blis/config/arm64/make_defs.mk create mode 100644 share/blis/config/armsve/bli_kernel_defs_armsve.h create 
mode 100644 share/blis/config/armsve/make_defs.mk create mode 100644 share/blis/config/bgq/bli_kernel_defs_bgq.h create mode 100644 share/blis/config/bgq/make_defs.mk create mode 100644 share/blis/config/bulldozer/bli_kernel_defs_bulldozer.h create mode 100644 share/blis/config/bulldozer/make_defs.mk create mode 100644 share/blis/config/cortexa15/bli_kernel_defs_cortexa15.h create mode 100644 share/blis/config/cortexa15/make_defs.mk create mode 100644 share/blis/config/cortexa53/bli_kernel_defs_cortexa53.h create mode 100644 share/blis/config/cortexa53/make_defs.mk create mode 100644 share/blis/config/cortexa57/bli_kernel_defs_cortexa57.h create mode 100644 share/blis/config/cortexa57/make_defs.mk create mode 100644 share/blis/config/cortexa9/bli_kernel_defs_cortexa9.h create mode 100644 share/blis/config/cortexa9/make_defs.mk create mode 100644 share/blis/config/excavator/bli_kernel_defs_excavator.h create mode 100644 share/blis/config/excavator/make_defs.mk create mode 100644 share/blis/config/firestorm/bli_kernel_defs_firestorm.h create mode 100644 share/blis/config/firestorm/make_defs.mk create mode 100644 share/blis/config/generic/bli_kernel_defs_generic.h create mode 100644 share/blis/config/generic/make_defs.mk create mode 100644 share/blis/config/haswell/bli_kernel_defs_haswell.h create mode 100644 share/blis/config/haswell/make_defs.mk create mode 100644 share/blis/config/intel64/make_defs.mk create mode 100644 share/blis/config/knl/bli_kernel_defs_knl.h create mode 100644 share/blis/config/knl/make_defs.mk create mode 100644 share/blis/config/penryn/bli_kernel_defs_penryn.h create mode 100644 share/blis/config/penryn/make_defs.mk create mode 100644 share/blis/config/piledriver/bli_kernel_defs_piledriver.h create mode 100644 share/blis/config/piledriver/make_defs.mk create mode 100644 share/blis/config/power/make_defs.mk create mode 100644 share/blis/config/power10/bli_kernel_defs_power10.h create mode 100644 share/blis/config/power10/make_defs.mk create 
mode 100644 share/blis/config/power9/bli_kernel_defs_power9.h create mode 100644 share/blis/config/power9/make_defs.mk create mode 100644 share/blis/config/rv32i/bli_kernel_defs_rv32i.h create mode 100644 share/blis/config/rv32i/make_defs.mk create mode 100644 share/blis/config/rv32iv/bli_kernel_defs_rv32iv.h create mode 100644 share/blis/config/rv32iv/make_defs.mk create mode 100644 share/blis/config/rv64i/bli_kernel_defs_rv64i.h create mode 100644 share/blis/config/rv64i/make_defs.mk create mode 100644 share/blis/config/rv64iv/bli_kernel_defs_rv64iv.h create mode 100644 share/blis/config/rv64iv/make_defs.mk create mode 100644 share/blis/config/rvv_sg2042/bli_kernel_defs_rvv_sg2042.h create mode 100644 share/blis/config/rvv_sg2042/make_defs.mk create mode 100644 share/blis/config/sandybridge/bli_kernel_defs_sandybridge.h create mode 100644 share/blis/config/sandybridge/make_defs.mk create mode 100644 share/blis/config/sifive_rvv/bli_kernel_defs_sifive_rvv.h create mode 100644 share/blis/config/sifive_rvv/make_defs.mk create mode 100644 share/blis/config/sifive_x280/bli_kernel_defs_sifive_x280.h create mode 100644 share/blis/config/sifive_x280/make_defs.mk create mode 100644 share/blis/config/skx/bli_kernel_defs_skx.h create mode 100644 share/blis/config/skx/make_defs.mk create mode 100644 share/blis/config/steamroller/bli_kernel_defs_steamroller.h create mode 100644 share/blis/config/steamroller/make_defs.mk create mode 100644 share/blis/config/thunderx2/bli_kernel_defs_thunderx2.h create mode 100644 share/blis/config/thunderx2/make_defs.mk create mode 100644 share/blis/config/x86_64/make_defs.mk create mode 100644 share/blis/config/zen/bli_kernel_defs_zen.h create mode 100644 share/blis/config/zen/make_defs.mk create mode 100644 share/blis/config/zen2/bli_kernel_defs_zen2.h create mode 100644 share/blis/config/zen2/make_defs.mk create mode 100644 share/blis/config/zen3/bli_kernel_defs_zen3.h create mode 100644 share/blis/config/zen3/make_defs.mk create mode 
100644 share/blis/config_registry create mode 100755 share/blis/configure-plugin create mode 100644 share/blis/flatten-headers.py create mode 100644 share/blis/fma3.s create mode 100644 share/blis/fma4.s create mode 100644 share/blis/fragment.mk create mode 100755 share/blis/gen-make-frag.sh create mode 100644 share/blis/ignore_list create mode 100755 share/blis/mirror-tree.sh create mode 100644 share/blis/plugin/Makefile create mode 100644 share/blis/plugin/bli_plugin.h.in create mode 100644 share/blis/plugin/bli_plugin_init_ref.c create mode 100644 share/blis/plugin/bli_plugin_init_zen3.c create mode 100644 share/blis/plugin/bli_plugin_register.c create mode 100644 share/blis/plugin/config.mk.in create mode 100644 share/blis/plugin/my_kernel_1_ref.c create mode 100644 share/blis/plugin/my_kernel_1_zen3.c create mode 100644 share/blis/plugin/my_kernel_2_ref.c create mode 100644 share/blis/special_list create mode 100644 share/blis/suffix_list create mode 100644 share/pkgconfig/blis.pc create mode 100644 test/output_gemv_blis.m create mode 100644 test/output_ger_blis.m create mode 100644 test/output_hemv_blis.m diff --git a/blis.pc b/blis.pc new file mode 100644 index 0000000000..3c3f71861f --- /dev/null +++ b/blis.pc @@ -0,0 +1,11 @@ +prefix=/home/gceccolini/blis_sg2042 +exec_prefix=/home/gceccolini/blis_sg2042 +libdir=/home/gceccolini/blis_sg2042/lib +includedir=/home/gceccolini/blis_sg2042/include + +Name: BLIS +Description: BLAS-like Library Instantiation Software Framework +Version: 0.9.0-180 +Libs: -L${libdir} -lblis +Libs.private: -lm -lpthread -latomic +Cflags: -I${includedir}/blis diff --git a/config/rvv_sg2042/bli_cntx_init_rvv_sg2042.c b/config/rvv_sg2042/bli_cntx_init_rvv_sg2042.c new file mode 100644 index 0000000000..968446d425 --- /dev/null +++ b/config/rvv_sg2042/bli_cntx_init_rvv_sg2042.c @@ -0,0 +1,115 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "../../kernels/rvv_sg2042/3/bli_rvv_sg2042_utils.h" + +void bli_cntx_init_rvv_sg2042( cntx_t* cntx ) +{ + // Scratch blocksize objects; initialized below and passed to bli_cntx_set_blkszs(). + blksz_t blkszs[ BLIS_NUM_BLKSZS ]; + + // Set default kernel blocksizes and functions. 
+ bli_cntx_init_rvv_sg2042_ref( cntx ); + + // ------------------------------------------------------------------------- + + // A reasonable assumption for application cores is VLEN >= 128 bits, i.e., + // v >= 4. Embedded cores, however, may implement the minimal configuration, + // which allows VLEN = 32 bits. Here, we assume VLEN >= 128 and otherwise + // fall back to the reference kernels. + const uint32_t v = get_vlenb() / sizeof(float); + + if ( v >= 4 ) + { + const uint32_t mr_s = 4 * v; + const uint32_t mr_d = 2 * v; + const uint32_t mr_c = 2 * v; + const uint32_t mr_z = v; + + // TODO: Register different kernels based on the value + // of v to avoid MC becoming too big. (e.g. 2vx8) + + // Update the context with optimized native gemm micro-kernels. + bli_cntx_set_ukrs + ( + cntx, + + // level-3 + BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_rvv_sg2042_4vx4, + BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_rvv_sg2042_4vx4, + BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_rvv_sg2042_4vx4, + BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_rvv_sg2042_4vx4, + + BLIS_VA_END + ); + + // Update the context with storage preferences. + bli_cntx_set_ukr_prefs + ( + cntx, + + // level-3 + BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, FALSE, + BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, + BLIS_GEMM_UKR_ROW_PREF, BLIS_SCOMPLEX, FALSE, + BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, FALSE, + + BLIS_VA_END + ); + + // Initialize level-3 blocksize objects with architecture-specific values. + // s d c z + bli_blksz_init_easy( &blkszs[ BLIS_MR ], mr_s, mr_d, mr_c, mr_z ); + bli_blksz_init_easy( &blkszs[ BLIS_NR ], 4, 4, 4, 4 ); + bli_blksz_init_easy( &blkszs[ BLIS_MC ], 20*mr_s, 20*mr_d, 60*mr_c, 30*mr_z ); + bli_blksz_init_easy( &blkszs[ BLIS_KC ], 640, 320, 320, 160 ); + bli_blksz_init_easy( &blkszs[ BLIS_NC ], 3072, 3072, 3072, 3072 ); + + // Update the context with the current architecture's register and cache + // blocksizes (and multiples) for native execution. 
+ bli_cntx_set_blkszs + ( + cntx, + + // level-3 + BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, + BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, + BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, + BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, + BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, + + BLIS_VA_END + ); + } +} diff --git a/config/rvv_sg2042/bli_kernel_defs_rvv_sg2042.h b/config/rvv_sg2042/bli_kernel_defs_rvv_sg2042.h new file mode 100644 index 0000000000..18ca4030e0 --- /dev/null +++ b/config/rvv_sg2042/bli_kernel_defs_rvv_sg2042.h @@ -0,0 +1,42 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + + +//#endif diff --git a/config/rvv_sg2042/make_defs.mk b/config/rvv_sg2042/make_defs.mk new file mode 100644 index 0000000000..9dca0533f0 --- /dev/null +++ b/config/rvv_sg2042/make_defs.mk @@ -0,0 +1,103 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := rvv_sg2042 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. 
+CPPROCFLAGS := -DRISCV_SIZE=64 + +RISCV_ARCH := $(shell $(CC) -DFORCE_RISCV_VECTOR -E build/detect/riscv/bli_riscv_detect_arch.h | grep '^[^\#]') +RISCV_ABI := $(shell $(CC) -DFORCE_RISCV_VECTOR -E build/detect/riscv/bli_riscv_detect_abi.h | grep '^[^\#]') + +ifeq (,$(findstring 64,$(RISCV_ARCH))) +$(error The RISC-V compiler architecture $(RISCV_ARCH) is not compatible with $(THIS_CONFIG)) +else ifeq (,$(findstring 64,$(RISCV_ABI))) +$(error The RISC-V compiler ABI $(RISCV_ABI) is not compatible with $(THIS_CONFIG)) +endif + +CMISCFLAGS := -march=$(RISCV_ARCH) -mabi=$(RISCV_ABI) +CPICFLAGS := -fPIC +CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors + +# In case the A extension is not available +LDFLAGS += -latomic + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -ftree-vectorize +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +# Lower compiler optimization. cinvscalv fails at -O1 +CRVECFLAGS := $(CKVECFLAGS) -O0 +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) diff --git a/config_registry b/config_registry index 8154393487..f30c7d835d 100644 --- a/config_registry +++ b/config_registry @@ -55,12 +55,13 @@ power10: power10 power9: power9 bgq: bgq -# RISC-V architectures. +# RISC-V architectures. Added! rv32i: rv32i/rvi rv64i: rv64i/rvi rv32iv: rv32iv/rviv rv64iv: rv64iv/rviv - +rvv_sg2042: rvv_sg2042 + # SiFive architectures. 
sifive_rvv: sifive_rvv sifive_x280: sifive_x280/sifive_rvv diff --git a/frame/base/bli_arch.c b/frame/base/bli_arch.c index 53d9bdefdd..8f37b1d869 100644 --- a/frame/base/bli_arch.c +++ b/frame/base/bli_arch.c @@ -285,6 +285,9 @@ arch_t bli_arch_query_id_impl( void ) #ifdef BLIS_FAMILY_RV64IV id = BLIS_ARCH_RV64IV; #endif + #ifdef BLIS_FAMILY_RVV_SG2042 // added! + id = BLIS_ARCH_RVV_SG2042; + #endif // SiFive microarchitectures. #ifdef BLIS_FAMILY_SIFIVE_RVV @@ -358,6 +361,7 @@ static const char* config_name[ BLIS_NUM_ARCHS ] = "rv64i", "rv32iv", "rv64iv", + "rvv_sg2042", // added! "sifive_rvv", "sifive_x280", diff --git a/frame/include/bli_arch_config.h b/frame/include/bli_arch_config.h index 49a8943024..7216073972 100644 --- a/frame/include/bli_arch_config.h +++ b/frame/include/bli_arch_config.h @@ -277,7 +277,9 @@ INSERT_GENTCONF #ifdef BLIS_KERNELS_RVIV #include "bli_kernels_rviv.h" #endif - +#ifdef BLIS_KERNELS_RVV_SG2042 +#include "bli_kernels_rvv_sg2042.h" +#endif // -- SiFive RISC-V architectures -- #ifdef BLIS_KERNELS_SIFIVE_RVV diff --git a/frame/include/bli_gentconf_macro_defs.h b/frame/include/bli_gentconf_macro_defs.h index f6f3af20e8..98ad7e65c4 100644 --- a/frame/include/bli_gentconf_macro_defs.h +++ b/frame/include/bli_gentconf_macro_defs.h @@ -219,7 +219,11 @@ #else #define INSERT_GENTCONF_RV64IV #endif - +#ifdef BLIS_CONFIG_RVV_SG2042 // added! 
+#define INSERT_GENTCONF_RVV_SG2042 GENTCONF( RVV_SG2042, rvv_sg2042 ) +#else +#define INSERT_GENTCONF_RVV_SG2042 +#endif // -- SiFive architectures ---------------------------------------------------- #ifdef BLIS_CONFIG_SIFIVE_RVV @@ -284,6 +288,7 @@ INSERT_GENTCONF_RV32I \ INSERT_GENTCONF_RV64I \ INSERT_GENTCONF_RV32IV \ INSERT_GENTCONF_RV64IV \ +INSERT_GENTCONF_RVV_SG2042 \ \ INSERT_GENTCONF_SIFIVE_RVV \ INSERT_GENTCONF_SIFIVE_X280 \ diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 890d216ea2..809b44dd9d 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -1005,6 +1005,7 @@ typedef enum BLIS_ARCH_RV64I, BLIS_ARCH_RV32IV, BLIS_ARCH_RV64IV, + BLIS_ARCH_RVV_SG2042, // added! // SiFive BLIS_ARCH_SIFIVE_RVV, diff --git a/include/blis.h b/include/blis.h new file mode 100644 index 0000000000..999edb6a5e --- /dev/null +++ b/include/blis.h @@ -0,0 +1 @@ +#include diff --git a/include/cblas.h b/include/cblas.h new file mode 100644 index 0000000000..f9ab368727 --- /dev/null +++ b/include/cblas.h @@ -0,0 +1 @@ +#include diff --git a/kernels/rvv_sg2042/3/bli_cgemm_rvv_sg2042_4vx4.c b/kernels/rvv_sg2042/3/bli_cgemm_rvv_sg2042_4vx4.c new file mode 100644 index 0000000000..c0433474dc --- /dev/null +++ b/kernels/rvv_sg2042/3/bli_cgemm_rvv_sg2042_4vx4.c @@ -0,0 +1,79 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "bli_rvv_sg2042_utils.h" + +void bli_cgemm_rvv_sg2042_asm_4vx4 + ( + intptr_t k, + const void* alpha, + const void* a, + const void* b, + const void* beta, + void* c, intptr_t rs_c, intptr_t cs_c + ); + +void bli_cgemm_rvv_sg2042_4vx4 + ( + dim_t m, + dim_t n, + dim_t k, + const void* alpha, + const void* a, + const void* b, + const void* beta, + void* c, inc_t rs_c, inc_t cs_c, + const auxinfo_t* data, + const cntx_t* cntx + ) +{ + // The assembly kernels always take native machine-sized integer arguments. + // dim_t and inc_t are normally defined as being machine-sized. If larger, assert. + bli_static_assert( sizeof(dim_t) <= sizeof(intptr_t) && + sizeof(inc_t) <= sizeof(intptr_t) ); + + // mr is read from the context (it depends on the vector length); nr is fixed at 4. + const inc_t mr = bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_MR, cntx ); + const inc_t nr = 4; + + GEMM_UKR_SETUP_CT( c, mr, nr, false ); + + // The kernel assumes rs_c == 1, and the context should not deviate from it. 
+ assert( rs_c == 1 ); + + bli_cgemm_rvv_sg2042_asm_4vx4( k, alpha, a, b, beta, c, + get_vlenb() * 2, cs_c * sizeof(scomplex) ); + + GEMM_UKR_FLUSH_CT( c ); +} diff --git a/kernels/rvv_sg2042/3/bli_cgemm_rvv_sg2042_asm_4vx4.S b/kernels/rvv_sg2042/3/bli_cgemm_rvv_sg2042_asm_4vx4.S new file mode 100644 index 0000000000..dae8b4fe46 --- /dev/null +++ b/kernels/rvv_sg2042/3/bli_cgemm_rvv_sg2042_asm_4vx4.S @@ -0,0 +1,45 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +*/ + +#define REALNAME bli_cgemm_rvv_sg2042_asm_4vx4 +#define DATASIZE 8 +#define VTYPE e32 +#define FLOAD flw +#define FZERO(fr) fcvt.s.w fr, x0 +#define FEQ feq.s +#define VLE vlseg2e32.v +#define VSE vsseg2e32.v + +#include "bli_czgemm_rvv_sg2042_asm_4vx4.h" diff --git a/kernels/rvv_sg2042/3/bli_czgemm_rvv_sg2042_asm_4vx4.h b/kernels/rvv_sg2042/3/bli_czgemm_rvv_sg2042_asm_4vx4.h new file mode 100644 index 0000000000..192ee35657 --- /dev/null +++ b/kernels/rvv_sg2042/3/bli_czgemm_rvv_sg2042_asm_4vx4.h @@ -0,0 +1,801 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +*/ + + .text + .align 2 + .global REALNAME + +// void REALNAME(intptr_t k, void* alpha, void* a, void* b, +// void* beta, void* c, intptr_t rs_c, intptr_t cs_c) +// +// register arguments: +// a0 k +// a1 alpha +// a2 a +// a3 b +// a4 beta +// a5 c +// a6 rs_c +// a7 cs_c +// + +#define REALSIZE (DATASIZE/2) + +#define loop_counter a0 + +#define A00_ptr a2 +#define A10_ptr t0 +#define A01_ptr t1 +#define A11_ptr t2 + +#define B_row_ptr a3 + +#define C00_ptr a5 +#define C01_ptr t3 +#define C02_ptr t4 +#define C03_ptr t5 +#define C10_ptr s1 +#define C11_ptr s2 +#define C12_ptr s3 +#define C13_ptr s4 + +#define tmp t6 + +#define ALPHA_re fa0 +#define ALPHA_im fa1 +#define BETA_re fa2 +#define BETA_im fa3 + +#define B00_re fa4 +#define B00_im fa5 +#define B01_re fa6 +#define B01_im fa7 +#define B02_re fa0 +#define B02_im fa1 +#define B03_re fa2 +#define B03_im fa3 + +#define B10_re ft0 +#define B10_im ft1 +#define B11_re ft2 +#define B11_im ft3 +#define B12_re ft4 +#define B12_im ft5 +#define B13_re ft6 +#define B13_im ft7 + +#define fzero ft8 + +#define A00_re v24 +#define A00_im v25 +#define A10_re v26 +#define A10_im v27 +#define A01_re v28 +#define A01_im v29 +#define A11_re v30 +#define A11_im 
v31 + +#define C0_re v24 +#define C0_im v25 +#define C1_re v26 +#define C1_im v27 +#define C2_re v28 +#define C2_im v29 +#define C3_re v30 +#define C3_im v31 + +#define AB00_re v0 +#define AB00_im v1 +#define AB01_re v2 +#define AB01_im v3 +#define AB02_re v4 +#define AB02_im v5 +#define AB03_re v6 +#define AB03_im v7 +#define AB10_re v8 +#define AB10_im v9 +#define AB11_re v10 +#define AB11_im v11 +#define AB12_re v12 +#define AB12_im v13 +#define AB13_re v14 +#define AB13_im v15 + +#define tmp0_re v16 +#define tmp0_im v17 +#define tmp1_re v18 +#define tmp1_im v19 +#define tmp2_re v20 +#define tmp2_im v21 +#define tmp3_re v22 +#define tmp3_im v23 + +#define rs_c a6 +#define cs_c a7 + +REALNAME: + #include "rvv_sg2042_save_registers.h" + + vsetvli s0, zero, VTYPE, m1, ta, ma + csrr s0, vlenb + slli s0, s0, 1 + FZERO(fzero) + + // Set up pointers + add C01_ptr, C00_ptr, cs_c + add C02_ptr, C01_ptr, cs_c + add C03_ptr, C02_ptr, cs_c + add C10_ptr, C00_ptr, rs_c + add C11_ptr, C01_ptr, rs_c + add C12_ptr, C02_ptr, rs_c + add C13_ptr, C03_ptr, rs_c + + // Zero-initialize accumulators + vxor.vv AB00_re, AB00_re, AB00_re + vxor.vv AB00_im, AB00_im, AB00_im + vxor.vv AB01_re, AB01_re, AB01_re + vxor.vv AB01_im, AB01_im, AB01_im + vxor.vv AB02_re, AB02_re, AB02_re + vxor.vv AB02_im, AB02_im, AB02_im + vxor.vv AB03_re, AB03_re, AB03_re + vxor.vv AB03_im, AB03_im, AB03_im + vxor.vv AB10_re, AB10_re, AB10_re + vxor.vv AB10_im, AB10_im, AB10_im + vxor.vv AB11_re, AB11_re, AB11_re + vxor.vv AB11_im, AB11_im, AB11_im + vxor.vv AB12_re, AB12_re, AB12_re + vxor.vv AB12_im, AB12_im, AB12_im + vxor.vv AB13_re, AB13_re, AB13_re + vxor.vv AB13_im, AB13_im, AB13_im + + // Handle k == 0 + beqz loop_counter, MULTIPLYBETA + + add A10_ptr, A00_ptr, s0 + slli s0, s0, 1 // length of a column of A in bytes + add A01_ptr, A00_ptr, s0 + add A11_ptr, A10_ptr, s0 + + li tmp, 3 + ble loop_counter, tmp, TAIL_UNROLL_2 + + // Preload A and B + // Load and deinterleave A(:,l) + VLE A00_re, (A00_ptr) + 
VLE A10_re, (A10_ptr) + + // Load B(l,0:3) + FLOAD B00_re, 0*REALSIZE(B_row_ptr) + FLOAD B00_im, 1*REALSIZE(B_row_ptr) + FLOAD B01_re, 2*REALSIZE(B_row_ptr) + FLOAD B01_im, 3*REALSIZE(B_row_ptr) + FLOAD B02_re, 4*REALSIZE(B_row_ptr) + FLOAD B02_im, 5*REALSIZE(B_row_ptr) + FLOAD B03_re, 6*REALSIZE(B_row_ptr) + FLOAD B03_im, 7*REALSIZE(B_row_ptr) + + // Load and deinterleave A(:,l+1) + VLE A01_re, (A01_ptr) + VLE A11_re, (A11_ptr) + +LOOP_UNROLL_4: // loop_counter >= 4 + addi loop_counter, loop_counter, -4 + + vfmacc.vf AB00_re, B00_re, A00_re // AB(:,0) += A(:,l) * B(l,0) + vfnmsac.vf AB00_re, B00_im, A00_im + vfmacc.vf AB00_im, B00_re, A00_im + vfmacc.vf AB00_im, B00_im, A00_re + vfmacc.vf AB10_re, B00_re, A10_re + vfnmsac.vf AB10_re, B00_im, A10_im + vfmacc.vf AB10_im, B00_re, A10_im + vfmacc.vf AB10_im, B00_im, A10_re + + vfmacc.vf AB01_re, B01_re, A00_re // AB(:,1) += A(:,l) * B(l,1) + vfnmsac.vf AB01_re, B01_im, A00_im + vfmacc.vf AB01_im, B01_re, A00_im + vfmacc.vf AB01_im, B01_im, A00_re + vfmacc.vf AB11_re, B01_re, A10_re + vfnmsac.vf AB11_re, B01_im, A10_im + vfmacc.vf AB11_im, B01_re, A10_im + vfmacc.vf AB11_im, B01_im, A10_re + + // Point to A(:,l+2), A(:,l+3) + add A00_ptr, A01_ptr, s0 + add A10_ptr, A11_ptr, s0 + add A01_ptr, A00_ptr, s0 + add A11_ptr, A10_ptr, s0 + + // Load B(l+1,0:3) + FLOAD B10_re, 8*REALSIZE(B_row_ptr) + FLOAD B10_im, 9*REALSIZE(B_row_ptr) + FLOAD B11_re, 10*REALSIZE(B_row_ptr) + FLOAD B11_im, 11*REALSIZE(B_row_ptr) + FLOAD B12_re, 12*REALSIZE(B_row_ptr) + FLOAD B12_im, 13*REALSIZE(B_row_ptr) + FLOAD B13_re, 14*REALSIZE(B_row_ptr) + FLOAD B13_im, 15*REALSIZE(B_row_ptr) + addi B_row_ptr, B_row_ptr, 16*REALSIZE + + vfmacc.vf AB00_re, B10_re, A01_re // AB(:,0) += A(:,l+1) * B(l+1,0) + vfnmsac.vf AB00_re, B10_im, A01_im + vfmacc.vf AB00_im, B10_re, A01_im + vfmacc.vf AB00_im, B10_im, A01_re + vfmacc.vf AB10_re, B10_re, A11_re + vfnmsac.vf AB10_re, B10_im, A11_im + vfmacc.vf AB10_im, B10_re, A11_im + vfmacc.vf AB10_im, B10_im, A11_re + 
+ vfmacc.vf AB02_re, B02_re, A00_re // AB(:,2) += A(:,l) * B(l,2) + vfnmsac.vf AB02_re, B02_im, A00_im + vfmacc.vf AB02_im, B02_re, A00_im + vfmacc.vf AB02_im, B02_im, A00_re + vfmacc.vf AB12_re, B02_re, A10_re + vfnmsac.vf AB12_re, B02_im, A10_im + vfmacc.vf AB12_im, B02_re, A10_im + vfmacc.vf AB12_im, B02_im, A10_re + + vfmacc.vf AB03_re, B03_re, A00_re // AB(:,3) += A(:,l) * B(l,3) + vfnmsac.vf AB03_re, B03_im, A00_im + vfmacc.vf AB03_im, B03_re, A00_im + vfmacc.vf AB03_im, B03_im, A00_re + vfmacc.vf AB13_re, B03_re, A10_re + vfnmsac.vf AB13_re, B03_im, A10_im + vfmacc.vf AB13_im, B03_re, A10_im + vfmacc.vf AB13_im, B03_im, A10_re + + // Load and deinterleave A(:,l+2) + VLE A00_re, (A00_ptr) + VLE A10_re, (A10_ptr) + + // Load B(l+2, 0:3) + FLOAD B00_re, 0*REALSIZE(B_row_ptr) + FLOAD B00_im, 1*REALSIZE(B_row_ptr) + FLOAD B01_re, 2*REALSIZE(B_row_ptr) + FLOAD B01_im, 3*REALSIZE(B_row_ptr) + FLOAD B02_re, 4*REALSIZE(B_row_ptr) + FLOAD B02_im, 5*REALSIZE(B_row_ptr) + FLOAD B03_re, 6*REALSIZE(B_row_ptr) + FLOAD B03_im, 7*REALSIZE(B_row_ptr) + + vfmacc.vf AB01_re, B11_re, A01_re // AB(:,1) += A(:,l+1) * B(l+1,1) + vfnmsac.vf AB01_re, B11_im, A01_im + vfmacc.vf AB01_im, B11_re, A01_im + vfmacc.vf AB01_im, B11_im, A01_re + vfmacc.vf AB11_re, B11_re, A11_re + vfnmsac.vf AB11_re, B11_im, A11_im + vfmacc.vf AB11_im, B11_re, A11_im + vfmacc.vf AB11_im, B11_im, A11_re + + vfmacc.vf AB02_re, B12_re, A01_re // AB(:,2) += A(:,l+1) * B(l+1,2) + vfnmsac.vf AB02_re, B12_im, A01_im + vfmacc.vf AB02_im, B12_re, A01_im + vfmacc.vf AB02_im, B12_im, A01_re + vfmacc.vf AB12_re, B12_re, A11_re + vfnmsac.vf AB12_re, B12_im, A11_im + vfmacc.vf AB12_im, B12_re, A11_im + vfmacc.vf AB12_im, B12_im, A11_re + + vfmacc.vf AB03_re, B13_re, A01_re // AB(:,3) += A(:,l+1) * B(l+1,3) + vfnmsac.vf AB03_re, B13_im, A01_im + vfmacc.vf AB03_im, B13_re, A01_im + vfmacc.vf AB03_im, B13_im, A01_re + vfmacc.vf AB13_re, B13_re, A11_re + vfnmsac.vf AB13_re, B13_im, A11_im + vfmacc.vf AB13_im, B13_re, A11_im + 
vfmacc.vf AB13_im, B13_im, A11_re + + // Load and deinterleave A(:,l+3) + VLE A01_re, (A01_ptr) + VLE A11_re, (A11_ptr) + + // Point to A(:,l+4), A(:,l+5) + add A00_ptr, A01_ptr, s0 + add A10_ptr, A11_ptr, s0 + add A01_ptr, A00_ptr, s0 + add A11_ptr, A10_ptr, s0 + + // Load B(l+3, 0:3) + FLOAD B10_re, 8*REALSIZE(B_row_ptr) + FLOAD B10_im, 9*REALSIZE(B_row_ptr) + FLOAD B11_re, 10*REALSIZE(B_row_ptr) + FLOAD B11_im, 11*REALSIZE(B_row_ptr) + FLOAD B12_re, 12*REALSIZE(B_row_ptr) + FLOAD B12_im, 13*REALSIZE(B_row_ptr) + FLOAD B13_re, 14*REALSIZE(B_row_ptr) + FLOAD B13_im, 15*REALSIZE(B_row_ptr) + addi B_row_ptr, B_row_ptr, 16*REALSIZE + + vfmacc.vf AB00_re, B00_re, A00_re // AB(:,0) += A(:,l+2) * B(l+2,0) + vfnmsac.vf AB00_re, B00_im, A00_im + vfmacc.vf AB00_im, B00_re, A00_im + vfmacc.vf AB00_im, B00_im, A00_re + vfmacc.vf AB10_re, B00_re, A10_re + vfnmsac.vf AB10_re, B00_im, A10_im + vfmacc.vf AB10_im, B00_re, A10_im + vfmacc.vf AB10_im, B00_im, A10_re + + vfmacc.vf AB00_re, B10_re, A01_re // AB(:,0) += A(:,l+3) * B(l+3,0) + vfnmsac.vf AB00_re, B10_im, A01_im + vfmacc.vf AB00_im, B10_re, A01_im + vfmacc.vf AB00_im, B10_im, A01_re + vfmacc.vf AB10_re, B10_re, A11_re + vfnmsac.vf AB10_re, B10_im, A11_im + vfmacc.vf AB10_im, B10_re, A11_im + vfmacc.vf AB10_im, B10_im, A11_re + + vfmacc.vf AB01_re, B01_re, A00_re // AB(:,1) += A(:,l+2) * B(l+2,1) + vfnmsac.vf AB01_re, B01_im, A00_im + vfmacc.vf AB01_im, B01_re, A00_im + vfmacc.vf AB01_im, B01_im, A00_re + vfmacc.vf AB11_re, B01_re, A10_re + vfnmsac.vf AB11_re, B01_im, A10_im + vfmacc.vf AB11_im, B01_re, A10_im + vfmacc.vf AB11_im, B01_im, A10_re + + vfmacc.vf AB01_re, B11_re, A01_re // AB(:,1) += A(:,l+3) * B(l+3,1) + vfnmsac.vf AB01_re, B11_im, A01_im + vfmacc.vf AB01_im, B11_re, A01_im + vfmacc.vf AB01_im, B11_im, A01_re + vfmacc.vf AB11_re, B11_re, A11_re + vfnmsac.vf AB11_re, B11_im, A11_im + vfmacc.vf AB11_im, B11_re, A11_im + vfmacc.vf AB11_im, B11_im, A11_re + + vfmacc.vf AB02_re, B02_re, A00_re // AB(:,2) +=
A(:,l+2) * B(l+2,2) + vfnmsac.vf AB02_re, B02_im, A00_im + vfmacc.vf AB02_im, B02_re, A00_im + vfmacc.vf AB02_im, B02_im, A00_re + vfmacc.vf AB12_re, B02_re, A10_re + vfnmsac.vf AB12_re, B02_im, A10_im + vfmacc.vf AB12_im, B02_re, A10_im + vfmacc.vf AB12_im, B02_im, A10_re + + vfmacc.vf AB02_re, B12_re, A01_re // AB(:,2) += A(:,l+3) * B(l+3,2) + vfnmsac.vf AB02_re, B12_im, A01_im + vfmacc.vf AB02_im, B12_re, A01_im + vfmacc.vf AB02_im, B12_im, A01_re + vfmacc.vf AB12_re, B12_re, A11_re + vfnmsac.vf AB12_re, B12_im, A11_im + vfmacc.vf AB12_im, B12_re, A11_im + vfmacc.vf AB12_im, B12_im, A11_re + + vfmacc.vf AB03_re, B03_re, A00_re // AB(:,3) += A(:,l+2) * B(l+2,3) + vfnmsac.vf AB03_re, B03_im, A00_im + vfmacc.vf AB03_im, B03_re, A00_im + vfmacc.vf AB03_im, B03_im, A00_re + vfmacc.vf AB13_re, B03_re, A10_re + vfnmsac.vf AB13_re, B03_im, A10_im + vfmacc.vf AB13_im, B03_re, A10_im + vfmacc.vf AB13_im, B03_im, A10_re + + vfmacc.vf AB03_re, B13_re, A01_re // AB(:,3) += A(:,l+3) * B(l+3,3) + vfnmsac.vf AB03_re, B13_im, A01_im + vfmacc.vf AB03_im, B13_re, A01_im + vfmacc.vf AB03_im, B13_im, A01_re + vfmacc.vf AB13_re, B13_re, A11_re + vfnmsac.vf AB13_re, B13_im, A11_im + vfmacc.vf AB13_im, B13_re, A11_im + vfmacc.vf AB13_im, B13_im, A11_re + + li tmp, 3 + ble loop_counter, tmp, TAIL_UNROLL_2 + + // Load A and B for the next iteration + VLE A00_re, (A00_ptr) + VLE A10_re, (A10_ptr) + VLE A01_re, (A01_ptr) + VLE A11_re, (A11_ptr) + + FLOAD B00_re, 0*REALSIZE(B_row_ptr) + FLOAD B00_im, 1*REALSIZE(B_row_ptr) + FLOAD B01_re, 2*REALSIZE(B_row_ptr) + FLOAD B01_im, 3*REALSIZE(B_row_ptr) + FLOAD B02_re, 4*REALSIZE(B_row_ptr) + FLOAD B02_im, 5*REALSIZE(B_row_ptr) + FLOAD B03_re, 6*REALSIZE(B_row_ptr) + FLOAD B03_im, 7*REALSIZE(B_row_ptr) + + j LOOP_UNROLL_4 + +TAIL_UNROLL_2: // loop_counter <= 3 + li tmp, 1 + ble loop_counter, tmp, TAIL_UNROLL_1 + + addi loop_counter, loop_counter, -2 + + // Load and deinterleave A(:,l) + VLE A00_re, (A00_ptr) + VLE A10_re, (A10_ptr) + + // Load 
B(l, 0:3) + FLOAD B00_re, 0*REALSIZE(B_row_ptr) + FLOAD B00_im, 1*REALSIZE(B_row_ptr) + FLOAD B01_re, 2*REALSIZE(B_row_ptr) + FLOAD B01_im, 3*REALSIZE(B_row_ptr) + FLOAD B02_re, 4*REALSIZE(B_row_ptr) + FLOAD B02_im, 5*REALSIZE(B_row_ptr) + FLOAD B03_re, 6*REALSIZE(B_row_ptr) + FLOAD B03_im, 7*REALSIZE(B_row_ptr) + + vfmacc.vf AB00_re, B00_re, A00_re // AB(:,0) += A(:,l) * B(l,0) + vfnmsac.vf AB00_re, B00_im, A00_im + vfmacc.vf AB00_im, B00_re, A00_im + vfmacc.vf AB00_im, B00_im, A00_re + vfmacc.vf AB10_re, B00_re, A10_re + vfnmsac.vf AB10_re, B00_im, A10_im + vfmacc.vf AB10_im, B00_re, A10_im + vfmacc.vf AB10_im, B00_im, A10_re + + vfmacc.vf AB01_re, B01_re, A00_re // AB(:,1) += A(:,l) * B(l,1) + vfnmsac.vf AB01_re, B01_im, A00_im + vfmacc.vf AB01_im, B01_re, A00_im + vfmacc.vf AB01_im, B01_im, A00_re + vfmacc.vf AB11_re, B01_re, A10_re + vfnmsac.vf AB11_re, B01_im, A10_im + vfmacc.vf AB11_im, B01_re, A10_im + vfmacc.vf AB11_im, B01_im, A10_re + + // Load and deinterleave A(:,l+1) + VLE A01_re, (A01_ptr) + VLE A11_re, (A11_ptr) + + // Load B(l+1, 0:3) + FLOAD B10_re, 8*REALSIZE(B_row_ptr) + FLOAD B10_im, 9*REALSIZE(B_row_ptr) + FLOAD B11_re, 10*REALSIZE(B_row_ptr) + FLOAD B11_im, 11*REALSIZE(B_row_ptr) + FLOAD B12_re, 12*REALSIZE(B_row_ptr) + FLOAD B12_im, 13*REALSIZE(B_row_ptr) + FLOAD B13_re, 14*REALSIZE(B_row_ptr) + FLOAD B13_im, 15*REALSIZE(B_row_ptr) + + vfmacc.vf AB00_re, B10_re, A01_re // AB(:,0) += A(:,l+1) * B(l+1,0) + vfnmsac.vf AB00_re, B10_im, A01_im + vfmacc.vf AB00_im, B10_re, A01_im + vfmacc.vf AB00_im, B10_im, A01_re + vfmacc.vf AB10_re, B10_re, A11_re + vfnmsac.vf AB10_re, B10_im, A11_im + vfmacc.vf AB10_im, B10_re, A11_im + vfmacc.vf AB10_im, B10_im, A11_re + + vfmacc.vf AB01_re, B11_re, A01_re // AB(:,1) += A(:,l+1) * B(l+1,1) + vfnmsac.vf AB01_re, B11_im, A01_im + vfmacc.vf AB01_im, B11_re, A01_im + vfmacc.vf AB01_im, B11_im, A01_re + vfmacc.vf AB11_re, B11_re, A11_re + vfnmsac.vf AB11_re, B11_im, A11_im + vfmacc.vf AB11_im, B11_re, A11_im + 
vfmacc.vf AB11_im, B11_im, A11_re + + vfmacc.vf AB02_re, B02_re, A00_re // AB(:,2) += A(:,l) * B(l,2) + vfnmsac.vf AB02_re, B02_im, A00_im + vfmacc.vf AB02_im, B02_re, A00_im + vfmacc.vf AB02_im, B02_im, A00_re + vfmacc.vf AB12_re, B02_re, A10_re + vfnmsac.vf AB12_re, B02_im, A10_im + vfmacc.vf AB12_im, B02_re, A10_im + vfmacc.vf AB12_im, B02_im, A10_re + + vfmacc.vf AB03_re, B03_re, A00_re // AB(:,3) += A(:,l) * B(l,3) + vfnmsac.vf AB03_re, B03_im, A00_im + vfmacc.vf AB03_im, B03_re, A00_im + vfmacc.vf AB03_im, B03_im, A00_re + vfmacc.vf AB13_re, B03_re, A10_re + vfnmsac.vf AB13_re, B03_im, A10_im + vfmacc.vf AB13_im, B03_re, A10_im + vfmacc.vf AB13_im, B03_im, A10_re + + vfmacc.vf AB02_re, B12_re, A01_re // AB(:,2) += A(:,l+1) * B(l+1,2) + vfnmsac.vf AB02_re, B12_im, A01_im + vfmacc.vf AB02_im, B12_re, A01_im + vfmacc.vf AB02_im, B12_im, A01_re + vfmacc.vf AB12_re, B12_re, A11_re + vfnmsac.vf AB12_re, B12_im, A11_im + vfmacc.vf AB12_im, B12_re, A11_im + vfmacc.vf AB12_im, B12_im, A11_re + + vfmacc.vf AB03_re, B13_re, A01_re // AB(:,3) += A(:,l+1) * B(l+1,3) + vfnmsac.vf AB03_re, B13_im, A01_im + vfmacc.vf AB03_im, B13_re, A01_im + vfmacc.vf AB03_im, B13_im, A01_re + vfmacc.vf AB13_re, B13_re, A11_re + vfnmsac.vf AB13_re, B13_im, A11_im + vfmacc.vf AB13_im, B13_re, A11_im + vfmacc.vf AB13_im, B13_im, A11_re + + beqz loop_counter, MULTIPLYALPHA + + // Advance pointers + add A00_ptr, A01_ptr, s0 + add A10_ptr, A11_ptr, s0 + addi B_row_ptr, B_row_ptr, 16*REALSIZE + +TAIL_UNROLL_1: // loop_counter <= 1 + beqz loop_counter, MULTIPLYALPHA + + // Load and deinterleave A(:,l) + VLE A00_re, (A00_ptr) + VLE A10_re, (A10_ptr) + + // Load B(l,0:3) + FLOAD B00_re, 0*REALSIZE(B_row_ptr) + FLOAD B00_im, 1*REALSIZE(B_row_ptr) + FLOAD B01_re, 2*REALSIZE(B_row_ptr) + FLOAD B01_im, 3*REALSIZE(B_row_ptr) + FLOAD B02_re, 4*REALSIZE(B_row_ptr) + FLOAD B02_im, 5*REALSIZE(B_row_ptr) + FLOAD B03_re, 6*REALSIZE(B_row_ptr) + FLOAD B03_im, 7*REALSIZE(B_row_ptr) + + vfmacc.vf AB00_re, B00_re, 
A00_re // AB(:,0) += A(:,l) * B(l,0) + vfnmsac.vf AB00_re, B00_im, A00_im + vfmacc.vf AB00_im, B00_re, A00_im + vfmacc.vf AB00_im, B00_im, A00_re + vfmacc.vf AB10_re, B00_re, A10_re + vfnmsac.vf AB10_re, B00_im, A10_im + vfmacc.vf AB10_im, B00_re, A10_im + vfmacc.vf AB10_im, B00_im, A10_re + + vfmacc.vf AB01_re, B01_re, A00_re // AB(:,1) += A(:,l) * B(l,1) + vfnmsac.vf AB01_re, B01_im, A00_im + vfmacc.vf AB01_im, B01_re, A00_im + vfmacc.vf AB01_im, B01_im, A00_re + vfmacc.vf AB11_re, B01_re, A10_re + vfnmsac.vf AB11_re, B01_im, A10_im + vfmacc.vf AB11_im, B01_re, A10_im + vfmacc.vf AB11_im, B01_im, A10_re + + vfmacc.vf AB02_re, B02_re, A00_re // AB(:,2) += A(:,l) * B(l,2) + vfnmsac.vf AB02_re, B02_im, A00_im + vfmacc.vf AB02_im, B02_re, A00_im + vfmacc.vf AB02_im, B02_im, A00_re + vfmacc.vf AB12_re, B02_re, A10_re + vfnmsac.vf AB12_re, B02_im, A10_im + vfmacc.vf AB12_im, B02_re, A10_im + vfmacc.vf AB12_im, B02_im, A10_re + + vfmacc.vf AB03_re, B03_re, A00_re // AB(:,3) += A(:,l) * B(l,3) + vfnmsac.vf AB03_re, B03_im, A00_im + vfmacc.vf AB03_im, B03_re, A00_im + vfmacc.vf AB03_im, B03_im, A00_re + vfmacc.vf AB13_re, B03_re, A10_re + vfnmsac.vf AB13_re, B03_im, A10_im + vfmacc.vf AB13_im, B03_re, A10_im + vfmacc.vf AB13_im, B03_im, A10_re + +MULTIPLYALPHA: + FLOAD ALPHA_re, 0*REALSIZE(a1) + FLOAD ALPHA_im, 1*REALSIZE(a1) + + FEQ tmp, ALPHA_im, fzero + bne tmp, zero, ALPHAREAL + + // [AB00, ..., AB03] * alpha + vfmul.vf tmp0_re, AB00_im, ALPHA_im + vfmul.vf tmp0_im, AB00_re, ALPHA_im + vfmul.vf tmp1_re, AB01_im, ALPHA_im + vfmul.vf tmp1_im, AB01_re, ALPHA_im + vfmul.vf tmp2_re, AB02_im, ALPHA_im + vfmul.vf tmp2_im, AB02_re, ALPHA_im + vfmul.vf tmp3_re, AB03_im, ALPHA_im + vfmul.vf tmp3_im, AB03_re, ALPHA_im + vfmsub.vf AB00_re, ALPHA_re, tmp0_re + vfmsub.vf AB01_re, ALPHA_re, tmp1_re + vfmsub.vf AB02_re, ALPHA_re, tmp2_re + vfmsub.vf AB03_re, ALPHA_re, tmp3_re + vfmadd.vf AB00_im, ALPHA_re, tmp0_im + vfmadd.vf AB01_im, ALPHA_re, tmp1_im + vfmadd.vf AB02_im, ALPHA_re, 
tmp2_im + vfmadd.vf AB03_im, ALPHA_re, tmp3_im + + // [AB10, ..., AB13] * alpha + vfmul.vf tmp0_re, AB10_im, ALPHA_im + vfmul.vf tmp0_im, AB10_re, ALPHA_im + vfmul.vf tmp1_re, AB11_im, ALPHA_im + vfmul.vf tmp1_im, AB11_re, ALPHA_im + vfmul.vf tmp2_re, AB12_im, ALPHA_im + vfmul.vf tmp2_im, AB12_re, ALPHA_im + vfmul.vf tmp3_re, AB13_im, ALPHA_im + vfmul.vf tmp3_im, AB13_re, ALPHA_im + vfmsub.vf AB10_re, ALPHA_re, tmp0_re + vfmsub.vf AB11_re, ALPHA_re, tmp1_re + vfmsub.vf AB12_re, ALPHA_re, tmp2_re + vfmsub.vf AB13_re, ALPHA_re, tmp3_re + vfmadd.vf AB10_im, ALPHA_re, tmp0_im + vfmadd.vf AB11_im, ALPHA_re, tmp1_im + vfmadd.vf AB12_im, ALPHA_re, tmp2_im + vfmadd.vf AB13_im, ALPHA_re, tmp3_im + + j MULTIPLYBETA + +ALPHAREAL: + vfmul.vf AB00_re, AB00_re, ALPHA_re + vfmul.vf AB00_im, AB00_im, ALPHA_re + vfmul.vf AB01_re, AB01_re, ALPHA_re + vfmul.vf AB01_im, AB01_im, ALPHA_re + vfmul.vf AB02_re, AB02_re, ALPHA_re + vfmul.vf AB02_im, AB02_im, ALPHA_re + vfmul.vf AB03_re, AB03_re, ALPHA_re + vfmul.vf AB03_im, AB03_im, ALPHA_re + + vfmul.vf AB10_re, AB10_re, ALPHA_re + vfmul.vf AB10_im, AB10_im, ALPHA_re + vfmul.vf AB11_re, AB11_re, ALPHA_re + vfmul.vf AB11_im, AB11_im, ALPHA_re + vfmul.vf AB12_re, AB12_re, ALPHA_re + vfmul.vf AB12_im, AB12_im, ALPHA_re + vfmul.vf AB13_re, AB13_re, ALPHA_re + vfmul.vf AB13_im, AB13_im, ALPHA_re + +MULTIPLYBETA: + FLOAD BETA_re, 0*REALSIZE(a4) + FLOAD BETA_im, 1*REALSIZE(a4) + FEQ tmp, BETA_im, fzero + bne tmp, zero, BETAREAL + + // Load and deinterleave C(0:VLEN-1, 0:1) + VLE C0_re, (C00_ptr) + VLE C1_re, (C01_ptr) + + // Load and deinterleave C(0:VLEN-1, 2:3) + VLE C2_re, (C02_ptr) + VLE C3_re, (C03_ptr) + + // C(0:VLEN-1,0:1) * beta + AB(0:VLEN-1,0:1) + vfmacc.vf AB00_re, BETA_re, C0_re + vfnmsac.vf AB00_re, BETA_im, C0_im + vfmacc.vf AB00_im, BETA_re, C0_im + vfmacc.vf AB00_im, BETA_im, C0_re + VSE AB00_re, (C00_ptr) + + vfmacc.vf AB01_re, BETA_re, C1_re + vfnmsac.vf AB01_re, BETA_im, C1_im + vfmacc.vf AB01_im, BETA_re, C1_im + vfmacc.vf 
AB01_im, BETA_im, C1_re + VSE AB01_re, (C01_ptr) + + // C(0:VLEN-1,2:3) * beta + AB(0:VLEN-1,2:3) + vfmacc.vf AB02_re, BETA_re, C2_re + vfnmsac.vf AB02_re, BETA_im, C2_im + vfmacc.vf AB02_im, BETA_re, C2_im + vfmacc.vf AB02_im, BETA_im, C2_re + VSE AB02_re, (C02_ptr) + + vfmacc.vf AB03_re, BETA_re, C3_re + vfnmsac.vf AB03_re, BETA_im, C3_im + vfmacc.vf AB03_im, BETA_re, C3_im + vfmacc.vf AB03_im, BETA_im, C3_re + VSE AB03_re, (C03_ptr) + + // Load and deinterleave C(VLEN:2*VLEN-1, 0:1) + VLE C0_re, (C10_ptr) + VLE C1_re, (C11_ptr) + + // Load and deinterleave C(VLEN:2*VLEN-1, 2:3) + VLE C2_re, (C12_ptr) + VLE C3_re, (C13_ptr) + + // C(VLEN:2*VLEN-1,0:1) * beta + AB(VLEN:2*VLEN-1,0:1) + vfmacc.vf AB10_re, BETA_re, C0_re + vfnmsac.vf AB10_re, BETA_im, C0_im + vfmacc.vf AB10_im, BETA_re, C0_im + vfmacc.vf AB10_im, BETA_im, C0_re + VSE AB10_re, (C10_ptr) + + vfmacc.vf AB11_re, BETA_re, C1_re + vfnmsac.vf AB11_re, BETA_im, C1_im + vfmacc.vf AB11_im, BETA_re, C1_im + vfmacc.vf AB11_im, BETA_im, C1_re + VSE AB11_re, (C11_ptr) + + // C(VLEN:2*VLEN-1,2:3) * beta + AB(VLEN:2*VLEN-1,2:3) + vfmacc.vf AB12_re, BETA_re, C2_re + vfnmsac.vf AB12_re, BETA_im, C2_im + vfmacc.vf AB12_im, BETA_re, C2_im + vfmacc.vf AB12_im, BETA_im, C2_re + VSE AB12_re, (C12_ptr) + + vfmacc.vf AB13_re, BETA_re, C3_re + vfnmsac.vf AB13_re, BETA_im, C3_im + vfmacc.vf AB13_im, BETA_re, C3_im + vfmacc.vf AB13_im, BETA_im, C3_re + VSE AB13_re, (C13_ptr) + + j END + +BETAREAL: + FEQ tmp, BETA_re, fzero + bne tmp, zero, BETAZERO + + // Load and deinterleave C(0:VLEN-1, 0:3) + VLE C0_re, (C00_ptr) + VLE C1_re, (C01_ptr) + VLE C2_re, (C02_ptr) + VLE C3_re, (C03_ptr) + + // C(0:VLEN-1,0:3) * beta + AB(0:VLEN-1,0:3) + vfmacc.vf AB00_re, BETA_re, C0_re + vfmacc.vf AB00_im, BETA_re, C0_im + vfmacc.vf AB01_re, BETA_re, C1_re + vfmacc.vf AB01_im, BETA_re, C1_im + + vfmacc.vf AB02_re, BETA_re, C2_re + vfmacc.vf AB02_im, BETA_re, C2_im + vfmacc.vf AB03_re, BETA_re, C3_re + vfmacc.vf AB03_im, BETA_re, C3_im + + VSE 
AB00_re, (C00_ptr) + VSE AB01_re, (C01_ptr) + VSE AB02_re, (C02_ptr) + VSE AB03_re, (C03_ptr) + + // Load and deinterleave C(VLEN:2*VLEN-1, 0:3) + VLE C0_re, (C10_ptr) + VLE C1_re, (C11_ptr) + VLE C2_re, (C12_ptr) + VLE C3_re, (C13_ptr) + + // C(VLEN:2*VLEN-1,0:3) * beta + AB(VLEN:2*VLEN-1,0:3) + vfmacc.vf AB10_re, BETA_re, C0_re + vfmacc.vf AB10_im, BETA_re, C0_im + vfmacc.vf AB11_re, BETA_re, C1_re + vfmacc.vf AB11_im, BETA_re, C1_im + + vfmacc.vf AB12_re, BETA_re, C2_re + vfmacc.vf AB12_im, BETA_re, C2_im + vfmacc.vf AB13_re, BETA_re, C3_re + vfmacc.vf AB13_im, BETA_re, C3_im + + VSE AB10_re, (C10_ptr) + VSE AB11_re, (C11_ptr) + VSE AB12_re, (C12_ptr) + VSE AB13_re, (C13_ptr) + + j END + +BETAZERO: + VSE AB00_re, (C00_ptr) + VSE AB01_re, (C01_ptr) + VSE AB02_re, (C02_ptr) + VSE AB03_re, (C03_ptr) + + VSE AB10_re, (C10_ptr) + VSE AB11_re, (C11_ptr) + VSE AB12_re, (C12_ptr) + VSE AB13_re, (C13_ptr) + +END: + #include "rvv_sg2042_restore_registers.h" + ret diff --git a/kernels/rvv_sg2042/3/bli_dgemm_rvv_sg2042_4vx4.c b/kernels/rvv_sg2042/3/bli_dgemm_rvv_sg2042_4vx4.c new file mode 100644 index 0000000000..04b241377e --- /dev/null +++ b/kernels/rvv_sg2042/3/bli_dgemm_rvv_sg2042_4vx4.c @@ -0,0 +1,79 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +*/ +#include "bli_rvv_sg2042_utils.h" + +void bli_dgemm_rvv_sg2042_asm_4vx4 + ( + intptr_t k, + const void* alpha, + const void* a, + const void* b, + const void* beta, + void* c, intptr_t rs_c, intptr_t cs_c + ); + +void bli_dgemm_rvv_sg2042_4vx4 + ( + dim_t m, + dim_t n, + dim_t k, + const void* alpha, + const void* a, + const void* b, + const void* beta, + void* c, inc_t rs_c, inc_t cs_c, + const auxinfo_t* data, + const cntx_t* cntx + ) +{ + // The assembly kernels always take native machine-sized integer arguments. + // dim_t and inc_t are normally defined as being machine-sized. If larger, assert. + bli_static_assert( sizeof(dim_t) <= sizeof(intptr_t) && + sizeof(inc_t) <= sizeof(intptr_t) ); + + // Extract vector-length dependent mr, nr that are fixed at configure time. + const inc_t mr = bli_cntx_get_blksz_def_dt( BLIS_DOUBLE, BLIS_MR, cntx ); + const inc_t nr = 4; + + GEMM_UKR_SETUP_CT( d, mr, nr, false ); + + // The kernel assumes rs_c == 1, and the context should not deviate from it. 
+ assert( rs_c == 1 ); + + bli_dgemm_rvv_sg2042_asm_4vx4( k, alpha, a, b, beta, c, + get_vlenb(), cs_c * sizeof(double) ); + + GEMM_UKR_FLUSH_CT( d ); +} diff --git a/kernels/rvv_sg2042/3/bli_dgemm_rvv_sg2042_asm_4vx4.S b/kernels/rvv_sg2042/3/bli_dgemm_rvv_sg2042_asm_4vx4.S new file mode 100644 index 0000000000..9919a5dae7 --- /dev/null +++ b/kernels/rvv_sg2042/3/bli_dgemm_rvv_sg2042_asm_4vx4.S @@ -0,0 +1,45 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +*/ + +#define REALNAME bli_dgemm_rvv_sg2042_asm_4vx4 +#define DATASIZE 8 +#define VTYPE e64 +#define FLOAD fld +#define FZERO(fr) fcvt.d.w fr, x0 +#define FEQ feq.d +#define VLE vle64.v +#define VSE vse64.v + +#include "bli_sdgemm_rvv_sg2042_asm_4vx4.h" diff --git a/kernels/rvv_sg2042/3/bli_rvv_sg2042_utils.h b/kernels/rvv_sg2042/3/bli_rvv_sg2042_utils.h new file mode 100644 index 0000000000..e4570321d3 --- /dev/null +++ b/kernels/rvv_sg2042/3/bli_rvv_sg2042_utils.h @@ -0,0 +1,46 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" +#include <assert.h> + +static inline uintptr_t get_vlenb(void) +{ + uintptr_t vlenb = 0; + __asm__ volatile ( + " csrr %0, vlenb" // vector length in bytes + : "=r" (vlenb) + ); + return vlenb; +} diff --git a/kernels/rvv_sg2042/3/bli_sdgemm_rvv_sg2042_asm_4vx4.h b/kernels/rvv_sg2042/3/bli_sdgemm_rvv_sg2042_asm_4vx4.h new file mode 100644 index 0000000000..f947e6a403 --- /dev/null +++ b/kernels/rvv_sg2042/3/bli_sdgemm_rvv_sg2042_asm_4vx4.h @@ -0,0 +1,627 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution.
+   - Neither the name(s) of the copyright holder(s) nor the names of its
+     contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+*/
+
+    .text
+    .align      2
+    .global     REALNAME
+
+// void REALNAME(intptr_t k, void* alpha, void* a, void* b,
+//               void* beta, void* c, intptr_t rs_c, intptr_t cs_c)
+// Real-valued GEMM micro-kernel body: accumulates AB += A(:,l) * B(l,:) for l = 0..k-1 into a tile of 4 vector registers x 4 columns, then stores alpha*AB (beta == 0) or beta*C + alpha*AB to C.
+// register arguments:
+// a0   k      (number of rank-1 updates; counted down in place as loop_counter)
+// a1   alpha  (pointer to the scalar, loaded with FLOAD)
+// a2   a      (pointer to the first of 4 consecutive vector-register-sized row blocks of A)
+// a3   b      (pointer to B; 4 scalars of DATASIZE bytes are read per update)
+// a4   beta   (pointer to the scalar, loaded with FLOAD)
+// a5   c      (pointer to C)
+// a6   rs_c   (byte offset added to step from one vector-sized row block of C to the next)
+// a7   cs_c   (byte offset added to step from one column of C to the next)
+// REALNAME, DATASIZE, VTYPE, FLOAD, FZERO, FEQ, VLE and VSE are not defined here; the including .S file is expected to define them.
+
+#define loop_counter a0
+
+#define A00_ptr a2
+#define A10_ptr t0
+#define A20_ptr t1
+#define A30_ptr t2
+#define A01_ptr s5
+#define A11_ptr s6
+#define A21_ptr s7
+#define A31_ptr t6 // shares t6 with tmp: must be recomputed after every "li tmp"/FEQ that writes tmp
+
+#define B_row_ptr a3
+
+#define C00_ptr a5
+#define C01_ptr t3
+#define C02_ptr t4
+#define C03_ptr t5
+#define C10_ptr s1
+#define C11_ptr s2
+#define C12_ptr s3
+#define C13_ptr s4
+
+#define tmp t6 // same register as A31_ptr (see above)
+
+#define ALPHA fa1 // same register as B11; only loaded after the last use of B11
+#define BETA fa2 // same register as B12; only loaded after the last use of B12
+
+#define B00 fa4
+#define B01 fa5
+#define B02 fa6
+#define B03 fa7
+
+#define B10 fa0
+#define B11 fa1
+#define B12 fa2
+#define B13 fa3
+
+#define fzero ft8
+
+#define A00 v24
+#define A10 v25
+#define A20 v26
+#define A30 v27
+
+#define A01 v28
+#define A11 v29
+#define A21 v30
+#define A31 v31
+
+#define C00 v16
+#define C01 v17
+#define C02 v18
+#define C03 v19
+#define C10 v20
+#define C11 v21
+#define C12 v22
+#define C13 v23
+#define C20 v0 // C20..C33 reuse v0..v7 (AB00..AB13); they are loaded only after those accumulators were stored
+#define C21 v1
+#define C22 v2
+#define C23 v3
+#define C30 v4
+#define C31 v5
+#define C32 v6
+#define C33 v7
+
+#define AB00 v0
+#define AB01 v1
+#define AB02 v2
+#define AB03 v3
+#define AB10 v4
+#define AB11 v5
+#define AB12 v6
+#define AB13 v7
+#define AB20 v8
+#define AB21 v9
+#define AB22 v10
+#define AB23 v11
+#define AB30 v12
+#define AB31 v13
+#define AB32 v14
+#define AB33 v15
+
+#define rs_c a6
+#define cs_c a7
+
+REALNAME:
+    #include "rvv_sg2042_save_registers.h" // spills s0-s7, which this kernel uses as scratch
+
+    vsetvli s0, zero, VTYPE, m1, ta, ma // element width VTYPE, LMUL = 1; the value written to s0 is overwritten on the next line
+    csrr s0, vlenb // s0 = vector register length in bytes
+    FZERO(fzero)
+
+    // Set up pointers
+    add C01_ptr, C00_ptr, cs_c
+    add C02_ptr, C01_ptr, cs_c
+    add C03_ptr, C02_ptr, cs_c
+    add C10_ptr, C00_ptr, rs_c
+    add C11_ptr, C01_ptr, rs_c
+    add C12_ptr, C02_ptr, rs_c
+    add C13_ptr, C03_ptr, rs_c
+
+    // Zero-initialize accumulators
+    vxor.vv AB00, AB00, AB00
+    vxor.vv AB01, AB01, AB01
+    vxor.vv AB02, AB02, AB02
+    vxor.vv AB03, AB03, AB03
+    vxor.vv AB10, AB10, AB10
+    vxor.vv AB11, AB11, AB11
+    vxor.vv AB12, AB12, AB12
+    vxor.vv AB13, AB13, AB13
+    vxor.vv AB20, AB20, AB20
+    vxor.vv AB21, AB21, AB21
+    vxor.vv AB22, AB22, AB22
+    vxor.vv AB23, AB23, AB23
+    vxor.vv AB30, AB30, AB30
+    vxor.vv AB31, AB31, AB31
+    vxor.vv AB32, AB32, AB32
+    vxor.vv AB33, AB33, AB33
+
+    // Handle k == 0
+    beqz loop_counter, MULTIPLYBETA // skips the alpha scaling too: the accumulators are still all zero
+
+    // Set up pointers to rows of A
+    add A10_ptr, A00_ptr, s0
+    add A20_ptr, A10_ptr, s0
+    add A30_ptr, A20_ptr, s0
+
+    slli s0, s0, 2 // length of a column of A in bytes
+
+    li tmp, 3
+    ble loop_counter, tmp, TAIL_UNROLL_2 // fewer than 4 updates: skip the 4x unrolled loop
+
+    // Preload A and B
+    // Load A(:,l)
+    VLE A00, (A00_ptr)
+    VLE A10, (A10_ptr)
+    VLE A20, (A20_ptr)
+    VLE A30, (A30_ptr)
+
+    // Load B(l,0:3)
+    FLOAD B00, 0*DATASIZE(B_row_ptr)
+    FLOAD B01, 1*DATASIZE(B_row_ptr)
+    FLOAD B02, 2*DATASIZE(B_row_ptr)
+    FLOAD B03, 3*DATASIZE(B_row_ptr)
+
+    // Set up pointers to A(:,l+1)
+    add A01_ptr, A00_ptr, s0
+    add A11_ptr, A10_ptr, s0
+    add A21_ptr, A20_ptr, s0
+    add A31_ptr, A30_ptr, s0
+
+LOOP_UNROLL_4: // on entry A(:,l) and B(l,0:3) are already loaded; loads for later steps are interleaved with the FMAs below
+    addi loop_counter, loop_counter, -4
+
+    vfmacc.vf AB00, B00, A00 // AB(0,:) += A(0,0) * B(0,:)
+    vfmacc.vf AB01, B01, A00
+    vfmacc.vf AB02, B02, A00
+    vfmacc.vf AB03, B03, A00
+
+    vfmacc.vf AB10, B00, A10 // AB(1,:) += A(1,0) * B(0,:)
+    vfmacc.vf AB11, B01, A10
+    vfmacc.vf AB12, B02, A10
+    vfmacc.vf AB13, B03, A10
+
+    // Load B(l+1,0:3)
+    FLOAD B10, 4*DATASIZE(B_row_ptr)
+    FLOAD B11, 5*DATASIZE(B_row_ptr)
+    FLOAD B12, 6*DATASIZE(B_row_ptr)
+    FLOAD B13, 7*DATASIZE(B_row_ptr)
+    addi B_row_ptr, B_row_ptr, 8*DATASIZE
+
+    vfmacc.vf AB20, B00, A20 // AB(2,:) += A(2,0) * B(0,:)
+    vfmacc.vf AB21, B01, A20
+    vfmacc.vf AB22, B02, A20
+    vfmacc.vf AB23, B03, A20
+
+    // Load A(:,l+1)
+    VLE A01, (A01_ptr)
+    VLE A11, (A11_ptr)
+    VLE A21, (A21_ptr)
+    VLE A31, (A31_ptr)
+
+    // Point to A(:,l+2)
+    add A00_ptr, A01_ptr, s0
+    add A10_ptr, A11_ptr, s0
+    add A20_ptr, A21_ptr, s0
+    add A30_ptr, A31_ptr, s0
+
+    vfmacc.vf AB30, B00, A30 // AB(3,:) += A(3,0) * B(0,:)
+    vfmacc.vf AB31, B01, A30
+    vfmacc.vf AB32, B02, A30
+    vfmacc.vf AB33, B03, A30
+
+    vfmacc.vf AB00, B10, A01 // AB(0,:) += A(0,1) * B(1,:)
+    vfmacc.vf AB01, B11, A01
+    vfmacc.vf AB02, B12, A01
+    vfmacc.vf AB03, B13, A01
+
+    // Load B(l+2,0:3)
+    FLOAD B00, 0*DATASIZE(B_row_ptr)
+    FLOAD B01, 1*DATASIZE(B_row_ptr)
+    FLOAD B02, 2*DATASIZE(B_row_ptr)
+    FLOAD B03, 3*DATASIZE(B_row_ptr)
+
+    vfmacc.vf AB10, B10, A11 // AB(1,:) += A(1,1) * B(1,:)
+    vfmacc.vf AB11, B11, A11
+    vfmacc.vf AB12, B12, A11
+    vfmacc.vf AB13, B13, A11
+
+    // Load A(:,l+2)
+    VLE A00, (A00_ptr)
+    VLE A10, (A10_ptr)
+    VLE A20, (A20_ptr)
+    VLE A30, (A30_ptr)
+
+    // Point to A(:,l+3)
+    add A01_ptr, A00_ptr, s0
+    add A11_ptr, A10_ptr, s0
+    add A21_ptr, A20_ptr, s0
+    add A31_ptr, A30_ptr, s0
+
+    vfmacc.vf AB20, B10, A21 // AB(2,:) += A(2,1) * B(1,:)
+    vfmacc.vf AB21, B11, A21
+    vfmacc.vf AB22, B12, A21
+    vfmacc.vf AB23, B13, A21
+
+    vfmacc.vf AB30, B10, A31 // AB(3,:) += A(3,1) * B(1,:)
+    vfmacc.vf AB31, B11, A31
+    vfmacc.vf AB32, B12, A31
+    vfmacc.vf AB33, B13, A31
+
+    // Load A(:,l+3)
+    VLE A01, (A01_ptr)
+    VLE A11, (A11_ptr)
+    VLE A21, (A21_ptr)
+    VLE A31, (A31_ptr)
+
+    // Point to A(:,l+4)
+    add A00_ptr, A01_ptr, s0
+    add A10_ptr, A11_ptr, s0
+    add A20_ptr, A21_ptr, s0
+    add A30_ptr, A31_ptr, s0
+
+    vfmacc.vf AB00, B00, A00 // AB(0,:) += A(0,2) * B(2,:)
+    vfmacc.vf AB01, B01, A00
+    vfmacc.vf AB02, B02, A00
+    vfmacc.vf AB03, B03, A00
+
+    // Load B(l+3,0:3)
+    FLOAD B10, 4*DATASIZE(B_row_ptr)
+    FLOAD B11, 5*DATASIZE(B_row_ptr)
+    FLOAD B12, 6*DATASIZE(B_row_ptr)
+    FLOAD B13, 7*DATASIZE(B_row_ptr)
+    addi B_row_ptr, B_row_ptr, 8*DATASIZE
+
+    vfmacc.vf AB10, B00, A10 // AB(1,:) += A(1,2) * B(2,:)
+    vfmacc.vf AB11, B01, A10
+    vfmacc.vf AB12, B02, A10
+    vfmacc.vf AB13, B03, A10
+
+    vfmacc.vf AB20, B00, A20 // AB(2,:) += A(2,2) * B(2,:)
+    vfmacc.vf AB21, B01, A20
+    vfmacc.vf AB22, B02, A20
+    vfmacc.vf AB23, B03, A20
+
+    vfmacc.vf AB30, B00, A30 // AB(3,:) += A(3,2) * B(2,:)
+    vfmacc.vf AB31, B01, A30
+    vfmacc.vf AB32, B02, A30
+    vfmacc.vf AB33, B03, A30
+
+    vfmacc.vf AB00, B10, A01 // AB(0,:) += A(0,3) * B(3,:)
+    vfmacc.vf AB01, B11, A01
+    vfmacc.vf AB02, B12, A01
+    vfmacc.vf AB03, B13, A01
+
+    vfmacc.vf AB10, B10, A11 // AB(1,:) += A(1,3) * B(3,:)
+    vfmacc.vf AB11, B11, A11
+    vfmacc.vf AB12, B12, A11
+    vfmacc.vf AB13, B13, A11
+
+    vfmacc.vf AB20, B10, A21 // AB(2,:) += A(2,3) * B(3,:)
+    vfmacc.vf AB21, B11, A21
+    vfmacc.vf AB22, B12, A21
+    vfmacc.vf AB23, B13, A21
+
+    vfmacc.vf AB30, B10, A31 // AB(3,:) += A(3,3) * B(3,:)
+    vfmacc.vf AB31, B11, A31
+    vfmacc.vf AB32, B12, A31
+    vfmacc.vf AB33, B13, A31
+
+    li tmp, 3 // overwrites t6 (A31_ptr); A31_ptr is rebuilt below before its next use
+    ble loop_counter, tmp, TAIL_UNROLL_2
+
+    // Load A and B for the next iteration
+    // Load B(l,0:3)
+    FLOAD B00, 0*DATASIZE(B_row_ptr)
+    FLOAD B01, 1*DATASIZE(B_row_ptr)
+    FLOAD B02, 2*DATASIZE(B_row_ptr)
+    FLOAD B03, 3*DATASIZE(B_row_ptr)
+
+    // Load A(:,l)
+    VLE A00, (A00_ptr)
+    VLE A10, (A10_ptr)
+    VLE A20, (A20_ptr)
+    VLE A30, (A30_ptr)
+
+    // Set up pointers to A(:,l+1)
+    add A01_ptr, A00_ptr, s0
+    add A11_ptr, A10_ptr, s0
+    add A21_ptr, A20_ptr, s0
+    add A31_ptr, A30_ptr, s0
+
+    j LOOP_UNROLL_4
+
+TAIL_UNROLL_2: // loop_counter <= 3
+    li tmp, 1
+    ble loop_counter, tmp, TAIL_UNROLL_1
+
+    addi loop_counter, loop_counter, -2
+
+    // Load B(l,0:3)
+    FLOAD B00, 0*DATASIZE(B_row_ptr)
+    FLOAD B01, 1*DATASIZE(B_row_ptr)
+    FLOAD B02, 2*DATASIZE(B_row_ptr)
+    FLOAD B03, 3*DATASIZE(B_row_ptr)
+
+    // Load A(0:1,l)
+    VLE A00, (A00_ptr)
+    VLE A10, (A10_ptr)
+
+    // Point to A(:,l+1)
+    add A01_ptr, A00_ptr, s0
+    add A11_ptr, A10_ptr, s0
+    add A21_ptr, A20_ptr, s0
+    add A31_ptr, A30_ptr, s0
+
+    vfmacc.vf AB00, B00, A00 // AB(0,:) += A(0,0) * B(0,:)
+    vfmacc.vf AB01, B01, A00
+    vfmacc.vf AB02, B02, A00
+    vfmacc.vf AB03, B03, A00
+
+    // Load A(2:3,l)
+    VLE A20, (A20_ptr)
+    VLE A30, (A30_ptr)
+
+    vfmacc.vf AB10, B00, A10 // AB(1,:) += A(1,0) * B(0,:)
+    vfmacc.vf AB11, B01, A10
+    vfmacc.vf AB12, B02, A10
+    vfmacc.vf AB13, B03, A10
+
+    // Load B(l+1,0:3)
+    FLOAD B10, 4*DATASIZE(B_row_ptr)
+    FLOAD B11, 5*DATASIZE(B_row_ptr)
+    FLOAD B12, 6*DATASIZE(B_row_ptr)
+    FLOAD B13, 7*DATASIZE(B_row_ptr)
+    addi B_row_ptr, B_row_ptr, 8*DATASIZE
+
+    // Load A(:,l+1)
+    VLE A01, (A01_ptr)
+    VLE A11, (A11_ptr)
+    VLE A21, (A21_ptr)
+    VLE A31, (A31_ptr)
+
+    vfmacc.vf AB20, B00, A20 // AB(2,:) += A(2,0) * B(0,:)
+    vfmacc.vf AB21, B01, A20
+    vfmacc.vf AB22, B02, A20
+    vfmacc.vf AB23, B03, A20
+
+    vfmacc.vf AB30, B00, A30 // AB(3,:) += A(3,0) * B(0,:)
+    vfmacc.vf AB31, B01, A30
+    vfmacc.vf AB32, B02, A30
+    vfmacc.vf AB33, B03, A30
+
+    // Point to A(:,l+2)
+    add A00_ptr, A01_ptr, s0
+    add A10_ptr, A11_ptr, s0
+    add A20_ptr, A21_ptr, s0
+    add A30_ptr, A31_ptr, s0
+
+    vfmacc.vf AB00, B10, A01 // AB(0,:) += A(0,1) * B(1,:)
+    vfmacc.vf AB01, B11, A01
+    vfmacc.vf AB02, B12, A01
+    vfmacc.vf AB03, B13, A01
+
+    vfmacc.vf AB10, B10, A11 // AB(1,:) += A(1,1) * B(1,:)
+    vfmacc.vf AB11, B11, A11
+    vfmacc.vf AB12, B12, A11
+    vfmacc.vf AB13, B13, A11
+
+    vfmacc.vf AB20, B10, A21 // AB(2,:) += A(2,1) * B(1,:)
+    vfmacc.vf AB21, B11, A21
+    vfmacc.vf AB22, B12, A21
+    vfmacc.vf AB23, B13, A21
+
+    vfmacc.vf AB30, B10, A31 // AB(3,:) += A(3,1) * B(1,:)
+    vfmacc.vf AB31, B11, A31
+    vfmacc.vf AB32, B12, A31
+    vfmacc.vf AB33, B13, A31
+
+    li tmp, 1
+    ble loop_counter, tmp, TAIL_UNROLL_1 // NOTE(review): the target is the very next instruction, so this branch has no effect either way
+
+TAIL_UNROLL_1: // loop_counter <= 1
+    beqz loop_counter, MULTIPLYALPHA
+
+    // Load row of B
+    FLOAD B00, 0*DATASIZE(B_row_ptr)
+    FLOAD B01, 1*DATASIZE(B_row_ptr)
+    FLOAD B02, 2*DATASIZE(B_row_ptr)
+    FLOAD B03, 3*DATASIZE(B_row_ptr)
+
+    // Load A(:,l)
+    VLE A00, (A00_ptr)
+    VLE A10, (A10_ptr)
+    VLE A20, (A20_ptr)
+    VLE A30, (A30_ptr)
+
+    vfmacc.vf AB00, B00, A00 // AB(0,:) += A(0,0) * B(0,:)
+    vfmacc.vf AB01, B01, A00
+    vfmacc.vf AB02, B02, A00
+    vfmacc.vf AB03, B03, A00
+
+    vfmacc.vf AB10, B00, A10 // AB(1,:) += A(1,0) * B(0,:)
+    vfmacc.vf AB11, B01, A10
+    vfmacc.vf AB12, B02, A10
+    vfmacc.vf AB13, B03, A10
+
+    vfmacc.vf AB20, B00, A20 // AB(2,:) += A(2,0) * B(0,:)
+    vfmacc.vf AB21, B01, A20
+    vfmacc.vf AB22, B02, A20
+    vfmacc.vf AB23, B03, A20
+
+    vfmacc.vf AB30, B00, A30 // AB(3,:) += A(3,0) * B(0,:)
+    vfmacc.vf AB31, B01, A30
+    vfmacc.vf AB32, B02, A30
+    vfmacc.vf AB33, B03, A30
+
+MULTIPLYALPHA:
+    FLOAD ALPHA, (a1) // overwrites fa1 (B11), which is no longer needed
+
+    // Multiply with alpha
+    vfmul.vf AB00, AB00, ALPHA
+    vfmul.vf AB01, AB01, ALPHA
+    vfmul.vf AB02, AB02, ALPHA
+    vfmul.vf AB03, AB03, ALPHA
+
+    vfmul.vf AB10, AB10, ALPHA
+    vfmul.vf AB11, AB11, ALPHA
+    vfmul.vf AB12, AB12, ALPHA
+    vfmul.vf AB13, AB13, ALPHA
+
+    vfmul.vf AB20, AB20, ALPHA
+    vfmul.vf AB21, AB21, ALPHA
+    vfmul.vf AB22, AB22, ALPHA
+    vfmul.vf AB23, AB23, ALPHA
+
+    vfmul.vf AB30, AB30, ALPHA
+    vfmul.vf AB31, AB31, ALPHA
+    vfmul.vf AB32, AB32, ALPHA
+    vfmul.vf AB33, AB33, ALPHA
+
+MULTIPLYBETA:
+    FLOAD BETA, (a4)
+    FEQ tmp, BETA, fzero // tmp = 1 when beta == 0, else 0
+    beq tmp, zero, BETANOTZERO // beta != 0: C has to be read and scaled
+
+BETAZERO: // beta == 0: C is overwritten without being read
+    VSE AB00, (C00_ptr)
+    VSE AB01, (C01_ptr)
+    VSE AB02, (C02_ptr)
+    VSE AB03, (C03_ptr)
+
+    add C00_ptr, C10_ptr, rs_c // advance pointers to row 2*VLEN
+    add C01_ptr, C11_ptr, rs_c
+    add C02_ptr, C12_ptr, rs_c
+    add C03_ptr, C13_ptr, rs_c
+
+    VSE AB10, (C10_ptr)
+    VSE AB11, (C11_ptr)
+    VSE AB12, (C12_ptr)
+    VSE AB13, (C13_ptr)
+
+    add C10_ptr, C00_ptr, rs_c // advance pointers to row 3*VLEN
+    add C11_ptr, C01_ptr, rs_c
+    add C12_ptr, C02_ptr, rs_c
+    add C13_ptr, C03_ptr, rs_c
+
+    VSE AB20, (C00_ptr)
+    VSE AB21, (C01_ptr)
+    VSE AB22, (C02_ptr)
+    VSE AB23, (C03_ptr)
+
+    VSE AB30, (C10_ptr)
+    VSE AB31, (C11_ptr)
+    VSE AB32, (C12_ptr)
+    VSE AB33, (C13_ptr)
+
+    j END
+
+BETANOTZERO:
+    VLE C00, (C00_ptr) // Load C(0:VLEN-1, 0:3)
+    VLE C01, (C01_ptr)
+    VLE C02, (C02_ptr)
+    VLE C03, (C03_ptr)
+
+    vfmacc.vf AB00, BETA, C00 // AB += beta * C (AB already holds alpha * A * B)
+    vfmacc.vf AB01, BETA, C01
+    vfmacc.vf AB02, BETA, C02
+    vfmacc.vf AB03, BETA, C03
+
+    VSE AB00, (C00_ptr) // Store C(0:VLEN-1, 0:3)
+    VSE AB01, (C01_ptr)
+    VSE AB02, (C02_ptr)
+    VSE AB03, (C03_ptr)
+
+    add C00_ptr, C10_ptr, rs_c // advance pointers to row 2*VLEN
+    add C01_ptr, C11_ptr, rs_c
+    add C02_ptr, C12_ptr, rs_c
+    add C03_ptr, C13_ptr, rs_c
+
+    VLE C10, (C10_ptr) // Load C(VLEN:2*VLEN-1, 0:3)
+    VLE C11, (C11_ptr)
+    VLE C12, (C12_ptr)
+    VLE C13, (C13_ptr)
+
+    vfmacc.vf AB10, BETA, C10
+    vfmacc.vf AB11, BETA, C11
+    vfmacc.vf AB12, BETA, C12
+    vfmacc.vf AB13, BETA, C13
+
+    VSE AB10, (C10_ptr) // Store C(VLEN:2*VLEN-1, 0:3)
+    VSE AB11, (C11_ptr)
+    VSE AB12, (C12_ptr)
+    VSE AB13, (C13_ptr)
+
+    add C10_ptr, C00_ptr, rs_c // advance pointers to row 3*VLEN
+    add C11_ptr, C01_ptr, rs_c
+    add C12_ptr, C02_ptr, rs_c
+    add C13_ptr, C03_ptr, rs_c
+
+    VLE C20, (C00_ptr) // Load C(2*VLEN:3*VLEN-1, 0:3)
+    VLE C21, (C01_ptr)
+    VLE C22, (C02_ptr)
+    VLE C23, (C03_ptr)
+
+    vfmacc.vf AB20, BETA, C20
+    vfmacc.vf AB21, BETA, C21
+    vfmacc.vf AB22, BETA, C22
+    vfmacc.vf AB23, BETA, C23
+
+    VSE AB20, (C00_ptr) // Store C(2*VLEN:3*VLEN-1, 0:3)
+    VSE AB21, (C01_ptr)
+    VSE AB22, (C02_ptr)
+    VSE AB23, (C03_ptr)
+
+    VLE C30, (C10_ptr) // Load C(3*VLEN:4*VLEN-1, 0:3)
+    VLE C31, (C11_ptr)
+    VLE C32, (C12_ptr)
+    VLE C33, (C13_ptr)
+
+    vfmacc.vf AB30, BETA, C30
+    vfmacc.vf AB31, BETA, C31
+    vfmacc.vf AB32, BETA, C32
+    vfmacc.vf AB33, BETA, C33
+
+    VSE AB30, (C10_ptr) // Store C(3*VLEN:4*VLEN-1, 0:3)
+    VSE AB31, (C11_ptr)
+    VSE AB32, (C12_ptr)
+    VSE AB33, (C13_ptr)
+
+END:
+    #include "rvv_sg2042_restore_registers.h" // reloads s0-s7 and releases the stack frame
+    ret
diff --git a/kernels/rvv_sg2042/3/bli_sgemm_rvv_sg2042_4vx4.c b/kernels/rvv_sg2042/3/bli_sgemm_rvv_sg2042_4vx4.c
new file mode 100644
index 0000000000..7587539202
--- /dev/null
+++ b/kernels/rvv_sg2042/3/bli_sgemm_rvv_sg2042_4vx4.c
@@ -0,0 +1,80 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+
+   Copyright (C) 2023, The University of Texas at Austin
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    - Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    - Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    - Neither the name(s) of the copyright holder(s) nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +*/ + +#include "bli_rvv_sg2042_utils.h" + +void bli_sgemm_rvv_sg2042_asm_4vx4 + ( + intptr_t k, + const void* alpha, + const void* a, + const void* b, + const void* beta, + void* c, inc_t rs_c, inc_t cs_c + ); + +void bli_sgemm_rvv_sg2042_4vx4 + ( + dim_t m, + dim_t n, + dim_t k, + const void* alpha, + const void* a, + const void* b, + const void* beta, + void* c, inc_t rs_c, inc_t cs_c, + const auxinfo_t* data, + const cntx_t* cntx + ) +{ + // The assembly kernels always take native machine-sized integer arguments. + // dim_t and inc_t are normally defined as being machine-sized. If larger, assert. + bli_static_assert( sizeof(dim_t) <= sizeof(intptr_t) && + sizeof(inc_t) <= sizeof(intptr_t) ); + + // Extract vector-length dependent mr, nr that are fixed at configure time. + const inc_t mr = bli_cntx_get_blksz_def_dt( BLIS_FLOAT, BLIS_MR, cntx ); + const inc_t nr = 4; + + GEMM_UKR_SETUP_CT( s, mr, nr, false ); + + // The kernel assumes rs_c == 1, and the context should not deviate from it. 
+ assert( rs_c == 1 ); + + bli_sgemm_rvv_sg2042_asm_4vx4( k, alpha, a, b, beta, c, + get_vlenb(), cs_c * sizeof(float) ); + + GEMM_UKR_FLUSH_CT( s ); +} diff --git a/kernels/rvv_sg2042/3/bli_sgemm_rvv_sg2042_asm_4vx4.S b/kernels/rvv_sg2042/3/bli_sgemm_rvv_sg2042_asm_4vx4.S new file mode 100644 index 0000000000..ab55a43a32 --- /dev/null +++ b/kernels/rvv_sg2042/3/bli_sgemm_rvv_sg2042_asm_4vx4.S @@ -0,0 +1,45 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+*/
+
+#define REALNAME bli_sgemm_rvv_sg2042_asm_4vx4 // global symbol emitted by the shared kernel body
+#define DATASIZE 4 // bytes per element; scales the B offsets in the kernel body
+#define VTYPE e32 // element width passed to vsetvli
+#define FLOAD flw // scalar load used for alpha, beta and the elements of B
+#define FZERO(fr) fcvt.s.w fr, x0 // sets fr to 0.0 by converting the integer zero register
+#define FEQ feq.s // scalar compare used for the beta == 0 test
+#define VLE vle32.v // vector load used for A and C
+#define VSE vse32.v // vector store used for C
+
+#include "bli_sdgemm_rvv_sg2042_asm_4vx4.h"
diff --git a/kernels/rvv_sg2042/3/bli_zgemm_rvv_sg2042_4vx4.c b/kernels/rvv_sg2042/3/bli_zgemm_rvv_sg2042_4vx4.c
new file mode 100644
index 0000000000..4806cd4be2
--- /dev/null
+++ b/kernels/rvv_sg2042/3/bli_zgemm_rvv_sg2042_4vx4.c
@@ -0,0 +1,80 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+
+   Copyright (C) 2023, The University of Texas at Austin
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    - Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    - Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    - Neither the name(s) of the copyright holder(s) nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#include "bli_rvv_sg2042_utils.h"
+
+void bli_zgemm_rvv_sg2042_asm_4vx4 // double-complex assembly micro-kernel, implemented in a separate .S file
+    (
+      intptr_t    k,
+      const void* alpha,
+      const void* a,
+      const void* b,
+      const void* beta,
+      void*       c, intptr_t rs_c, intptr_t cs_c // both receive byte offsets (see the call below)
+    );
+
+
+void bli_zgemm_rvv_sg2042_4vx4 // C entry point: validates the stride assumption, converts strides to bytes, and calls the assembly kernel
+    (
+             dim_t      m,
+             dim_t      n,
+             dim_t      k,
+       const void*      alpha,
+       const void*      a,
+       const void*      b,
+       const void*      beta,
+             void*      c, inc_t rs_c, inc_t cs_c,
+       const auxinfo_t* data,
+       const cntx_t*    cntx
+    )
+{
+    // The assembly kernels always take native machine-sized integer arguments.
+    // dim_t and inc_t are normally defined as being machine-sized. If larger, fail at compile time.
+    bli_static_assert( sizeof(dim_t) <= sizeof(intptr_t) &&
+                       sizeof(inc_t) <= sizeof(intptr_t) );
+
+    // mr depends on the vector length and is read from the context; nr is fixed at 4 columns.
+    const inc_t mr = bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_MR, cntx );
+    const inc_t nr = 4;
+
+    GEMM_UKR_SETUP_CT( z, mr, nr, false ); // NOTE(review): presumably redirects c/rs_c/cs_c to a temporary tile for edge cases or unsupported strides -- confirm against the macro definition
+
+    // The kernel assumes rs_c == 1 (unit stride down a column of C) at this point.
+    assert( rs_c == 1 );
+
+    bli_zgemm_rvv_sg2042_asm_4vx4( k, alpha, a, b, beta, c,
+                                   get_vlenb() * 2, cs_c * sizeof(dcomplex) ); // kernel rs_c = 2 * get_vlenb(), kernel cs_c = column stride in bytes
+
+    GEMM_UKR_FLUSH_CT( z ); // NOTE(review): presumably writes a temporary tile back to the caller's C -- confirm against the macro definition
+}
diff --git a/kernels/rvv_sg2042/3/bli_zgemm_rvv_sg2042_asm_4vx4.S b/kernels/rvv_sg2042/3/bli_zgemm_rvv_sg2042_asm_4vx4.S
new file mode 100644
index 0000000000..6386c8acf6
--- /dev/null
+++ b/kernels/rvv_sg2042/3/bli_zgemm_rvv_sg2042_asm_4vx4.S
@@ -0,0 +1,44 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+
+   Copyright (C) 2023, The University of Texas at Austin
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    - Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    - Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    - Neither the name(s) of the copyright holder(s) nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#define REALNAME bli_zgemm_rvv_sg2042_asm_4vx4 +#define DATASIZE 16 +#define VTYPE e64 +#define FLOAD fld +#define FZERO(fr) fcvt.d.w fr, x0 +#define FEQ feq.d +#define VLE vlseg2e64.v +#define VSE vsseg2e64.v + +#include "bli_czgemm_rvv_sg2042_asm_4vx4.h" diff --git a/kernels/rvv_sg2042/3/rvv_sg2042_restore_registers.h b/kernels/rvv_sg2042/3/rvv_sg2042_restore_registers.h new file mode 100644 index 0000000000..bcf7d17c8b --- /dev/null +++ b/kernels/rvv_sg2042/3/rvv_sg2042_restore_registers.h @@ -0,0 +1,77 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + + +// 128-bit RISC-V is assumed to support the __riscv_xlen test macro +#if __riscv_xlen == 128 // false if !defined(__riscv_xlen) + + lq s7, 112(sp) + lq s6, 96(sp) + lq s5, 80(sp) + lq s4, 64(sp) + lq s3, 48(sp) + lq s2, 32(sp) + lq s1, 16(sp) + lq s0, 0(sp) + addi sp, sp, 128 + +// 64-bit RISC-V can be indicated by either __riscv_xlen == 64 or +// RISCV_SIZE == 64, to support toolchains which do not currently +// support __riscv_xlen. If a macro is undefined, it is considered 0. 
+#elif __riscv_xlen == 64 || RISCV_SIZE == 64 + + ld s7, 56(sp) + ld s6, 48(sp) + ld s5, 40(sp) + ld s4, 32(sp) + ld s3, 24(sp) + ld s2, 16(sp) + ld s1, 8(sp) + ld s0, 0(sp) + addi sp, sp, 64 + +#else +// else 32-bit RISC-V is assumed + + lw s7, 28(sp) + lw s6, 24(sp) + lw s5, 20(sp) + lw s4, 16(sp) + lw s3, 12(sp) + lw s2, 8(sp) + lw s1, 4(sp) + lw s0, 0(sp) + addi sp, sp, 32 + +#endif diff --git a/kernels/rvv_sg2042/3/rvv_sg2042_save_registers.h b/kernels/rvv_sg2042/3/rvv_sg2042_save_registers.h new file mode 100644 index 0000000000..537c76ca66 --- /dev/null +++ b/kernels/rvv_sg2042/3/rvv_sg2042_save_registers.h @@ -0,0 +1,77 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +*/ + +// 128-bit RISC-V is assumed to support the __riscv_xlen test macro +#if __riscv_xlen == 128 // false if !defined(__riscv_xlen) + + addi sp, sp, -128 + sq s7, 112(sp) + sq s6, 96(sp) + sq s5, 80(sp) + sq s4, 64(sp) + sq s3, 48(sp) + sq s2, 32(sp) + sq s1, 16(sp) + sq s0, 0(sp) + +// 64-bit RISC-V can be indicated by either __riscv_xlen == 64 or +// RISCV_SIZE == 64, to support toolchains which do not currently +// support __riscv_xlen. If a macro is undefined, it is considered 0. +#elif __riscv_xlen == 64 || RISCV_SIZE == 64 + + addi sp, sp, -64 + sd s7, 56(sp) + sd s6, 48(sp) + sd s5, 40(sp) + sd s4, 32(sp) + sd s3, 24(sp) + sd s2, 16(sp) + sd s1, 8(sp) + sd s0, 0(sp) + +#else +// else 32-bit RISC-V is assumed + + addi sp, sp, -32 + sw s7, 28(sp) + sw s6, 24(sp) + sw s5, 20(sp) + sw s4, 16(sp) + sw s3, 12(sp) + sw s2, 8(sp) + sw s1, 4(sp) + sw s0, 0(sp) + +#endif diff --git a/kernels/rvv_sg2042/bli_kernels_rvv_sg2042.h b/kernels/rvv_sg2042/bli_kernels_rvv_sg2042.h new file mode 100644 index 0000000000..3c3335619b --- /dev/null +++ b/kernels/rvv_sg2042/bli_kernels_rvv_sg2042.h @@ -0,0 +1,38 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +GEMM_UKR_PROT( float, s, gemm_rvv_sg2042_4vx4 ) +GEMM_UKR_PROT( double, d, gemm_rvv_sg2042_4vx4 ) +GEMM_UKR_PROT( scomplex, c, gemm_rvv_sg2042_4vx4 ) +GEMM_UKR_PROT( dcomplex, z, gemm_rvv_sg2042_4vx4 ) diff --git a/share/blis/avx.s b/share/blis/avx.s new file mode 100644 index 0000000000..4b6043e872 --- /dev/null +++ b/share/blis/avx.s @@ -0,0 +1,6 @@ +// +// Test for AVX instruction set. 
+// +vzeroall +vmovapd %ymm0, %ymm1 +vmulpd %ymm0, %ymm0, %ymm1 diff --git a/share/blis/avx512dq.s b/share/blis/avx512dq.s new file mode 100644 index 0000000000..7eb1c6dea8 --- /dev/null +++ b/share/blis/avx512dq.s @@ -0,0 +1,6 @@ +// +// Test for AVX-512dq instruction set. +// +vzeroall +vpmullq %zmm0, %zmm0, %zmm1 +vpmullw %zmm0, %zmm0, %zmm1 diff --git a/share/blis/avx512f.s b/share/blis/avx512f.s new file mode 100644 index 0000000000..a07dad0ca1 --- /dev/null +++ b/share/blis/avx512f.s @@ -0,0 +1,7 @@ +// +// Test for AVX-512f instruction set. +// +vzeroall +vmovapd %zmm0, %zmm1 +vmulpd %zmm0, %zmm0, %zmm1 +vfmadd213pd 0x400(%rax,%rsi,8) {1to8}, %zmm1, %zmm2 diff --git a/share/blis/common.mk b/share/blis/common.mk new file mode 100644 index 0000000000..584eb20f43 --- /dev/null +++ b/share/blis/common.mk @@ -0,0 +1,1365 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + +# Only include this block of code once +ifndef COMMON_MK_INCLUDED +COMMON_MK_INCLUDED := yes + + + +# +# --- CFLAGS storage functions ------------------------------------------------- +# + +# Define a function that stores the value of a variable to a different +# variable containing a specified suffix (corresponding to a configuration). +define store-var-for +$(strip $(1)).$(strip $(2)) := $($(strip $(1))) +endef + +# Define a function similar to store-var-for, except that appends instead +# of overwriting. +define append-var-for +$(strip $(1)).$(strip $(2)) += $($(strip $(1))) +endef + +# Define a function that stores the value of all of the variables in a +# make_defs.mk file to other variables with the configuration (the +# argument $(1)) added as a suffix. This function is called once from +# each make_defs.mk. Also, add the configuration to CONFIGS_INCL. 
+define store-make-defs +$(eval $(call store-var-for,CC, $(1))) +$(eval $(call store-var-for,CC_VENDOR, $(1))) +$(eval $(call store-var-for,CPPROCFLAGS,$(1))) +$(eval $(call store-var-for,CLANGFLAGS, $(1))) +$(eval $(call store-var-for,CMISCFLAGS, $(1))) +$(eval $(call store-var-for,CPICFLAGS, $(1))) +$(eval $(call store-var-for,CWARNFLAGS, $(1))) +$(eval $(call store-var-for,CDBGFLAGS, $(1))) +$(eval $(call store-var-for,COPTFLAGS, $(1))) +$(eval $(call store-var-for,CKOPTFLAGS, $(1))) +$(eval $(call store-var-for,CKVECFLAGS, $(1))) +$(eval $(call store-var-for,CROPTFLAGS, $(1))) +$(eval $(call store-var-for,CRVECFLAGS, $(1))) +CONFIGS_INCL += $(1) +endef + +# Define a function that retrieves the value of a variable for a +# given configuration. +define load-var-for +$($(strip $(1)).$(strip $(2))) +endef + + + +# +# --- CFLAGS query functions --------------------------------------------------- +# + +# Define some functions that return the appropriate CFLAGS for a given +# configuration. This assumes that the make_defs.mk files have already been +# included, which results in those values having been stored to +# configuration-qualified variables.
+ +get-noopt-cflags-for = $(strip $(CFLAGS_PRESET) \ + $(call load-var-for,CDBGFLAGS,$(1)) \ + $(call load-var-for,CWARNFLAGS,$(1)) \ + $(call load-var-for,CPICFLAGS,$(1)) \ + $(call load-var-for,CMISCFLAGS,$(1)) \ + $(call load-var-for,CLANGFLAGS,$(1)) \ + $(call load-var-for,CPPROCFLAGS,$(1)) \ + $(CTHREADFLAGS) \ + $(CINCFLAGS) \ + ) + +get-noopt-cxxflags-for = $(strip $(CXXFLAGS_PRESET) \ + $(call load-var-for,CDBGFLAGS,$(1)) \ + $(call load-var-for,CWARNFLAGS,$(1)) \ + $(call load-var-for,CPICFLAGS,$(1)) \ + $(call load-var-for,CMISCFLAGS,$(1)) \ + $(call load-var-for,CPPROCFLAGS,$(1)) \ + $(CXXLANGFLAGS) \ + $(CTHREADFLAGS) \ + $(CXXTHREADFLAGS) \ + $(CINCFLAGS) \ + ) + +get-refinit-cflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cflags-for,$(1)) \ + -DBLIS_CNAME=$(1) \ + -DBLIS_CNAME_UPPER=$(shell echo $(1) | tr a-z A-Z) \ + $(BUILD_ASANFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + -DBLIS_IN_REF_KERNEL=1 \ + -include $(CONFIG_PATH)/$(1)/bli_kernel_defs_$(1).h \ + ) + +get-refinit-cxxflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cxxflags-for,$(1)) \ + -DBLIS_CNAME=$(1) \ + -DBLIS_CNAME_UPPER=$(shell echo $(1) | tr a-z A-Z) \ + $(BUILD_ASANFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + -DBLIS_IN_REF_KERNEL=1 \ + -include $(CONFIG_PATH)/$(1)/bli_kernel_defs_$(1).h \ + ) + +get-refkern-cflags-for = $(strip $(call load-var-for,CROPTFLAGS,$(1)) \ + $(call load-var-for,CRVECFLAGS,$(1)) \ + $(call get-noopt-cflags-for,$(1)) \ + $(COMPSIMDFLAGS) \ + -DBLIS_CNAME=$(1) \ + -DBLIS_CNAME_UPPER=$(shell echo $(1) | tr a-z A-Z) \ + $(BUILD_ASANFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + -DBLIS_IN_REF_KERNEL=1 \ + -include $(CONFIG_PATH)/$(1)/bli_kernel_defs_$(1).h \ + ) + +get-refkern-cxxflags-for = $(strip $(call load-var-for,CROPTFLAGS,$(1)) \ + $(call load-var-for,CRVECFLAGS,$(1)) \ + $(call get-noopt-cxxflags-for,$(1)) \ + $(COMPSIMDFLAGS) \ + -DBLIS_CNAME=$(1) \ + 
-DBLIS_CNAME_UPPER=$(shell echo $(1) | tr a-z A-Z) \ + $(BUILD_ASANFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + -DBLIS_IN_REF_KERNEL=1 \ + -include $(CONFIG_PATH)/$(1)/bli_kernel_defs_$(1).h \ + ) + +get-config-cflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cflags-for,$(1)) \ + -DBLIS_CNAME=$(1) \ + -DBLIS_CNAME_UPPER=$(shell echo $(1) | tr a-z A-Z) \ + $(BUILD_ASANFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) + +get-config-cxxflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cxxflags-for,$(1)) \ + -DBLIS_CNAME=$(1) \ + -DBLIS_CNAME_UPPER=$(shell echo $(1) | tr a-z A-Z) \ + $(BUILD_ASANFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) + +get-frame-cflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cflags-for,$(1)) \ + $(BUILD_ASANFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) + +get-frame-cxxflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cxxflags-for,$(1)) \ + $(BUILD_ASANFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) + +get-kernel-cflags-for = $(strip $(call load-var-for,CKOPTFLAGS,$(1)) \ + $(call load-var-for,CKVECFLAGS,$(1)) \ + $(call get-noopt-cflags-for,$(1)) \ + -DBLIS_CNAME=$(1) \ + -DBLIS_CNAME_UPPER=$(shell echo $(1) | tr a-z A-Z) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) + +get-kernel-cxxflags-for = $(strip $(call load-var-for,CKOPTFLAGS,$(1)) \ + $(call load-var-for,CKVECFLAGS,$(1)) \ + $(call get-noopt-cxxflags-for,$(1)) \ + -DBLIS_CNAME=$(1) \ + -DBLIS_CNAME_UPPER=$(shell echo $(1) | tr a-z A-Z) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) + +# When compiling addons, we use flags similar to those of general framework +# source. This ensures that the same code can be linked and run across various +# sub-configurations. 
+get-addon-c99flags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cflags-for,$(1)) \ + $(CADDONINCFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) +get-addon-cxxflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cxxflags-for,$(1)) \ + $(CADDONINCFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) +# When compiling addon kernels, we use flags similar to those of kernels +# flags, except we also include the addon header paths. +get-addon-kernel-c99flags-for = $(strip $(call load-var-for,CKOPTFLAGS,$(1)) \ + $(call load-var-for,CKVECFLAGS,$(1)) \ + $(call get-noopt-cflags-for,$(1)) \ + $(CADDONINCFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) + +# When compiling sandboxes, we use flags similar to those of general framework +# source. This ensures that the same code can be linked and run across various +# sub-configurations. (NOTE: If we ever switch to using refkernel or kernel +# flags, we should prevent enabling sandboxes for umbrella families by verifying +# that config_list == config_name if --enable-sandbox is given. THIS ALSO +# APPLIES TO ADDONS ABOVE.) +get-sandbox-c99flags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cflags-for,$(1)) \ + $(CSANDINCFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) +get-sandbox-cxxflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cxxflags-for,$(1)) \ + $(CSANDINCFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) + +# Define a separate function that will return appropriate flags for use by +# applications that want to use the same basic flags as those used when BLIS +# was compiled. (NOTE: This is the same as the $(get-frame-cflags-for ...) 
+# function, except that it omits a few variables that contain flags exclusively +# for use when BLIS is being compiled/built: +# - BUILD_CPPFLAGS, which contains a cpp macro that confirms that BLIS +# is being built; +# - BUILD_SYMFLAGS, which contains symbol export flags that are only +# needed when a shared library is being compiled/linked; and +# - BUILD_ASANFLAGS, which contains a flag that causes the compiler to +# insert instrumentation for memory error detection.) +get-user-cflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cflags-for,$(1)) \ + ) + +# Define functions that return messages appropriate for each non-verbose line +# of compilation output. +get-noopt-text = "(CFLAGS for no optimization)" +get-refinit-text-for = "('$(1)' CFLAGS for ref. kernel init)" +get-refinit-cxxtext-for = "('$(1)' CXXFLAGS for ref. kernel init)" +get-refkern-text-for = "('$(1)' CFLAGS for ref. kernels)" +get-refkern-cxxtext-for = "('$(1)' CXXFLAGS for ref. kernels)" +get-config-text-for = "('$(1)' CFLAGS for config code)" +get-config-cxxtext-for = "('$(1)' CXXFLAGS for config code)" +get-frame-text-for = "('$(1)' CFLAGS for framework code)" +get-frame-cxxtext-for = "('$(1)' CXXFLAGS for framework code)" +get-kernel-text-for = "('$(1)' CFLAGS for kernels)" +get-kernel-cxxtext-for = "('$(1)' CXXFLAGS for kernels)" +get-addon-c99text-for = "('$(1)' CFLAGS for addons)" +get-addon-cxxtext-for = "('$(1)' CXXFLAGS for addons)" +get-addon-kernel-text-for = "('$(1)' CFLAGS for addon kernels)" +get-sandbox-c99text-for = "('$(1)' CFLAGS for sandboxes)" +get-sandbox-cxxtext-for = "('$(1)' CXXFLAGS for sandboxes)" + + + +# +# --- Miscellaneous helper functions ------------------------------------------- +# + +# Define functions that filter a list of filepaths $(1) that contain (or +# omit) an arbitrary substring $(2).
+files-that-contain = $(strip $(foreach f, $(1), $(if $(findstring $(2),$(f)),$(f),))) +files-that-dont-contain = $(strip $(foreach f, $(1), $(if $(findstring $(2),$(f)),,$(f)))) + +# Define a function that removes duplicate strings *without* using the sort +# function. +rm-dups = $(if $1,$(firstword $1) $(call rm-dups,$(filter-out $(firstword $1),$1))) + + + +# +# --- Include makefile configuration file -------------------------------------- +# + +# Use the current directory as the default path to the root directory for +# makefile fragments (and the configuration family's make_defs.mk), but +# allow the includer to override this value if it needs to point to an +# installation directory. +ifeq ($(strip $(SHARE_PATH)),) +SHARE_PATH := . +endif + +# Define the name of the configuration file. +CONFIG_MK_FILE := config.mk + +# Identify the base path for the root directory for makefile fragments (and +# the configuration family's make_defs.mk). We define this path in terms of +# SHARE_PATH, which gets a default value above (which is what happens for the +# top-level Makefile). If SHARE_PATH is specified by the Makefile prior to +# including common.mk, that path is used instead. This allows Makefiles for +# example code and test drivers to reference an installed prefix directory +# for situations when the build directory no longer exists. +BASE_SHARE_PATH := $(SHARE_PATH) + +# Include the configuration file. +-include $(BASE_SHARE_PATH)/$(CONFIG_MK_FILE) + + + +# +# --- Handle 'make clean' and friends without config.mk ------------------------ +# + +# Detect whether we actually got the configuration file. If we didn't, then +# it is likely that the user has not yet generated it (via configure). 
+ifeq ($(strip $(CONFIG_MK_INCLUDED)),yes) +CONFIG_MK_PRESENT := yes +IS_CONFIGURED := yes +else +CONFIG_MK_PRESENT := no +IS_CONFIGURED := no +endif + +# If we didn't get config.mk, then we need to set some basic variables so +# that make will function without error for things like 'make clean'. +ifeq ($(IS_CONFIGURED),no) + +# If this makefile fragment is being run and there is no config.mk present, +# then it's probably safe to assume that the user is currently located in the +# source distribution. +DIST_PATH := . + +# Even though they won't be used explicitly, it appears that setting these +# INSTALL_* variables to something sane (that is, not allowing them default +# to the empty string) is necessary to prevent make from hanging, likely +# because the statements that define UNINSTALL_LIBS and UNINSTALL_HEADERS, +# when evaluated, result in running 'find' on the root directory--definitely +# something we would like to avoid. +INSTALL_LIBDIR := $(HOME)/blis/lib +INSTALL_INCDIR := $(HOME)/blis/include +INSTALL_SHAREDIR := $(HOME)/blis/share + +endif + + + +# +# --- Primary makefile variable definitions ------------------------------------ +# + +# Construct the architecture-version string, which will be used to name the +# library upon installation. +VERS_CONF := $(VERSION)-$(CONFIG_NAME) + +# All makefile fragments in the tree will have this name. +FRAGMENT_MK := .fragment.mk + +# Locations of important files. +BUILD_DIR := build +CONFIG_DIR := config +ifeq ($(FRAME_DIR),) +FRAME_DIR := frame +endif +REFKERN_DIR := ref_kernels +KERNELS_DIR := kernels +ADDON_DIR := addon +SANDBOX_DIR := sandbox +OBJ_DIR := obj +LIB_DIR := lib +INCLUDE_DIR := include +BLASTEST_DIR := blastest +TESTSUITE_DIR := testsuite + +VEND_DIR := vendor +VEND_CPP_DIR := $(VEND_DIR)/cpp +VEND_TESTCPP_DIR := $(VEND_DIR)/testcpp + +# The filename suffix for reference kernels. +REFNM := ref + +# Source suffixes. 
+CONFIG_SRC_SUFS := c +KERNELS_SRC_SUFS := c s S +ifneq ($(findstring hpx,$(THREADING_MODEL)),) +FRAME_SRC_SUFS := c cpp +else +FRAME_SRC_SUFS := c +endif + +ADDON_C99_SUFS := c +ADDON_CXX_SUFS := cc cpp cxx +ADDON_SRC_SUFS := $(ADDON_C99_SUFS) $(ADDON_CXX_SUFS) + +SANDBOX_C99_SUFS := c +SANDBOX_CXX_SUFS := cc cpp cxx +SANDBOX_SRC_SUFS := $(SANDBOX_C99_SUFS) $(SANDBOX_CXX_SUFS) + +# Header suffixes. +FRAME_H99_SUFS := h +FRAME_HDR_SUFS := $(FRAME_H99_SUFS) + +ADDON_H99_SUFS := h +ADDON_HXX_SUFS := hh hpp hxx +ADDON_HDR_SUFS := $(ADDON_H99_SUFS) $(ADDON_HXX_SUFS) + +SANDBOX_H99_SUFS := h +SANDBOX_HXX_SUFS := hh hpp hxx +SANDBOX_HDR_SUFS := $(SANDBOX_H99_SUFS) $(SANDBOX_HXX_SUFS) + +# Combine all header suffixes and remove duplicates via sort(). +ALL_HDR_SUFS := $(sort $(FRAME_HDR_SUFS) \ + $(ADDON_HDR_SUFS) \ + $(SANDBOX_HDR_SUFS) ) + +ALL_H99_SUFS := $(sort $(FRAME_H99_SUFS) \ + $(ADDON_H99_SUFS) \ + $(SANDBOX_H99_SUFS) ) + +# The names of scripts that check output from the BLAS test drivers and +# BLIS test suite. +BLASTEST_CHECK := check-blastest.sh +TESTSUITE_CHECK := check-blistest.sh + +# The names of the testsuite input/configuration files. +TESTSUITE_CONF_GEN := input.general +TESTSUITE_CONF_OPS := input.operations +TESTSUITE_FAST_GEN := input.general.fast +TESTSUITE_FAST_OPS := input.operations.fast +TESTSUITE_MIXD_GEN := input.general.mixed +TESTSUITE_MIXD_OPS := input.operations.mixed +TESTSUITE_SALT_GEN := input.general.salt +TESTSUITE_SALT_OPS := input.operations.salt +TESTSUITE_OUT_FILE := output.testsuite + +# CHANGELOG file. +CHANGELOG := CHANGELOG + +# Something for OS X so that echo -n works as expected. +SHELL := bash + +# Construct paths to the four primary directories of source code: +# the config directory, general framework code, reference kernel code, +# and optimized kernel code. Also process paths for addon and sandbox +# directories. 
+CONFIG_PATH := $(DIST_PATH)/$(CONFIG_DIR) +FRAME_PATH := $(DIST_PATH)/$(FRAME_DIR) +REFKERN_PATH := $(DIST_PATH)/$(REFKERN_DIR) +KERNELS_PATH := $(DIST_PATH)/$(KERNELS_DIR) +ADDON_PATH := $(DIST_PATH)/$(ADDON_DIR) +SANDBOX_PATH := $(DIST_PATH)/$(SANDBOX_DIR) +BUILD_PATH := $(DIST_PATH)/$(BUILD_DIR) + +# Construct paths to some optional C++ template headers contributed by AMD. +VEND_CPP_PATH := $(DIST_PATH)/$(VEND_CPP_DIR) +VEND_TESTCPP_PATH := $(DIST_PATH)/$(VEND_TESTCPP_DIR) + +# Construct paths to the makefile fragments for the four primary directories +# of source code: the config directory, general framework code, reference +# kernel code, and optimized kernel code. +CONFIG_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(CONFIG_DIR) +FRAME_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(FRAME_DIR) +REFKERN_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(REFKERN_DIR) +KERNELS_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(KERNELS_DIR) +ADDON_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(ADDON_DIR) +SANDBOX_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(SANDBOX_DIR) + + + +# +# --- Library name and local paths --------------------------------------------- +# + +# Use lib/CONFIG_NAME as the default path to the local header files, but +# allow the includer to override this value if it needs to point to an +# installation directory. +ifeq ($(strip $(LIB_PATH)),) +LIB_PATH := $(LIB_DIR)/$(CONFIG_NAME) +endif + +# Identify the base path for the intermediate library directory. We define +# this path in terms of LIB_PATH, which gets a default value above (which is +# what happens for the top-level Makefile). If LIB_PATH is specified by the +# Makefile prior to including common.mk, that path is used instead. This +# allows Makefiles for example code and test drivers to reference an installed +# prefix directory for situations when the build directory no longer exists. +BASE_LIB_PATH := $(LIB_PATH) + +# The base name of the BLIS library that we will build. 
+LIBBLIS := libblis + +# The shared (dynamic) library file suffix is different for Linux and OS X. +ifeq ($(OS_NAME),Darwin) +SHLIB_EXT := dylib +else ifeq ($(IS_WIN),yes) +ifeq ($(IS_MSVC),no) +SHLIB_EXT := dll.a +else +SHLIB_EXT := lib +endif +else +SHLIB_EXT := so +endif + +# Note: These names will be modified later to include the configuration and +# version strings. +LIBBLIS_A := $(LIBBLIS).a +LIBBLIS_SO := $(LIBBLIS).$(SHLIB_EXT) + +# Append the base library path to the library names. +LIBBLIS_A_PATH := $(BASE_LIB_PATH)/$(LIBBLIS_A) +LIBBLIS_SO_PATH := $(BASE_LIB_PATH)/$(LIBBLIS_SO) + +# Create a filepath to a local symlink to the soname--that is, the same as +# LIBBLIS_SO_PATH except with the .so major version number. Since the shared +# library lists its soname as 'libblis.so.n', where n is the .so major version +# number, a symlink in BASE_LIB_PATH is needed so that ld can find the local +# shared library when the testsuite is run via 'make test' or 'make check'. + +ifeq ($(OS_NAME),Darwin) +# OS X shared library extensions. +LIBBLIS_SO_MAJ_EXT := $(SO_MAJOR).$(SHLIB_EXT) +LIBBLIS_SO_MMB_EXT := $(SO_MMB).$(SHLIB_EXT) +else ifeq ($(IS_WIN),yes) +# Windows shared library extension. +LIBBLIS_SO_MAJ_EXT := $(SO_MAJOR).dll +LIBBLIS_SO_MMB_EXT := +else +# Linux shared library extensions. +LIBBLIS_SO_MAJ_EXT := $(SHLIB_EXT).$(SO_MAJOR) +LIBBLIS_SO_MMB_EXT := $(SHLIB_EXT).$(SO_MMB) +endif +LIBBLIS_SONAME := $(LIBBLIS).$(LIBBLIS_SO_MAJ_EXT) +LIBBLIS_SO_MAJ_PATH := $(BASE_LIB_PATH)/$(LIBBLIS_SONAME) + +# Construct the output path when building a shared library. +# NOTE: This code and the code immediately above is a little curious and +# perhaps could be refactored (carefully). 
+ifeq ($(IS_WIN),yes) +LIBBLIS_SO_OUTPUT_NAME := $(LIBBLIS_SO_MAJ_PATH) +else +LIBBLIS_SO_OUTPUT_NAME := $(LIBBLIS_SO_PATH) +endif + + + +# +# --- Utility program definitions ---------------------------------------------- +# + +SH := /bin/sh +MV := mv +MKDIR := mkdir -p +RM_F := rm -f +RM_RF := rm -rf +SYMLINK := ln -sf +FIND := find +GREP := grep +EGREP := grep -E +XARGS := xargs +INSTALL := install -c +DEVNULL := /dev/null + +# Script for creating a monolithic header file. +#FLATTEN_H := $(DIST_PATH)/build/flatten-headers.sh +FLATTEN_H := $(PYTHON) $(DIST_PATH)/build/flatten-headers.py + +# Default archiver flags. +ARFLAGS := cr + +# Used to refresh CHANGELOG. +GIT := git +GIT_LOG := $(GIT) log --decorate + +# Define the locations of a script to generate a list of shared library symbols +# within BLIS as well as the symbol file itself. +GEN_SYMS := $(BUILD_PATH)/gen-libblis-symbols.sh +SYM_FILE := $(BUILD_PATH)/libblis-symbols.def + + + +# +# --- Default linker definitions ----------------------------------------------- +# + +# NOTE: This section needs to reside before the inclusion of make_defs.mk +# files (just below), as most configurations' make_defs.mk don't tinker +# with things like LDFLAGS, but some do (or may), in which case they can +# manually override whatever they need. + +# Define the external libraries we may potentially need at link-time. +ifeq ($(IS_MSVC),yes) +LIBM := +else +LIBM := -lm +endif +LIBMEMKIND := -lmemkind + +# Default linker flags. +# NOTE: -lpthread is needed unconditionally because BLIS uses pthread_once() +# to initialize itself in a thread-safe manner. The one exception to this +# rule: if --disable-system is given at configure-time, LIBPTHREAD is empty. +LDFLAGS := $(LDFLAGS_PRESET) $(LIBM) $(LIBPTHREAD) + +# Add libmemkind to the link-time flags, if it was enabled at configure-time. +ifeq ($(MK_ENABLE_MEMKIND),yes) +LDFLAGS += $(LIBMEMKIND) +endif + +# Never use libm with Intel compilers. 
+ifeq ($(CC_VENDOR),icc) +LDFLAGS := $(filter-out $(LIBM),$(LDFLAGS)) +endif + +# Never use libmemkind with Intel SDE. +ifeq ($(DEBUG_TYPE),sde) +LDFLAGS := $(filter-out $(LIBMEMKIND),$(LDFLAGS)) +endif + +# If AddressSanitizer is enabled, add the compiler flag to LDFLAGS. +ifeq ($(MK_ENABLE_ASAN),yes) +LDFLAGS += -fsanitize=address +endif + +# Specify the shared library's 'soname' field. +# NOTE: The flag for creating shared objects is different for Linux and OS X. +ifeq ($(OS_NAME),Darwin) +# OS X shared library link flags. +SOFLAGS := -dynamiclib +ifeq ($(MK_ENABLE_RPATH),yes) +SOFLAGS += -Wl,-install_name,@rpath/$(LIBBLIS_SONAME) +else +SOFLAGS += -Wl,-install_name,$(libdir)/$(LIBBLIS_SONAME) +endif +else +SOFLAGS := -shared +ifeq ($(IS_WIN),yes) +# Windows shared library link flags. +ifeq ($(IS_MSVC),yes) +SOFLAGS += -Wl,-implib:$(BASE_LIB_PATH)/$(LIBBLIS).lib +else +SOFLAGS += -Wl,--out-implib,$(BASE_LIB_PATH)/$(LIBBLIS).dll.a +endif +else +# Linux shared library link flags. +SOFLAGS += -Wl,-soname,$(LIBBLIS_SONAME) +endif +endif + +# Decide which library to link to for things like the testsuite and BLIS test +# drivers. We default to the static library, unless only the shared library was +# enabled, in which case we use the shared library. +LIBBLIS_L := $(LIBBLIS_A) +LIBBLIS_LINK := $(LIBBLIS_A_PATH) +ifeq ($(MK_ENABLE_SHARED),yes) +ifeq ($(MK_ENABLE_STATIC),no) +LIBBLIS_L := $(LIBBLIS_SO) +LIBBLIS_LINK := $(LIBBLIS_SO_PATH) +ifeq ($(IS_WIN),no) +# For Linux and OS X: set rpath property of shared object. +ifeq ($(OS_NAME),Darwin) +# rpath for test_libblis.x +LDFLAGS += -Wl,-rpath,@executable_path/$(BASE_LIB_PATH) +# rpath for BLAS tests +LDFLAGS += -Wl,-rpath,@executable_path/../../../$(BASE_LIB_PATH) +else +# rpath for test_libblis.x +LDFLAGS += -Wl,-rpath,'$$ORIGIN/$(BASE_LIB_PATH)' +# rpath for BLAS tests +LDFLAGS += -Wl,-rpath,'$$ORIGIN/../../../$(BASE_LIB_PATH)' +endif +endif +endif +# On windows, use the shared library even if static is created. 
+ifeq ($(IS_WIN),yes) +LIBBLIS_L := $(LIBBLIS_SO) +LIBBLIS_LINK := $(LIBBLIS_SO_PATH) +endif +endif + + + +# +# --- Include makefile definitions file ---------------------------------------- +# + +# Define the name of the file containing build and architecture-specific +# makefile definitions. +MAKE_DEFS_FILE := make_defs.mk + +# Assemble a list of all configuration family members, including the +# configuration family name itself. Note that sort() will remove duplicates +# for situations where CONFIG_NAME is present in CONFIG_LIST, such as would +# be the case for singleton families. +CONFIG_LIST_FAM := $(sort $(strip $(CONFIG_LIST) $(CONFIG_NAME))) + +# Construct the paths to the makefile definitions files, each of which +# resides in a separate configuration sub-directory. We use CONFIG_LIST_FAM +# since we might need the makefile definitions associated with the +# configuration family (if it is an umbrella family). +# NOTE: We use the prefix $(BASE_SHARE_PATH)/$(CONFIG_DIR)/ instead of +# $(CONFIG_PATH) so that make_defs.mk can be found when it is installed, +# provided the caller defined SHARE_PATH to that install directory. +CONFIG_PATHS := $(addprefix $(BASE_SHARE_PATH)/$(CONFIG_DIR)/, \ + $(CONFIG_LIST_FAM)) +MAKE_DEFS_MK_PATHS := $(addsuffix /$(MAKE_DEFS_FILE), $(CONFIG_PATHS)) + +# Initialize the list of included (found) configurations to empty. +CONFIGS_INCL := + +# Include the makefile definitions files implied by the list of configurations. +-include $(MAKE_DEFS_MK_PATHS) + +# Detect whether we actually got all of the make definitions files. If +# we didn't, then maybe a configuration is mislabeled or missing. The +# check-env-make-defs target checks ALL_MAKE_DEFS_MK_PRESENT and outputs +# an error message if it is set to 'no'. +# NOTE: We use CONFIG_LIST_FAM as the expected list of configurations. +# This combines CONFIG_NAME with CONFIG_LIST. 
The inclusion of CONFIG_NAME +# is needed for situations where the configuration family is an umbrella +# family (e.g. 'intel64'), since families have separate make_defs.mk files. +CONFIGS_EXPECTED := $(CONFIG_LIST_FAM) +ifeq ($(sort $(strip $(CONFIGS_INCL))), \ + $(sort $(strip $(CONFIGS_EXPECTED)))) +ALL_MAKE_DEFS_MK_PRESENT := yes +else +ALL_MAKE_DEFS_MK_PRESENT := no +endif + + + +# +# --- Configuration-agnostic flags --------------------------------------------- +# + +# --- Linker program --- + +# Use whatever compiler was chosen. A C++ compiler must be used if HPX is enabled. +ifneq ($(findstring hpx,$(THREADING_MODEL)),) +LINKER := $(CXX) +else +LINKER := $(CC) +endif + +# --- Warning flags --- + +CWARNFLAGS := + +# Do not allow functions with implicit declarations to be called +ifneq ($(CC_VENDOR),ibm) +CWARNFLAGS += -Werror=implicit-function-declaration +endif + +# Disable unused function warnings and stop compiling on first error for +# all compilers that accept such options: gcc, clang, and icc. +ifneq ($(CC_VENDOR),ibm) +ifneq ($(CC_VENDOR),nvc) +CWARNFLAGS += -Wall -Wno-unused-function -Wfatal-errors +else +CWARNFLAGS += -Wall -Wno-unused-function +endif +endif + +# Disable tautological comparison warnings in clang. +ifeq ($(CC_VENDOR),clang) +CWARNFLAGS += -Wno-tautological-compare -Wno-pass-failed +endif + +# Disable other annoying warnings. +ifeq ($(CC_VENDOR),clang) +CWARNFLAGS += +else +ifeq ($(CC_VENDOR),gcc) +# The '-Wno-maybe-uninitialized' option makes me nervous. Let's temporarily +# disable for now. -FGVZ +#CWARNFLAGS += -Wno-maybe-uninitialized -Wno-comment +CWARNFLAGS += -Wno-comment +endif +endif + +$(foreach c, $(CONFIG_LIST_FAM), $(eval $(call append-var-for,CWARNFLAGS,$(c)))) + +# --- Position-independent code flags (shared libraries only) --- + +# Note: Avoid -fPIC flags for Windows builds since all code is position- +# independent.
+ifeq ($(IS_MSVC),yes) +CPICFLAGS := +endif +$(foreach c, $(CONFIG_LIST_FAM), $(eval $(call store-var-for,CPICFLAGS,$(c)))) + +# --- Symbol exporting flags (shared libraries only) --- + +ifeq ($(MK_ENABLE_SHARED),yes) + +# NOTE: These flags are only applied when building BLIS and not used by +# applications that import BLIS compilation flags via the +# $(get-user-cflags-for ...) function. + +# Determine default export behavior / visibility of symbols for gcc. +ifeq ($(CC_VENDOR),gcc) +ifeq ($(IS_WIN),yes) +ifeq ($(EXPORT_SHARED),all) +BUILD_SYMFLAGS := -Wl,--export-all-symbols, -Wl,--enable-auto-import +else # ifeq ($(EXPORT_SHARED),public) +BUILD_SYMFLAGS := -Wl,--exclude-all-symbols +endif +else # ifeq ($(IS_WIN),no) +ifeq ($(EXPORT_SHARED),all) +# Export all symbols by default. +BUILD_SYMFLAGS := -fvisibility=default +else # ifeq ($(EXPORT_SHARED),public) +# Hide all symbols by default and export only those that have been annotated +# as needing to be exported. +BUILD_SYMFLAGS := -fvisibility=hidden +endif +endif +endif + +# Determine default export behavior / visibility of symbols for icc. +# NOTE: The Windows branches have been omitted since we currently make no +# effort to support Windows builds via icc (only gcc/clang via AppVeyor). +ifeq ($(CC_VENDOR),icc) +ifeq ($(EXPORT_SHARED),all) +# Export all symbols by default. +BUILD_SYMFLAGS := -fvisibility=default +else # ifeq ($(EXPORT_SHARED),public) +# Hide all symbols by default and export only those that have been annotated +# as needing to be exported. +BUILD_SYMFLAGS := -fvisibility=hidden +endif +endif + +# Determine default export behavior / visibility of symbols for clang. 
+ifeq ($(CC_VENDOR),clang) +ifeq ($(IS_WIN),yes) +ifeq ($(IS_MSVC),no) +# This is a clang build targeting MinGW-w64 env +ifeq ($(EXPORT_SHARED),all) +BUILD_SYMFLAGS := -Wl,--export-all-symbols, -Wl,--enable-auto-import +else # ifeq ($(EXPORT_SHARED),public) +BUILD_SYMFLAGS := -Wl,--exclude-all-symbols +endif +endif # ifeq ($(IS_MSVC),no) +ifeq ($(EXPORT_SHARED),all) +# NOTE: clang on Windows does not appear to support exporting all symbols +# by default, and therefore we ignore the value of EXPORT_SHARED. +BUILD_SYMFLAGS := +else # ifeq ($(EXPORT_SHARED),public) +# NOTE: The default behavior of clang on Windows is to hide all symbols +# and only export functions and other declarations that have been annotated +# as needing to be exported. +BUILD_SYMFLAGS := +endif +else # ifeq ($(IS_WIN),no) +ifeq ($(EXPORT_SHARED),all) +# Export all symbols by default. +BUILD_SYMFLAGS := -fvisibility=default +else # ifeq ($(EXPORT_SHARED),public) +# Hide all symbols by default and export only those that have been annotated +# as needing to be exported. +BUILD_SYMFLAGS := -fvisibility=hidden +endif +endif +endif + +else #ifeq ($(MK_ENABLE_SHARED),no) + +# Don't modify CPICFLAGS for the various configuration family members. +# Don't use any special symbol export flags. +BUILD_SYMFLAGS := + +endif + +# --- Language flags --- + +# Enable C99. +CLANGFLAGS := -std=c99 +$(foreach c, $(CONFIG_LIST_FAM), $(eval $(call append-var-for,CLANGFLAGS,$(c)))) + +# Enable C++11, or C++17 if HPX threading is enabled. +# If building a plugin, do not set any default C++ standard. +ifeq ($(PLUGIN_NAME),) +ifneq ($(findstring hpx,$(THREADING_MODEL)),) +CXXLANGFLAGS := -std=c++17 +else +CXXLANGFLAGS := -std=c++11 +endif +else +CXXLANGFLAGS := +endif + +# --- C Preprocessor flags --- + +# Enable clock_gettime() in time.h. +CPPROCFLAGS := -D_POSIX_C_SOURCE=200112L +# Enable ip_mreq on macOS which is needed for ASIO which is needed for HPX.
+ifeq ($(OS_NAME),Darwin) +CPPROCFLAGS += -D_DARWIN_C_SOURCE +endif +$(foreach c, $(CONFIG_LIST_FAM), $(eval $(call append-var-for,CPPROCFLAGS,$(c)))) + +# --- AddressSanitizer flags --- + +ifeq ($(MK_ENABLE_ASAN),yes) +BUILD_ASANFLAGS := -fsanitize=address +else +BUILD_ASANFLAGS := +endif + +# --- Threading flags --- + +# NOTE: We don't have to explicitly omit -pthread when --disable-system is given +# since that option forces --enable-threading=single, and thus -pthread never +# gets added to begin with. + +CTHREADFLAGS := +CXXTHREADFLAGS := + +ifeq ($(CC_VENDOR),gcc) +#ifneq ($(findstring auto,$(THREADING_MODEL)),) +#THREADING_MODEL := openmp +#endif +ifneq ($(findstring openmp,$(THREADING_MODEL)),) +CTHREADFLAGS += -fopenmp +LDFLAGS += -fopenmp +endif +ifneq ($(findstring pthreads,$(THREADING_MODEL)),) +CTHREADFLAGS += -pthread +LDFLAGS += $(LIBPTHREAD) +endif +endif + +ifeq ($(CC_VENDOR),icc) +#ifneq ($(findstring auto,$(THREADING_MODEL)),) +#THREADING_MODEL := openmp +#endif +ifneq ($(findstring openmp,$(THREADING_MODEL)),) +CTHREADFLAGS += -fopenmp +LDFLAGS += -fopenmp +endif +ifneq ($(findstring pthreads,$(THREADING_MODEL)),) +CTHREADFLAGS += -pthread +LDFLAGS += $(LIBPTHREAD) +endif +endif + +ifeq ($(CC_VENDOR),clang) +#ifneq ($(findstring auto,$(THREADING_MODEL)),) +#THREADING_MODEL := pthreads +#endif +ifneq ($(findstring openmp,$(THREADING_MODEL)),) +CTHREADFLAGS += -fopenmp +LDFLAGS += -fopenmp +endif +ifneq ($(findstring pthreads,$(THREADING_MODEL)),) +CTHREADFLAGS += -pthread +LDFLAGS += $(LIBPTHREAD) +endif +endif + +# Threading flags for HPX. 
+ifneq ($(findstring hpx,$(THREADING_MODEL)),) +HPX_CXXFLAGS := $(shell pkg-config --cflags hpx_component) +HPX_LDFLAGS := $(filter-out -shared,$(shell pkg-config --libs hpx_component)) +CTHREADFLAGS += $(filter-out -std=%,$(HPX_CXXFLAGS)) +LDFLAGS += $(HPX_LDFLAGS) +ifeq ($(OS_NAME),Darwin) +RPATH_PREFIX := -Wl,-rpath, +LDFLAGS += $(patsubst -L%,$(RPATH_PREFIX)%,$(filter -L%,$(HPX_LDFLAGS))) +endif +endif + +# --- #pragma omp simd flags (used for reference kernels only) --- + +ifeq ($(PRAGMA_OMP_SIMD),yes) +ifeq ($(CC_VENDOR),gcc) +COMPSIMDFLAGS := -fopenmp-simd +else +ifeq ($(CC_VENDOR),clang) +COMPSIMDFLAGS := -fopenmp-simd +else +ifeq ($(CC_VENDOR),icc) +COMPSIMDFLAGS := -qopenmp-simd +endif +endif +endif +else # ifeq ($(PRAGMA_OMP_SIMD),no) +COMPSIMDFLAGS := +endif + + + +# +# --- Adjust verbosity level manually using make V=[0,1] ----------------------- +# + +ifeq ($(V),1) +ENABLE_VERBOSE := yes +BLIS_ENABLE_TEST_OUTPUT := yes +endif + +ifeq ($(V),0) +ENABLE_VERBOSE := no +BLIS_ENABLE_TEST_OUTPUT := no +endif + + + +# +# --- Append OS-specific libraries to LDFLAGS ---------------------------------- +# + +ifeq ($(OS_NAME),Linux) +# Exclude -lrt on Android by detecting Bionic. +# printf *must* be used here rather than echo -e +BIONIC := $(findstring bionic,$(shell printf "\#ifdef __BIONIC__\nbionic\n\#endif" | $(CC) -E -)) +ifeq (,$(BIONIC)) +LDFLAGS += -lrt +endif +endif + + + +# +# --- LDFLAGS cleanup ---------------------------------------------------------- +# + + + +# +# --- Include makefile fragments ----------------------------------------------- +# + +# Initialize our list of directory paths to makefile fragments with the empty +# list. This variable will accumulate all of the directory paths in which +# makefile fragments reside. +FRAGMENT_DIR_PATHS := + +# Initialize our makefile variables that source code files will be accumulated +# into by the makefile fragments. This initialization is very important! 
These +# variables will end up with weird contents if we don't initialize them to +# empty prior to recursively including the makefile fragments. +MK_CONFIG_SRC := +MK_KERNELS_SRC := +MK_REFKERN_SRC := +MK_FRAME_SRC := +MK_ADDON_SRC := +MK_SANDBOX_SRC := + +# -- config -- + +# Construct paths to each of the sub-configurations specified in the +# configuration list. Note that we use CONFIG_LIST_FAM, which already +# has CONFIG_NAME included (with duplicates removed). +CONFIG_PATHS := $(addprefix $(CONFIG_FRAG_PATH)/, $(CONFIG_LIST_FAM)) + +# This variable is used by the include statements as they recursively include +# one another. For the 'config' directory, we initialize it to that directory +# in preparation to include the fragments in the configuration sub-directory. +PARENT_SRC_PATH := $(CONFIG_PATH) +PARENT_PATH := $(CONFIG_FRAG_PATH) + +# Recursively include the makefile fragments in each of the sub-configuration +# directories. +-include $(addsuffix /$(FRAGMENT_MK), $(CONFIG_PATHS)) + +# -- kernels -- + +# Construct paths to each of the kernel sets required by the sub-configurations +# in the configuration list. +KERNEL_PATHS := $(addprefix $(KERNELS_FRAG_PATH)/, $(KERNEL_LIST)) + +# This variable is used by the include statements as they recursively include +# one another. For the 'kernels' directory, we initialize it to that directory +# in preparation to include the fragments in the kernels sub-directories. +PARENT_SRC_PATH := $(KERNELS_PATH) +PARENT_PATH := $(KERNELS_FRAG_PATH) + +# Recursively include the makefile fragments in each of the kernels sub- +# directories. +-include $(addsuffix /$(FRAGMENT_MK), $(KERNEL_PATHS)) + +# -- ref_kernels -- +# -- frame -- + +# This variable is used by the include statements as they recursively include +# one another. For the framework and reference kernel source trees (i.e., the +# 'frame' and 'ref_kernels' directories), we initialize it to the top-level +# directory since that is its parent.
+PARENT_SRC_PATH := $(DIST_PATH) +PARENT_PATH := $(OBJ_DIR)/$(CONFIG_NAME) + +# Recursively include all the makefile fragments in the directories for the +# reference kernels and portable framework. +-include $(addsuffix /$(FRAGMENT_MK), $(REFKERN_FRAG_PATH)) +-include $(addsuffix /$(FRAGMENT_MK), $(FRAME_FRAG_PATH)) + +# -- addon -- + +# Construct paths to each addon. +# NOTE: If $(ADDON_LIST) is empty (because no addon was enabled at configure- +# time) then $(ADDON_PATHS) will also be empty, which will cause no fragments +# to be included. +ADDON_PATHS := $(addprefix $(ADDON_FRAG_PATH)/, $(ADDON_LIST)) + +# This variable is used by the include statements as they recursively include +# one another. For the 'addons' directory, we initialize it to that directory +# in preparation to include the fragments in the addon sub-directories. +PARENT_SRC_PATH := $(ADDON_PATH) +PARENT_PATH := $(ADDON_FRAG_PATH) + +# Recursively include the makefile fragments in each of the addons sub- +# directories. +-include $(addsuffix /$(FRAGMENT_MK), $(ADDON_PATHS)) + +# -- sandbox -- + +# Construct paths to each sandbox. (At present, there can be only one.) +# NOTE: If $(SANDBOX) is empty (because no sandbox was enabled at configure- +# time) then $(SANDBOX_PATHS) will also be empty, which will cause no +# fragments to be included. +SANDBOX_PATHS := $(addprefix $(SANDBOX_FRAG_PATH)/, $(SANDBOX)) + +# This variable is used by the include statements as they recursively include +# one another. For the 'sandbox' directory, we initialize it to that directory +# in preparation to include the fragments in the sandbox sub-directory. +PARENT_SRC_PATH := $(SANDBOX_PATH) +PARENT_PATH := $(SANDBOX_FRAG_PATH) + +# Recursively include the makefile fragments in the sandbox sub-directory.
+-include $(addsuffix /$(FRAGMENT_MK), $(SANDBOX_PATHS)) + +# -- post-processing -- + +# Create a list of the makefile fragments using the variable into which each +# of the above include statements accumulated their directory paths. +MAKEFILE_FRAGMENTS := $(addsuffix /$(FRAGMENT_MK), $(FRAGMENT_DIR_PATHS)) + +# Detect whether we actually got any makefile fragments. If we didn't, then it +# is likely that the user has not yet generated them (via configure). +ifeq ($(strip $(MAKEFILE_FRAGMENTS)),) +MAKEFILE_FRAGMENTS_PRESENT := no +else +MAKEFILE_FRAGMENTS_PRESENT := yes +endif + + +# +# --- Important sets of header files and paths --------------------------------- +# + +# Define a function that will expand all of the directory paths given in $(1) +# to actual filepaths using the list of suffixes provided in $(2). +get-filepaths = $(strip $(foreach path, $(1), \ + $(foreach suf, $(2), \ + $(wildcard $(path)/*.$(suf)) \ + ) ) ) + +# Define a function that will expand all of the directory paths given in $(1) +# to actual filepaths using the list of suffixes provided in $(2), taking only +# the first expansion from each directory with at least one file matching +# the current suffix. Finally, strip the filenames from all resulting files, +# returning only the directory paths. +get-dirpaths = $(dir $(foreach path, $(1), \ + $(firstword \ + $(foreach suf, $(2), \ + $(wildcard $(path)/*.$(suf)) \ + ) ) ) ) + +# We'll use three directory lists. The first is a list of all of the directories +# in which makefile fragments were generated, plus the current directory. (The +# current directory is needed so we include bli_config.h and bli_addon.h in the +# processing of header files.) The second and third are subsets of the first +# that begin with the addon and sandbox root paths, respectively. +ALLFRAG_DIR_PATHS := .
$(FRAGMENT_DIR_PATHS) +ADDON_DIR_PATHS := $(filter $(ADDON_PATH)/%,$(ALLFRAG_DIR_PATHS)) +SANDBOX_DIR_PATHS := $(filter $(SANDBOX_PATH)/%,$(ALLFRAG_DIR_PATHS)) + +ALL_H99_FILES := $(call get-filepaths,$(ALLFRAG_DIR_PATHS),$(ALL_H99_SUFS)) +FRAME_H99_FILES := $(filter-out $(ADDON_PATH)/%, \ + $(filter-out $(SANDBOX_PATH)/%, \ + $(ALL_H99_FILES) \ + ) ) + +ALL_H99_DIRPATHS := $(call get-dirpaths,$(ALLFRAG_DIR_PATHS),$(ALL_H99_SUFS)) + +ADDON_H99_FILES := $(call get-filepaths,$(ADDON_DIR_PATHS),$(ADDON_H99_SUFS)) +ADDON_HXX_FILES := $(call get-filepaths,$(ADDON_DIR_PATHS),$(ADDON_HXX_SUFS)) +ADDON_HDR_DIRPATHS := $(call get-dirpaths,$(ADDON_DIR_PATHS),$(ALL_HDR_SUFS)) + +SANDBOX_H99_FILES := $(call get-filepaths,$(SANDBOX_DIR_PATHS),$(SANDBOX_H99_SUFS)) +SANDBOX_HXX_FILES := $(call get-filepaths,$(SANDBOX_DIR_PATHS),$(SANDBOX_HXX_SUFS)) +SANDBOX_HDR_DIRPATHS := $(call get-dirpaths,$(SANDBOX_DIR_PATHS),$(ALL_HDR_SUFS)) + + + +# +# --- blis.h header definitions ------------------------------------------------ +# + +# Use include/CONFIG_NAME as the default path to the local header files, but +# allow the includer to override this value if it needs to point to an +# installation directory. +ifeq ($(strip $(INC_PATH)),) +INC_PATH := $(INCLUDE_DIR)/$(CONFIG_NAME) +endif + +# Identify the base path for the intermediate include directory. We define +# this path in terms of INC_PATH, which gets a default value above (which is +# what happens for the top-level Makefile). If INC_PATH is specified by the +# Makefile prior to including common.mk, that path is used instead. This +# allows Makefiles for example code and test drivers to reference an installed +# prefix directory for situations when the build directory no longer exists. +BASE_INC_PATH := $(INC_PATH) + +# Isolate the path to blis.h by filtering the file from the list of framework +# header files. 
+BLIS_H := blis.h +BLIS_H_SRC_PATH := $(filter %/$(BLIS_H), $(FRAME_H99_FILES)) + +# Construct the path to what will be the intermediate flattened/monolithic +# blis.h file. +BLIS_H_FLAT := $(BASE_INC_PATH)/$(BLIS_H) + +# Construct the path to the helper blis.h file that will reside one directory +# up from the installed copy of blis.h. +HELP_BLIS_H_PATH := $(BUILD_DIR)/$(BLIS_H) + + +# +# --- cblas.h header definitions ----------------------------------------------- +# + +# Isolate the path to cblas.h by filtering the file from the list of framework +# header files, and then strip the filename to obtain the directory in which +# cblas.h resides. +CBLAS_H := cblas.h +CBLAS_H_SRC_PATH := $(filter %/$(CBLAS_H), $(FRAME_H99_FILES)) +CBLAS_H_DIRPATH := $(dir $(CBLAS_H_SRC_PATH)) + +# Construct the path to what will be the intermediate flattened/monolithic +# cblas.h file. +CBLAS_H_FLAT := $(BASE_INC_PATH)/$(CBLAS_H) + +# Construct the path to the helper cblas.h file that will reside one directory +# up from the installed copy of cblas.h. +HELP_CBLAS_H_PATH := $(BUILD_DIR)/$(CBLAS_H) + + +# +# --- Compiler include path definitions ---------------------------------------- +# + +# Obtain a list of header files #included inside of the bli_cntx_ref.c file. +# Due to the way that bli_cntx_ref.c uses headers and macros, paths to these +# files will be needed when compiling bli_cntx_ref.c with the monolithic header. +ifeq ($(strip $(SHARE_PATH)),.) +REF_KER_SRC := $(DIST_PATH)/$(REFKERN_DIR)/bli_cntx_ref.c +# +# NOTE: A redirect to /dev/null has been added to the grep command below because +# as of version 3.8, grep outputs warnings when encountering stray backslashes +# in regular expressions [1]. Versions older than 3.8 not only do not complain, +# but actually seem to *require* the backslash, perhaps because of the way we +# are invoking grep via GNU make's shell command. 
WHEN DEBUGGING ANYTHING +# INVOLVING THE MAKE VARIABLE BELOW, PLEASE CONSIDER TEMPORARILY REMOVING THE +# REDIRECT TO /dev/null SO THAT YOU SEE ANY MESSAGES SENT TO STANDARD ERROR. +# +# [1] https://lists.gnu.org/archive/html/info-gnu/2022-09/msg00001.html +# +REF_KER_HEADERS := $(shell $(GREP) "\#include" $(REF_KER_SRC) 2> $(DEVNULL) | sed -e "s/\#include [\"<]\([a-zA-Z0-9\_\.\/\-]*\)[\">].*/\1/g" | $(GREP) -v $(BLIS_H)) +endif + +# Match each header found above with the path to that header, and then strip +# leading, trailing, and internal whitespace. +REF_KER_H_PATHS := $(call rm-dups,$(strip \ + $(foreach header, $(REF_KER_HEADERS), \ + $(dir $(filter %/$(header), \ + $(FRAME_H99_FILES)))))) + +# Add -I to each header path so we can specify our include search paths to the +# C compiler. Then add frame/include since it's needed when compiling source +# files that #include bli_oapi_ba.h or bli_oapi_ex.h. +REF_KER_I_PATHS := $(strip $(patsubst %, -I%, $(REF_KER_H_PATHS))) +REF_KER_I_PATHS += -I$(DIST_PATH)/frame/include + +# Prefix the paths above with the base include path. +# NOTE: We no longer need every header path in the source tree since we +# now #include the monolithic/flattened blis.h instead. +CINCFLAGS := -I$(BASE_INC_PATH) $(REF_KER_I_PATHS) + +# If CBLAS is enabled, we also include the path to the cblas.h directory so +# that the compiler will be able to find cblas.h as the CBLAS source code is +# being compiled. +ifeq ($(MK_ENABLE_CBLAS),yes) +CINCFLAGS += -I$(CBLAS_H_DIRPATH) +endif + +# Obtain a list of header paths in the configured addons. Then add -I to each +# header path. +CADDONINCFLAGS := $(strip $(patsubst %, -I%, $(ADDON_HDR_DIRPATHS))) + +# Obtain a list of header paths in the configured sandbox. Then add -I to each +# header path. 
+CSANDINCFLAGS := $(strip $(patsubst %, -I%, $(SANDBOX_HDR_DIRPATHS))) + + +# +# --- BLIS configuration header definitions ------------------------------------ +# + +# These files were created by configure, but we need to define them here so we +# can remove them as part of the clean targets. +BLIS_ADDON_H := ./bli_addon.h +BLIS_CONFIG_H := ./bli_config.h + + +# +# --- Special preprocessor macro definitions ----------------------------------- +# + +# Define a C preprocessor flag that is *only* defined when BLIS is being +# compiled. (In other words, an application that #includes blis.h will not +# get this cpp macro.) +BUILD_CPPFLAGS := -DBLIS_IS_BUILDING_LIBRARY + + +# +# --- configure file location -------------------------------------------------- +# + +CONFIGURE_FILE := $(DIST_PATH)/configure + + +# end of ifndef COMMON_MK_INCLUDED conditional block +endif diff --git a/share/blis/config/a64fx/bli_kernel_defs_a64fx.h b/share/blis/config/a64fx/bli_kernel_defs_a64fx.h new file mode 100644 index 0000000000..2c5c972049 --- /dev/null +++ b/share/blis/config/a64fx/bli_kernel_defs_a64fx.h @@ -0,0 +1,52 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 32 +#define BLIS_MR_d 16 +#define BLIS_MR_c 16 +#define BLIS_MR_z 8 + +#define BLIS_NR_s 10 +#define BLIS_NR_d 10 +#define BLIS_NR_c 10 +#define BLIS_NR_z 10 + +//#endif + diff --git a/share/blis/config/a64fx/make_defs.mk b/share/blis/config/a64fx/make_defs.mk new file mode 100644 index 0000000000..5cc8162ba8 --- /dev/null +++ b/share/blis/config/a64fx/make_defs.mk @@ -0,0 +1,82 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := a64fx +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -D_GNU_SOURCE -D_A64FX +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O3 -ftree-vectorize -march=armv8-a+sve +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) +CKVECFLAGS := + +# Flags specific to reference kernels. 
+CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/altra/bli_kernel_defs_altra.h b/share/blis/config/altra/bli_kernel_defs_altra.h new file mode 100644 index 0000000000..815c593993 --- /dev/null +++ b/share/blis/config/altra/bli_kernel_defs_altra.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 8 +#define BLIS_MR_d 6 + +#define BLIS_NR_s 12 +#define BLIS_NR_d 8 + +//#endif + diff --git a/share/blis/config/altra/make_defs.mk b/share/blis/config/altra/make_defs.mk new file mode 100644 index 0000000000..ef1e337db6 --- /dev/null +++ b/share/blis/config/altra/make_defs.mk @@ -0,0 +1,90 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := altra +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -D_GNU_SOURCE +CMISCFLAGS := +CPICFLAGS := +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -mcpu=neoverse-n1 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 -ftree-vectorize +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mcpu=neoverse-n1 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mcpu=neoverse-n1 +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. 
+CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/altramax/bli_kernel_defs_altramax.h b/share/blis/config/altramax/bli_kernel_defs_altramax.h new file mode 100644 index 0000000000..815c593993 --- /dev/null +++ b/share/blis/config/altramax/bli_kernel_defs_altramax.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 8 +#define BLIS_MR_d 6 + +#define BLIS_NR_s 12 +#define BLIS_NR_d 8 + +//#endif + diff --git a/share/blis/config/altramax/make_defs.mk b/share/blis/config/altramax/make_defs.mk new file mode 100644 index 0000000000..35bd7de489 --- /dev/null +++ b/share/blis/config/altramax/make_defs.mk @@ -0,0 +1,90 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := altramax +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -D_GNU_SOURCE +CMISCFLAGS := +CPICFLAGS := +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -mcpu=neoverse-n1 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 -ftree-vectorize +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mcpu=neoverse-n1 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mcpu=neoverse-n1 +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. 
+CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/amd64/make_defs.mk b/share/blis/config/amd64/make_defs.mk new file mode 100644 index 0000000000..bbe4d8d5f6 --- /dev/null +++ b/share/blis/config/amd64/make_defs.mk @@ -0,0 +1,69 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := amd64 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Setting for reference and optimized kernels are taken from individual +# subconfiguration makefile fragments in this family. + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/amd64_legacy/make_defs.mk b/share/blis/config/amd64_legacy/make_defs.mk new file mode 100644 index 0000000000..914f533ae0 --- /dev/null +++ b/share/blis/config/amd64_legacy/make_defs.mk @@ -0,0 +1,70 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := amd64_legacy +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. 
+CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Settings for reference and optimized kernels are taken from individual +# subconfiguration makefile fragments in this family. + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/arm32/make_defs.mk b/share/blis/config/arm32/make_defs.mk new file mode 100644 index 0000000000..ee95296386 --- /dev/null +++ b/share/blis/config/arm32/make_defs.mk @@ -0,0 +1,86 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := arm32 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := -mfloat-abi=hard -mfpu=neon +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -march=armv7-a +else +$(error gcc is required for this configuration.) +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. 
+$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/arm64/make_defs.mk b/share/blis/config/arm64/make_defs.mk new file mode 100644 index 0000000000..1f8c2e84b5 --- /dev/null +++ b/share/blis/config/arm64/make_defs.mk @@ -0,0 +1,90 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := arm64 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -D_GNU_SOURCE +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -march=armv8-a +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -march=armv8-a +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/armsve/bli_kernel_defs_armsve.h b/share/blis/config/armsve/bli_kernel_defs_armsve.h new file mode 100644 index 0000000000..8c9c0b0dd6 --- /dev/null +++ b/share/blis/config/armsve/bli_kernel_defs_armsve.h @@ -0,0 +1,58 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +// +// The armsve configuration handles both 256-bit and 512-bit SVE vectors, +// so it is not possible to define specific register block sizes. Thus, +// armsve can't use reference kernels! 
+// + +#define BLIS_MR_s -1 +#define BLIS_MR_d -1 +#define BLIS_MR_c -1 +#define BLIS_MR_z -1 + +#define BLIS_NR_s 10 +#define BLIS_NR_d 10 +#define BLIS_NR_c 10 +#define BLIS_NR_z 10 + +//#endif + diff --git a/share/blis/config/armsve/make_defs.mk b/share/blis/config/armsve/make_defs.mk new file mode 100644 index 0000000000..340b52f316 --- /dev/null +++ b/share/blis/config/armsve/make_defs.mk @@ -0,0 +1,82 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := armsve +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -D_GNU_SOURCE +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O3 -ftree-vectorize -march=armv8-a+sve +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) +CKVECFLAGS := + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. 
+$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/bgq/bli_kernel_defs_bgq.h b/share/blis/config/bgq/bli_kernel_defs_bgq.h new file mode 100644 index 0000000000..bd3962e45a --- /dev/null +++ b/share/blis/config/bgq/bli_kernel_defs_bgq.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_d 8 +#define BLIS_MR_z 4 + +#define BLIS_NR_d 8 +#define BLIS_NR_z 4 + +//#endif + diff --git a/share/blis/config/bgq/make_defs.mk b/share/blis/config/bgq/make_defs.mk new file mode 100644 index 0000000000..fa4479956b --- /dev/null +++ b/share/blis/config/bgq/make_defs.mk @@ -0,0 +1,102 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := bgq +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +#ifeq ($(CC),) +#CC := /bgsys/drivers/ppcfloor/comm/gcc.legacy/bin/mpixlc_r +#CC_VENDOR := ibm +#endif + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -I/bgsys/drivers/ppcfloor -I/bgsys/drivers/ppcfloor/spi/include/kernel/cnk +ifeq ($(CC_VENDOR),ibm) +CMISCFLAGS := -qthreaded -qsmp=omp -qasm=gcc -qkeyword=asm # -qreport -qsource -qlistopt -qlist +else ifeq ($(CC_VENDOR),clang) +CMISCFLAGS := -fopenmp +else +$(error xlc or bgclang is required for this configuration.) +endif +CPICFLAGS := -fPIC +CWARNFLAGS := -w + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),ibm) +CKVECFLAGS := -qarch=qp -qtune=qp -qsimd=auto -qhot=level=1 -qprefetch -qunroll=yes -qnoipa +endif + +# Flags specific to reference kernels. 
+CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Override the default value for LDFLAGS. +ifeq ($(CC_VENDOR),ibm) +LDFLAGS := -L/bgsys/drivers/ppcfloor/spi/lib -lSPI -lSPI_cnk -qthreaded -qsmp=omp +else ifeq ($(CC_VENDOR),clang) +LDFLAGS := -L/bgsys/drivers/ppcfloor/spi/lib -lSPI -lSPI_cnk -fopenmp +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/bulldozer/bli_kernel_defs_bulldozer.h b/share/blis/config/bulldozer/bli_kernel_defs_bulldozer.h new file mode 100644 index 0000000000..ea1e58e66b --- /dev/null +++ b/share/blis/config/bulldozer/bli_kernel_defs_bulldozer.h @@ -0,0 +1,52 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 8 +#define BLIS_MR_d 4 +#define BLIS_MR_c 8 +#define BLIS_MR_z 4 + +#define BLIS_NR_s 8 +#define BLIS_NR_d 6 +#define BLIS_NR_c 4 +#define BLIS_NR_z 4 + +//#endif + diff --git a/share/blis/config/bulldozer/make_defs.mk b/share/blis/config/bulldozer/make_defs.mk new file mode 100644 index 0000000000..e3e2088622 --- /dev/null +++ b/share/blis/config/bulldozer/make_defs.mk @@ -0,0 +1,90 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := bulldozer +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. 
+CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mfpmath=sse -mavx -mfma4 -march=bdver1 -mno-tbm -mno-xop -mno-lwp +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mfpmath=sse -mavx -mfma4 -march=bdver1 -mno-tbm -mno-xop -mno-lwp +else +$(error gcc or clang are required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/cortexa15/bli_kernel_defs_cortexa15.h b/share/blis/config/cortexa15/bli_kernel_defs_cortexa15.h new file mode 100644 index 0000000000..9c413f7f84 --- /dev/null +++ b/share/blis/config/cortexa15/bli_kernel_defs_cortexa15.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 4 +#define BLIS_MR_d 4 + +#define BLIS_NR_s 4 +#define BLIS_NR_d 4 + +//#endif + diff --git a/share/blis/config/cortexa15/make_defs.mk b/share/blis/config/cortexa15/make_defs.mk new file mode 100644 index 0000000000..3a9a83b39d --- /dev/null +++ b/share/blis/config/cortexa15/make_defs.mk @@ -0,0 +1,86 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := cortexa15 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := -mfloat-abi=hard -mfpu=neon +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mcpu=cortex-a15 +else +$(error gcc is required for this configuration.) +endif + +# Flags specific to reference kernels. 
+CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/cortexa53/bli_kernel_defs_cortexa53.h b/share/blis/config/cortexa53/bli_kernel_defs_cortexa53.h new file mode 100644 index 0000000000..60292099cc --- /dev/null +++ b/share/blis/config/cortexa53/bli_kernel_defs_cortexa53.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 8 +#define BLIS_MR_d 6 + +#define BLIS_NR_s 12 +#define BLIS_NR_d 8 + +//#endif + diff --git a/share/blis/config/cortexa53/make_defs.mk b/share/blis/config/cortexa53/make_defs.mk new file mode 100644 index 0000000000..6036ea55a4 --- /dev/null +++ b/share/blis/config/cortexa53/make_defs.mk @@ -0,0 +1,90 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := cortexa53 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -D_GNU_SOURCE +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -mcpu=cortex-a53 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 -ftree-vectorize +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mcpu=cortex-a53 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mcpu=cortex-a53 +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. 
+CROPTFLAGS := $(CKOPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/cortexa57/bli_kernel_defs_cortexa57.h b/share/blis/config/cortexa57/bli_kernel_defs_cortexa57.h new file mode 100644 index 0000000000..60292099cc --- /dev/null +++ b/share/blis/config/cortexa57/bli_kernel_defs_cortexa57.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 8 +#define BLIS_MR_d 6 + +#define BLIS_NR_s 12 +#define BLIS_NR_d 8 + +//#endif + diff --git a/share/blis/config/cortexa57/make_defs.mk b/share/blis/config/cortexa57/make_defs.mk new file mode 100644 index 0000000000..d84f8538a5 --- /dev/null +++ b/share/blis/config/cortexa57/make_defs.mk @@ -0,0 +1,90 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := cortexa57 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -D_GNU_SOURCE +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -mcpu=cortex-a57 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 -ftree-vectorize +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mcpu=cortex-a57 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mcpu=cortex-a57 +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. 
+CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/cortexa9/bli_kernel_defs_cortexa9.h b/share/blis/config/cortexa9/bli_kernel_defs_cortexa9.h new file mode 100644 index 0000000000..9c413f7f84 --- /dev/null +++ b/share/blis/config/cortexa9/bli_kernel_defs_cortexa9.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 4 +#define BLIS_MR_d 4 + +#define BLIS_NR_s 4 +#define BLIS_NR_d 4 + +//#endif + diff --git a/share/blis/config/cortexa9/make_defs.mk b/share/blis/config/cortexa9/make_defs.mk new file mode 100644 index 0000000000..f5f19e5309 --- /dev/null +++ b/share/blis/config/cortexa9/make_defs.mk @@ -0,0 +1,86 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := cortexa9 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := -mfloat-abi=hard -mfpu=neon +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mcpu=cortex-a9 +else +$(error gcc is required for this configuration.) +endif + +# Flags specific to reference kernels. 
+CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/excavator/bli_kernel_defs_excavator.h b/share/blis/config/excavator/bli_kernel_defs_excavator.h new file mode 100644 index 0000000000..df4a8c4118 --- /dev/null +++ b/share/blis/config/excavator/bli_kernel_defs_excavator.h @@ -0,0 +1,52 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 16 +#define BLIS_MR_d 8 +#define BLIS_MR_c 4 +#define BLIS_MR_z 2 + +#define BLIS_NR_s 3 +#define BLIS_NR_d 3 +#define BLIS_NR_c 2 +#define BLIS_NR_z 2 + +//#endif + diff --git a/share/blis/config/excavator/make_defs.mk b/share/blis/config/excavator/make_defs.mk new file mode 100644 index 0000000000..7977806b22 --- /dev/null +++ b/share/blis/config/excavator/make_defs.mk @@ -0,0 +1,90 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := excavator +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp +else +$(error gcc or clang are required for this configuration.) +endif +endif + +# Flags specific to reference kernels. 
+CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/firestorm/bli_kernel_defs_firestorm.h b/share/blis/config/firestorm/bli_kernel_defs_firestorm.h new file mode 100644 index 0000000000..60292099cc --- /dev/null +++ b/share/blis/config/firestorm/bli_kernel_defs_firestorm.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 8 +#define BLIS_MR_d 6 + +#define BLIS_NR_s 12 +#define BLIS_NR_d 8 + +//#endif + diff --git a/share/blis/config/firestorm/make_defs.mk b/share/blis/config/firestorm/make_defs.mk new file mode 100644 index 0000000000..2353e0040e --- /dev/null +++ b/share/blis/config/firestorm/make_defs.mk @@ -0,0 +1,82 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := firestorm +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -D_GNU_SOURCE +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -march=armv8-a +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 -ftree-vectorize +CKVECFLAGS := -march=armv8-a + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. 
+$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/generic/bli_kernel_defs_generic.h b/share/blis/config/generic/bli_kernel_defs_generic.h new file mode 100644 index 0000000000..db2f32947b --- /dev/null +++ b/share/blis/config/generic/bli_kernel_defs_generic.h @@ -0,0 +1,42 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +//#endif + diff --git a/share/blis/config/generic/make_defs.mk b/share/blis/config/generic/make_defs.mk new file mode 100644 index 0000000000..cbe4fb86f7 --- /dev/null +++ b/share/blis/config/generic/make_defs.mk @@ -0,0 +1,98 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := generic +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := +else +ifeq ($(CC_VENDOR),icc) +CKVECFLAGS := +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := +else +ifeq ($(CC_VENDOR),nvc) +CKVECFLAGS := +else +$(error gcc, icc, nvc, or clang is required for this configuration.) +endif +endif +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. 
+$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/haswell/bli_kernel_defs_haswell.h b/share/blis/config/haswell/bli_kernel_defs_haswell.h new file mode 100644 index 0000000000..c5bc8d63f3 --- /dev/null +++ b/share/blis/config/haswell/bli_kernel_defs_haswell.h @@ -0,0 +1,52 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 6 +#define BLIS_MR_d 6 +#define BLIS_MR_c 3 +#define BLIS_MR_z 3 + +#define BLIS_NR_s 16 +#define BLIS_NR_d 8 +#define BLIS_NR_c 8 +#define BLIS_NR_z 4 + +//#endif + diff --git a/share/blis/config/haswell/make_defs.mk b/share/blis/config/haswell/make_defs.mk new file mode 100644 index 0000000000..6f7b5b49a9 --- /dev/null +++ b/share/blis/config/haswell/make_defs.mk @@ -0,0 +1,100 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := haswell +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +# NOTE: The -fomit-frame-pointer option is needed for some kernels because +# they make explicit use of the rbp register. +CKOPTFLAGS := $(COPTFLAGS) -O3 -fomit-frame-pointer +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mavx2 -mfma -mfpmath=sse -march=haswell +ifeq ($(GCC_OT_4_9_0),yes) +# If gcc is older than 4.9.0, we must use a different label for -march. +CKVECFLAGS := -mavx2 -mfma -mfpmath=sse -march=core-avx2 +endif +else +ifeq ($(CC_VENDOR),icc) +CKVECFLAGS := -xCORE-AVX2 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mavx2 -mfma -mfpmath=sse -march=haswell +else +$(error gcc, icc, or clang is required for this configuration.) +endif +endif +endif + +# Flags specific to reference kernels. 
+CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/intel64/make_defs.mk b/share/blis/config/intel64/make_defs.mk new file mode 100644 index 0000000000..3f62cef572 --- /dev/null +++ b/share/blis/config/intel64/make_defs.mk @@ -0,0 +1,94 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := intel64 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2 +else +ifeq ($(CC_VENDOR),icc) +CKVECFLAGS := -xSSSE3 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2 +else +$(error gcc, icc, or clang is required for this configuration.) +endif +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. 
+$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/knl/bli_kernel_defs_knl.h b/share/blis/config/knl/bli_kernel_defs_knl.h new file mode 100644 index 0000000000..ce514bb21a --- /dev/null +++ b/share/blis/config/knl/bli_kernel_defs_knl.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 24 +#define BLIS_MR_d 24 + +#define BLIS_NR_s 16 +#define BLIS_NR_d 8 + +//#endif + diff --git a/share/blis/config/knl/make_defs.mk b/share/blis/config/knl/make_defs.mk new file mode 100644 index 0000000000..5458745b9c --- /dev/null +++ b/share/blis/config/knl/make_defs.mk @@ -0,0 +1,118 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := knl +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +ifeq ($(DEBUG_TYPE),sde) +# Unconditionally disable use of libmemkind in Intel SDE. +# Note: The BLIS_DISABLE_MEMKIND macro definition will override +# (undefine) the BLIS_ENABLE_MEMKIND macro definition. +CPPROCFLAGS += -DBLIS_DISABLE_MEMKIND +# This value is normally set by configure and communicated to make via +# config.mk, however, the make_defs.mk files (this file) get included +# after config.mk, so this definition will override that earlier +# definition. +MK_ENABLE_MEMKIND := no +endif + +# Flags specific to optimized kernels. 
+CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mavx512f -mavx512pf -mfpmath=sse -march=knl +else +ifeq ($(CC_VENDOR),icc) +CKVECFLAGS := -xMIC-AVX512 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mavx512f -mavx512pf -mfpmath=sse -march=knl +else +$(error gcc, icc, or clang is required for this configuration.) +endif +endif +endif + +# The assembler on OS X won't recognize AVX512 without help. +ifneq ($(CC_VENDOR),icc) +ifeq ($(OS_NAME),Darwin) +CKVECFLAGS += -Wa,-march=knl +endif +endif + +# Flags specific to reference kernels. +# Note: We use AVX2 for reference kernels instead of AVX-512. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := -march=knl -mno-avx512f -mno-avx512pf -mno-avx512er -mno-avx512cd -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),icc) +CRVECFLAGS := -xMIC-AVX512 +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := -march=knl -mno-avx512f -mno-avx512pf -mno-avx512er -mno-avx512cd -funsafe-math-optimizations -ffp-contract=fast +else +$(error gcc, icc, or clang is required for this configuration.) +endif +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/penryn/bli_kernel_defs_penryn.h b/share/blis/config/penryn/bli_kernel_defs_penryn.h new file mode 100644 index 0000000000..f1e483646a --- /dev/null +++ b/share/blis/config/penryn/bli_kernel_defs_penryn.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 8 +#define BLIS_MR_d 4 + +#define BLIS_NR_s 4 +#define BLIS_NR_d 4 + +//#endif + diff --git a/share/blis/config/penryn/make_defs.mk b/share/blis/config/penryn/make_defs.mk new file mode 100644 index 0000000000..d070b7f1ae --- /dev/null +++ b/share/blis/config/penryn/make_defs.mk @@ -0,0 +1,94 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. 
+# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := penryn +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. 
+CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2 +else +ifeq ($(CC_VENDOR),icc) +CKVECFLAGS := -xSSSE3 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2 +else +$(error gcc, icc, or clang is required for this configuration.) +endif +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/piledriver/bli_kernel_defs_piledriver.h b/share/blis/config/piledriver/bli_kernel_defs_piledriver.h new file mode 100644 index 0000000000..df4a8c4118 --- /dev/null +++ b/share/blis/config/piledriver/bli_kernel_defs_piledriver.h @@ -0,0 +1,52 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 16 +#define BLIS_MR_d 8 +#define BLIS_MR_c 4 +#define BLIS_MR_z 2 + +#define BLIS_NR_s 3 +#define BLIS_NR_d 3 +#define BLIS_NR_c 2 +#define BLIS_NR_z 2 + +//#endif + diff --git a/share/blis/config/piledriver/make_defs.mk b/share/blis/config/piledriver/make_defs.mk new file mode 100644 index 0000000000..56b7d0fc51 --- /dev/null +++ b/share/blis/config/piledriver/make_defs.mk @@ -0,0 +1,90 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := piledriver +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. 
+CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver2 -mno-fma4 -mno-tbm -mno-xop -mno-lwp +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver2 -mno-fma4 -mno-tbm -mno-xop -mno-lwp +else +$(error gcc or clang are required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/power/make_defs.mk b/share/blis/config/power/make_defs.mk new file mode 100644 index 0000000000..8350a0a5c0 --- /dev/null +++ b/share/blis/config/power/make_defs.mk @@ -0,0 +1,82 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := power +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +CKVECFLAGS := + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. 
+$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/power10/bli_kernel_defs_power10.h b/share/blis/config/power10/bli_kernel_defs_power10.h new file mode 100644 index 0000000000..9b47a77c0a --- /dev/null +++ b/share/blis/config/power10/bli_kernel_defs_power10.h @@ -0,0 +1,49 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 8 +#define BLIS_MR_d 8 + +#define BLIS_NR_s 16 +#define BLIS_NR_d 8 + + +//#endif + diff --git a/share/blis/config/power10/make_defs.mk b/share/blis/config/power10/make_defs.mk new file mode 100644 index 0000000000..191a3e42a8 --- /dev/null +++ b/share/blis/config/power10/make_defs.mk @@ -0,0 +1,83 @@ + +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2019, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := power10 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mcpu=power10 -mtune=power10 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mcpu=power10 -mtune=power10 +else +$(info $(CC_VENDOR)) +$(error gcc, clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +CRVECFLAGS := $(CKVECFLAGS) + +# Store all of the variables here to new variables containing the +# configuration name. 
+$(eval $(call store-make-defs,$(THIS_CONFIG))) diff --git a/share/blis/config/power9/bli_kernel_defs_power9.h b/share/blis/config/power9/bli_kernel_defs_power9.h new file mode 100644 index 0000000000..debfeac5fc --- /dev/null +++ b/share/blis/config/power9/bli_kernel_defs_power9.h @@ -0,0 +1,49 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_d 12 + +#define BLIS_NR_d 6 + +#define BLIS_BBN_s 4 +#define BLIS_BBN_d 2 + +//#endif + diff --git a/share/blis/config/power9/make_defs.mk b/share/blis/config/power9/make_defs.mk new file mode 100644 index 0000000000..9f604a6074 --- /dev/null +++ b/share/blis/config/power9/make_defs.mk @@ -0,0 +1,84 @@ + +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2019, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := power9 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mcpu=power9 -mtune=power9 -DXLC=0 +else +ifeq ($(CC_VENDOR),IBM) +CKVECFLAGS := -qarch=pwr9 -qtune=pwr9 -DXLC=1 +else +$(info $(CC_VENDOR)) +$(error gcc/xlc is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +CRVECFLAGS := $(CKVECFLAGS) + +# Store all of the variables here to new variables containing the +# configuration name. 
+$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/rv32i/bli_kernel_defs_rv32i.h b/share/blis/config/rv32i/bli_kernel_defs_rv32i.h new file mode 100644 index 0000000000..fe51f998da --- /dev/null +++ b/share/blis/config/rv32i/bli_kernel_defs_rv32i.h @@ -0,0 +1,43 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +// Fall through to generic sizes + +//#endif diff --git a/share/blis/config/rv32i/make_defs.mk b/share/blis/config/rv32i/make_defs.mk new file mode 100644 index 0000000000..21128717f3 --- /dev/null +++ b/share/blis/config/rv32i/make_defs.mk @@ -0,0 +1,102 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := rv32i +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -DRISCV_SIZE=32 + +RISCV_ARCH := $(shell $(CC) -E build/detect/riscv/bli_riscv_detect_arch.h | grep '^[^\#]') +RISCV_ABI := $(shell $(CC) -E build/detect/riscv/bli_riscv_detect_abi.h | grep '^[^\#]') + +ifeq (,$(findstring 32,$(RISCV_ARCH))) +$(error The RISC-V compiler architecture $(RISCV_ARCH) is not compatible with $(THIS_CONFIG)) +else ifeq (,$(findstring 32,$(RISCV_ABI))) +$(error The RISC-V compiler ABI $(RISCV_ABI) is not compatible with $(THIS_CONFIG)) +endif + +CMISCFLAGS := -march=$(RISCV_ARCH) -mabi=$(RISCV_ABI) +CPICFLAGS := -fPIC +CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors + +# In case the A extension is not available +LDFLAGS += -latomic + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. 
+CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) diff --git a/share/blis/config/rv32iv/bli_kernel_defs_rv32iv.h b/share/blis/config/rv32iv/bli_kernel_defs_rv32iv.h new file mode 100644 index 0000000000..b179892085 --- /dev/null +++ b/share/blis/config/rv32iv/bli_kernel_defs_rv32iv.h @@ -0,0 +1,43 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + + + +//#endif diff --git a/share/blis/config/rv32iv/make_defs.mk b/share/blis/config/rv32iv/make_defs.mk new file mode 100644 index 0000000000..9daaee3d68 --- /dev/null +++ b/share/blis/config/rv32iv/make_defs.mk @@ -0,0 +1,104 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := rv32iv +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. 
+CPPROCFLAGS := -DRISCV_SIZE=32 + +RISCV_ARCH := $(shell $(CC) -DFORCE_RISCV_VECTOR -E build/detect/riscv/bli_riscv_detect_arch.h | grep '^[^\#]') +RISCV_ABI := $(shell $(CC) -DFORCE_RISCV_VECTOR -E build/detect/riscv/bli_riscv_detect_abi.h | grep '^[^\#]') + +ifeq (,$(findstring 32,$(RISCV_ARCH))) +$(error The RISC-V compiler architecture $(RISCV_ARCH) is not compatible with $(THIS_CONFIG)) +else ifeq (,$(findstring 32,$(RISCV_ABI))) +$(error The RISC-V compiler ABI $(RISCV_ABI) is not compatible with $(THIS_CONFIG)) +endif + +CMISCFLAGS := -march=$(RISCV_ARCH) -mabi=$(RISCV_ABI) +CPICFLAGS := -fPIC +CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors + +# In case the A extension is not available +LDFLAGS += -latomic + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O0 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +# Lower compiler optimization to -O1. At -O3, gcc version 12.0.1 20220505 +# computes offsets for the matrix ab in the ref gemm kernel incorrectly. +CRVECFLAGS := $(CKVECFLAGS) -O1 +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. 
+$(eval $(call store-make-defs,$(THIS_CONFIG))) diff --git a/share/blis/config/rv64i/bli_kernel_defs_rv64i.h b/share/blis/config/rv64i/bli_kernel_defs_rv64i.h new file mode 100644 index 0000000000..fe51f998da --- /dev/null +++ b/share/blis/config/rv64i/bli_kernel_defs_rv64i.h @@ -0,0 +1,43 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +// Fall through to generic sizes + +//#endif diff --git a/share/blis/config/rv64i/make_defs.mk b/share/blis/config/rv64i/make_defs.mk new file mode 100644 index 0000000000..7c055f0128 --- /dev/null +++ b/share/blis/config/rv64i/make_defs.mk @@ -0,0 +1,102 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := rv64i +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -DRISCV_SIZE=64 + +RISCV_ARCH := $(shell $(CC) -E build/detect/riscv/bli_riscv_detect_arch.h | grep '^[^\#]') +RISCV_ABI := $(shell $(CC) -E build/detect/riscv/bli_riscv_detect_abi.h | grep '^[^\#]') + +ifeq (,$(findstring 64,$(RISCV_ARCH))) +$(error The RISC-V compiler architecture $(RISCV_ARCH) is not compatible with $(THIS_CONFIG)) +else ifeq (,$(findstring 64,$(RISCV_ABI))) +$(error The RISC-V compiler ABI $(RISCV_ABI) is not compatible with $(THIS_CONFIG)) +endif + +CMISCFLAGS := -march=$(RISCV_ARCH) -mabi=$(RISCV_ABI) +CPICFLAGS := -fPIC +CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors + +# In case the A extension is not available +LDFLAGS += -latomic + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. 
+CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) diff --git a/share/blis/config/rv64iv/bli_kernel_defs_rv64iv.h b/share/blis/config/rv64iv/bli_kernel_defs_rv64iv.h new file mode 100644 index 0000000000..18ca4030e0 --- /dev/null +++ b/share/blis/config/rv64iv/bli_kernel_defs_rv64iv.h @@ -0,0 +1,42 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + + +//#endif diff --git a/share/blis/config/rv64iv/make_defs.mk b/share/blis/config/rv64iv/make_defs.mk new file mode 100644 index 0000000000..9ec5a889af --- /dev/null +++ b/share/blis/config/rv64iv/make_defs.mk @@ -0,0 +1,103 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := rv64iv +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. 
+CPPROCFLAGS := -DRISCV_SIZE=64 + +RISCV_ARCH := $(shell $(CC) -DFORCE_RISCV_VECTOR -E build/detect/riscv/bli_riscv_detect_arch.h | grep '^[^\#]') +RISCV_ABI := $(shell $(CC) -DFORCE_RISCV_VECTOR -E build/detect/riscv/bli_riscv_detect_abi.h | grep '^[^\#]') + +ifeq (,$(findstring 64,$(RISCV_ARCH))) +$(error The RISC-V compiler architecture $(RISCV_ARCH) is not compatible with $(THIS_CONFIG)) +else ifeq (,$(findstring 64,$(RISCV_ABI))) +$(error The RISC-V compiler ABI $(RISCV_ABI) is not compatible with $(THIS_CONFIG)) +endif + +CMISCFLAGS := -march=$(RISCV_ARCH) -mabi=$(RISCV_ABI) +CPICFLAGS := -fPIC +CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors + +# In case the A extension is not available +LDFLAGS += -latomic + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -ftree-vectorize +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +# Lower compiler optimization. cinvscalv fails at -O1 +CRVECFLAGS := $(CKVECFLAGS) -O0 +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) diff --git a/share/blis/config/rvv_sg2042/bli_kernel_defs_rvv_sg2042.h b/share/blis/config/rvv_sg2042/bli_kernel_defs_rvv_sg2042.h new file mode 100644 index 0000000000..18ca4030e0 --- /dev/null +++ b/share/blis/config/rvv_sg2042/bli_kernel_defs_rvv_sg2042.h @@ -0,0 +1,42 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + + +//#endif diff --git a/share/blis/config/rvv_sg2042/make_defs.mk b/share/blis/config/rvv_sg2042/make_defs.mk new file mode 100644 index 0000000000..9dca0533f0 --- /dev/null +++ b/share/blis/config/rvv_sg2042/make_defs.mk @@ -0,0 +1,103 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration. Adding it to the running list +# of configurations included by common.mk is commented out below. +THIS_CONFIG := rvv_sg2042 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. NOTE(review): RISCV_ARCH and RISCV_ABI below are whatever the compiler prints when preprocessing the two build/detect/riscv headers with FORCE_RISCV_VECTOR defined; the SG2042 is commonly described as implementing the draft RVV 0.7.1 vector spec rather than RVV 1.0, so confirm the detected -march string is valid for this target. +CPPROCFLAGS := -DRISCV_SIZE=64 + +RISCV_ARCH := $(shell $(CC) -DFORCE_RISCV_VECTOR -E build/detect/riscv/bli_riscv_detect_arch.h | grep '^[^\#]') +RISCV_ABI := $(shell $(CC) -DFORCE_RISCV_VECTOR -E build/detect/riscv/bli_riscv_detect_abi.h | grep '^[^\#]') + +ifeq (,$(findstring 64,$(RISCV_ARCH))) +$(error The RISC-V compiler architecture $(RISCV_ARCH) is not compatible with $(THIS_CONFIG)) +else ifeq (,$(findstring 64,$(RISCV_ABI))) +$(error The RISC-V compiler ABI $(RISCV_ABI) is not compatible with $(THIS_CONFIG)) +endif + +CMISCFLAGS := -march=$(RISCV_ARCH) -mabi=$(RISCV_ABI) +CPICFLAGS := -fPIC +CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors + +# Link libatomic in case the RISC-V A (atomic) extension is not available +LDFLAGS += -latomic + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -ftree-vectorize +endif + +# Flags specific to optimized kernels. 
+CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +# Lower compiler optimization for gcc by appending -O0: cinvscalv fails at -O1 +CRVECFLAGS := $(CKVECFLAGS) -O0 +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) diff --git a/share/blis/config/sandybridge/bli_kernel_defs_sandybridge.h b/share/blis/config/sandybridge/bli_kernel_defs_sandybridge.h new file mode 100644 index 0000000000..dc1b843f60 --- /dev/null +++ b/share/blis/config/sandybridge/bli_kernel_defs_sandybridge.h @@ -0,0 +1,52 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS): MR and NR for each of the s, d, c and z suffixes -- + +#define BLIS_MR_s 8 +#define BLIS_MR_d 8 +#define BLIS_MR_c 8 +#define BLIS_MR_z 4 + +#define BLIS_NR_s 8 +#define BLIS_NR_d 4 +#define BLIS_NR_c 4 +#define BLIS_NR_z 4 + +//#endif + + diff --git a/share/blis/config/sandybridge/make_defs.mk b/share/blis/config/sandybridge/make_defs.mk new file mode 100644 index 0000000000..6047787cda --- /dev/null +++ b/share/blis/config/sandybridge/make_defs.mk @@ -0,0 +1,98 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration. Adding it to the running list +# of configurations included by common.mk is commented out below. +THIS_CONFIG := sandybridge +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. 
+CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mavx -mfpmath=sse -march=sandybridge +ifeq ($(GCC_OT_4_9_0),yes) +# If gcc is older than 4.9.0, we must use a different label for -march. +CKVECFLAGS := -mavx -mfpmath=sse -march=corei7-avx +endif +else +ifeq ($(CC_VENDOR),icc) +CKVECFLAGS := -xAVX +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mavx -mfpmath=sse -march=sandybridge +else +$(error gcc, icc, or clang is required for this configuration.) +endif +endif +endif + +# Flags specific to reference kernels (gcc and clang append fast-math style flags; any other vendor reuses CKVECFLAGS as is). +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + + diff --git a/share/blis/config/sifive_rvv/bli_kernel_defs_sifive_rvv.h b/share/blis/config/sifive_rvv/bli_kernel_defs_sifive_rvv.h new file mode 100644 index 0000000000..33543db50f --- /dev/null +++ b/share/blis/config/sifive_rvv/bli_kernel_defs_sifive_rvv.h @@ -0,0 +1,55 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2024, SiFive, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS): MR, PACKMR and NR for each of the s, d, c and z suffixes. NOTE(review): every NR here is -1, which looks like a placeholder rather than a usable block size; confirm where the real value comes from. -- +#define BLIS_MR_s 7 +#define BLIS_MR_d 7 +#define BLIS_MR_c 6 +#define BLIS_MR_z 6 + +#define BLIS_PACKMR_s 8 +#define BLIS_PACKMR_d 8 +#define BLIS_PACKMR_c 8 +#define BLIS_PACKMR_z 8 + +#define BLIS_NR_s -1 +#define BLIS_NR_d -1 +#define BLIS_NR_c -1 +#define BLIS_NR_z -1 +//#endif + + diff --git a/share/blis/config/sifive_rvv/make_defs.mk b/share/blis/config/sifive_rvv/make_defs.mk new file mode 100644 index 0000000000..a4b3675e15 --- /dev/null +++ b/share/blis/config/sifive_rvv/make_defs.mk @@ -0,0 +1,80 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2024, SiFive, Inc. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration. Adding it to the running list +# of configurations included by common.mk is commented out below. +THIS_CONFIG := sifive_rvv +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. 
+CMISCFLAGS_SIFIVE := -mcmodel=medany -march=rv64gcv_zba_zbb -mabi=lp64d +CMISCFLAGS_SIFIVE_OTHER := +CPPROCFLAGS := +CMISCFLAGS := $(CMISCFLAGS_SIFIVE) $(CMISCFLAGS_SIFIVE_OTHER) \ + -fdata-sections -ffunction-sections \ + -fdiagnostics-color=always -fno-rtti -fno-exceptions +CPICFLAGS := -fPIC +CWARNFLAGS := -Wall -Wextra -Wno-unused-function -Wno-unused-parameter \ + -Wno-sign-compare -Wno-unused-variable + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O3 +endif + +# Flags specific to optimized kernels (the general COPTFLAGS, with no extra vector flags). +CKOPTFLAGS := $(COPTFLAGS) +CKVECFLAGS := + +# Flags specific to reference kernels (identical to the optimized-kernel flags). +CROPTFLAGS := $(CKOPTFLAGS) +CRVECFLAGS := $(CKVECFLAGS) + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + + diff --git a/share/blis/config/sifive_x280/bli_kernel_defs_sifive_x280.h b/share/blis/config/sifive_x280/bli_kernel_defs_sifive_x280.h new file mode 100644 index 0000000000..bb6865a669 --- /dev/null +++ b/share/blis/config/sifive_x280/bli_kernel_defs_sifive_x280.h @@ -0,0 +1,55 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, SiFive, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS): MR, PACKMR and NR for each of the s, d, c and z suffixes -- +#define BLIS_MR_s 7 +#define BLIS_MR_d 7 +#define BLIS_MR_c 6 +#define BLIS_MR_z 6 + +#define BLIS_PACKMR_s 8 +#define BLIS_PACKMR_d 8 +#define BLIS_PACKMR_c 8 +#define BLIS_PACKMR_z 8 + +#define BLIS_NR_s 64 +#define BLIS_NR_d 32 +#define BLIS_NR_c 32 +#define BLIS_NR_z 16 +//#endif + + diff --git a/share/blis/config/sifive_x280/make_defs.mk b/share/blis/config/sifive_x280/make_defs.mk new file mode 100644 index 0000000000..5f19e4e442 --- /dev/null +++ b/share/blis/config/sifive_x280/make_defs.mk @@ -0,0 +1,80 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2023, SiFive, Inc. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration. Adding it to the running list +# of configurations included by common.mk is commented out below. +THIS_CONFIG := sifive_x280 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. 
+CMISCFLAGS_SIFIVE := -mcmodel=medany -march=rv64gcv_zba_zbb_zvl512b -mabi=lp64d +CMISCFLAGS_SIFIVE_OTHER := +CPPROCFLAGS := +CMISCFLAGS := $(CMISCFLAGS_SIFIVE) $(CMISCFLAGS_SIFIVE_OTHER) \ + -fdata-sections -ffunction-sections \ + -fdiagnostics-color=always -fno-rtti -fno-exceptions +CPICFLAGS := -fPIC +CWARNFLAGS := -Wall -Wextra -Wno-unused-function -Wno-unused-parameter \ + -Wno-sign-compare -Wno-unused-variable + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O3 +endif + +# Flags specific to optimized kernels (the general COPTFLAGS, with no extra vector flags). +CKOPTFLAGS := $(COPTFLAGS) +CKVECFLAGS := + +# Flags specific to reference kernels (identical to the optimized-kernel flags). +CROPTFLAGS := $(CKOPTFLAGS) +CRVECFLAGS := $(CKVECFLAGS) + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + + diff --git a/share/blis/config/skx/bli_kernel_defs_skx.h b/share/blis/config/skx/bli_kernel_defs_skx.h new file mode 100644 index 0000000000..2aaf477ad5 --- /dev/null +++ b/share/blis/config/skx/bli_kernel_defs_skx.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS): MR and NR; only the s and d suffixes are defined here -- + +#define BLIS_MR_s 32 +#define BLIS_MR_d 16 + +#define BLIS_NR_s 12 +#define BLIS_NR_d 14 + +//#endif + + diff --git a/share/blis/config/skx/make_defs.mk b/share/blis/config/skx/make_defs.mk new file mode 100644 index 0000000000..589e73dda0 --- /dev/null +++ b/share/blis/config/skx/make_defs.mk @@ -0,0 +1,126 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration. Adding it to the running list +# of configurations included by common.mk is commented out below. +THIS_CONFIG := skx +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +# NOTE: The -fomit-frame-pointer option is needed for some kernels because +# they make explicit use of the rbp register. 
+CKOPTFLAGS := $(COPTFLAGS) -O3 -fomit-frame-pointer +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mavx512f -mavx512dq -mavx512bw -mavx512vl -mfpmath=sse -march=skylake-avx512 +else +ifeq ($(CC_VENDOR),icc) +CKVECFLAGS := -xCORE-AVX512 +else +ifeq ($(CC_VENDOR),clang) +# NOTE: We have to use -march=haswell on Windows because apparently AVX512 +# uses an alternate calling convention where xmm registers are not callee-saved +# on the stack. When this is mixed with framework code compiled for general +# x86_64 mode then chaos ensues (e.g. #514). +ifeq ($(IS_WIN),yes) +CKVECFLAGS := -mavx512f -mavx512dq -mavx512bw -mavx512vl -mfpmath=sse -march=haswell +else +CKVECFLAGS := -mavx512f -mavx512dq -mavx512bw -mavx512vl -mfpmath=sse -march=skylake-avx512 +endif +else +$(error gcc, icc, or clang is required for this configuration.) +endif +endif +endif + +# The assembler on OS X won't recognize AVX512 without help, so for non-icc compilers on Darwin -march is also passed to it via -Wa +ifneq ($(CC_VENDOR),icc) +ifeq ($(OS_NAME),Darwin) +CKVECFLAGS += -Wa,-march=skylake-avx512 +endif +endif + +# Flags specific to reference kernels. +# Note: We use AVX2 for reference kernels because, as Jeff Hammond says, +# reference kernel code "is not going to achieve high enough SIMD utilization +# to overcome the AVX-512 frequency drop". (Issue #187) +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := -march=skylake-avx512 -mno-avx512f -mno-avx512vl -mno-avx512bw -mno-avx512dq -mno-avx512cd -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),icc) +CRVECFLAGS := -xCORE-AVX2 +else +ifeq ($(CC_VENDOR),clang) +# NOTE: We have to use -march=haswell on Windows because apparently AVX512 +# uses an alternate calling convention where xmm registers are not callee-saved +# on the stack. When this is mixed with framework code compiled for general +# x86_64 mode then chaos ensues (e.g. #514). 
+ifeq ($(IS_WIN),yes) +CRVECFLAGS := -march=haswell -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := -march=skylake-avx512 -mno-avx512f -mno-avx512vl -mno-avx512bw -mno-avx512dq -mno-avx512cd -funsafe-math-optimizations -ffp-contract=fast +endif +else +$(error gcc, icc, or clang is required for this configuration.) +endif +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + + diff --git a/share/blis/config/steamroller/bli_kernel_defs_steamroller.h b/share/blis/config/steamroller/bli_kernel_defs_steamroller.h new file mode 100644 index 0000000000..df4a8c4118 --- /dev/null +++ b/share/blis/config/steamroller/bli_kernel_defs_steamroller.h @@ -0,0 +1,52 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS): MR and NR for each of the s, d, c and z suffixes -- + +#define BLIS_MR_s 16 +#define BLIS_MR_d 8 +#define BLIS_MR_c 4 +#define BLIS_MR_z 2 + +#define BLIS_NR_s 3 +#define BLIS_NR_d 3 +#define BLIS_NR_c 2 +#define BLIS_NR_z 2 + +//#endif + + diff --git a/share/blis/config/steamroller/make_defs.mk b/share/blis/config/steamroller/make_defs.mk new file mode 100644 index 0000000000..122472c85d --- /dev/null +++ b/share/blis/config/steamroller/make_defs.mk @@ -0,0 +1,90 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration. Adding it to the running list +# of configurations included by common.mk is commented out below. +THIS_CONFIG := steamroller +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver3 -mno-fma4 -mno-tbm -mno-xop -mno-lwp +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver3 -mno-fma4 -mno-tbm -mno-xop -mno-lwp +else +$(error gcc or clang are required for this configuration.) +endif +endif + +# Flags specific to reference kernels. 
+CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + + diff --git a/share/blis/config/thunderx2/bli_kernel_defs_thunderx2.h b/share/blis/config/thunderx2/bli_kernel_defs_thunderx2.h new file mode 100644 index 0000000000..60292099cc --- /dev/null +++ b/share/blis/config/thunderx2/bli_kernel_defs_thunderx2.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 8 +#define BLIS_MR_d 6 + +#define BLIS_NR_s 12 +#define BLIS_NR_d 8 + +//#endif + diff --git a/share/blis/config/thunderx2/make_defs.mk b/share/blis/config/thunderx2/make_defs.mk new file mode 100644 index 0000000000..fd7df2eee2 --- /dev/null +++ b/share/blis/config/thunderx2/make_defs.mk @@ -0,0 +1,90 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := thunderx2 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -D_GNU_SOURCE +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -mcpu=thunderx2t99 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 -ftree-vectorize +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mcpu=thunderx2t99 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mcpu=thunderx2t99 +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. 
+CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/x86_64/make_defs.mk b/share/blis/config/x86_64/make_defs.mk new file mode 100644 index 0000000000..3c912370e0 --- /dev/null +++ b/share/blis/config/x86_64/make_defs.mk @@ -0,0 +1,94 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := x86_64 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2 +else +ifeq ($(CC_VENDOR),icc) +CKVECFLAGS := -xSSE3 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2 +else +$(error gcc, icc, or clang is required for this configuration.) +endif +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. 
+$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/zen/bli_kernel_defs_zen.h b/share/blis/config/zen/bli_kernel_defs_zen.h new file mode 100644 index 0000000000..c5bc8d63f3 --- /dev/null +++ b/share/blis/config/zen/bli_kernel_defs_zen.h @@ -0,0 +1,52 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 6 +#define BLIS_MR_d 6 +#define BLIS_MR_c 3 +#define BLIS_MR_z 3 + +#define BLIS_NR_s 16 +#define BLIS_NR_d 8 +#define BLIS_NR_c 8 +#define BLIS_NR_z 4 + +//#endif + diff --git a/share/blis/config/zen/make_defs.mk b/share/blis/config/zen/make_defs.mk new file mode 100644 index 0000000000..389a313b6c --- /dev/null +++ b/share/blis/config/zen/make_defs.mk @@ -0,0 +1,93 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := zen +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -fomit-frame-pointer +endif + +# Flags specific to optimized and reference kernels. +# NOTE: The -fomit-frame-pointer option is needed for some kernels because +# they make explicit use of the rbp register. +CKOPTFLAGS := $(COPTFLAGS) -O3 +CROPTFLAGS := $(CKOPTFLAGS) +CKVECFLAGS := -mavx2 -mfma -mfpmath=sse +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +ifeq ($(CC_VENDOR),gcc) + ifeq ($(GCC_OT_6_1_0),yes) # gcc versions older than 6.1. 
+ CVECFLAGS_VER := -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp + else + CVECFLAGS_VER := -march=znver1 -mno-avx256-split-unaligned-store + endif +else +ifeq ($(CC_VENDOR),clang) + CVECFLAGS_VER := -march=znver1 +else +ifeq ($(CC_VENDOR),aocc) + CVECFLAGS_VER := -march=znver1 -mllvm -disable-licm-vrp +else + $(error gcc, clang, or aocc is required for this configuration.) +endif +endif +endif +CKVECFLAGS += $(CVECFLAGS_VER) +CRVECFLAGS += $(CVECFLAGS_VER) + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/zen2/bli_kernel_defs_zen2.h b/share/blis/config/zen2/bli_kernel_defs_zen2.h new file mode 100644 index 0000000000..c5bc8d63f3 --- /dev/null +++ b/share/blis/config/zen2/bli_kernel_defs_zen2.h @@ -0,0 +1,52 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 6 +#define BLIS_MR_d 6 +#define BLIS_MR_c 3 +#define BLIS_MR_z 3 + +#define BLIS_NR_s 16 +#define BLIS_NR_d 8 +#define BLIS_NR_c 8 +#define BLIS_NR_z 4 + +//#endif + diff --git a/share/blis/config/zen2/make_defs.mk b/share/blis/config/zen2/make_defs.mk new file mode 100644 index 0000000000..1eebf7fa76 --- /dev/null +++ b/share/blis/config/zen2/make_defs.mk @@ -0,0 +1,105 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := zen2 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -fomit-frame-pointer +endif + +# Flags specific to optimized and reference kernels. +# NOTE: The -fomit-frame-pointer option is needed for some kernels because +# they make explicit use of the rbp register. +CKOPTFLAGS := $(COPTFLAGS) -O3 +CROPTFLAGS := $(CKOPTFLAGS) +CKVECFLAGS := -mavx2 -mfma -mfpmath=sse +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +ifeq ($(CC_VENDOR),gcc) + ifeq ($(GCC_OT_6_1_0),yes) # gcc versions older than 6.1. 
+ CVECFLAGS_VER := -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp + else + ifeq ($(GCC_OT_9_1_0),yes) # gcc versions 6.1 or newer, but older than 9.1. + CVECFLAGS_VER := -march=znver1 -mno-avx256-split-unaligned-store + else # gcc versions 9.1 or newer. + CVECFLAGS_VER := -march=znver2 + endif + endif +else +ifeq ($(CC_VENDOR),clang) + ifeq ($(CLANG_OT_9_0_0),yes) # clang versions older than 9.0. + CVECFLAGS_VER := -march=znver1 + else # clang versions 9.0 or newer. + CVECFLAGS_VER := -march=znver2 + endif +else +ifeq ($(CC_VENDOR),aocc) + ifeq ($(AOCC_OT_2_0_0),yes) # aocc versions older than 2.0. + CVECFLAGS_VER := -march=znver1 -mllvm -disable-licm-vrp + else # aocc versions 2.0 or newer. + CVECFLAGS_VER := -march=znver2 + endif +else + $(error gcc, clang, or aocc is required for this configuration.) +endif +endif +endif +CKVECFLAGS += $(CVECFLAGS_VER) +CRVECFLAGS += $(CVECFLAGS_VER) + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/zen3/bli_kernel_defs_zen3.h b/share/blis/config/zen3/bli_kernel_defs_zen3.h new file mode 100644 index 0000000000..c5bc8d63f3 --- /dev/null +++ b/share/blis/config/zen3/bli_kernel_defs_zen3.h @@ -0,0 +1,52 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 6 +#define BLIS_MR_d 6 +#define BLIS_MR_c 3 +#define BLIS_MR_z 3 + +#define BLIS_NR_s 16 +#define BLIS_NR_d 8 +#define BLIS_NR_c 8 +#define BLIS_NR_z 4 + +//#endif + diff --git a/share/blis/config/zen3/make_defs.mk b/share/blis/config/zen3/make_defs.mk new file mode 100644 index 0000000000..0bd4ed3441 --- /dev/null +++ b/share/blis/config/zen3/make_defs.mk @@ -0,0 +1,127 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := zen3 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O3 +endif + +# Flags specific to optimized and reference kernels.
+# NOTE: The -fomit-frame-pointer option is needed for some kernels because +# they make explicit use of the rbp register. +CKOPTFLAGS := $(COPTFLAGS) -fomit-frame-pointer +CROPTFLAGS := $(CKOPTFLAGS) +CKVECFLAGS := -mavx2 -mfma +CRVECFLAGS := $(CKVECFLAGS) +ifeq ($(CC_VENDOR),gcc) + ifeq ($(GCC_OT_9_1_0),yes) # gcc versions older than 9.1. + CVECFLAGS_VER := -march=znver1 -mno-avx256-split-unaligned-store + else + ifeq ($(GCC_OT_10_3_0),yes) # gcc versions 9.1 or newer, but older than 10.3. + CVECFLAGS_VER := -march=znver2 + else # gcc versions 10.3 or newer. + CVECFLAGS_VER := -march=znver3 + endif + endif + CKVECFLAGS += -mfpmath=sse + CRVECFLAGS += -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) + ifeq ($(CLANG_OT_9_0_0),yes) # clang versions older than 9.0. + CVECFLAGS_VER := -march=znver1 + else + ifeq ($(CLANG_OT_12_0_0),yes) # clang versions 9.0 or newer, but older than 12.0. + CVECFLAGS_VER := -march=znver2 + else + ifeq ($(OS_NAME),Darwin) # clang version 12.0 on OSX lacks znver3 support + CVECFLAGS_VER := -march=znver2 + else # clang versions 12.0 or newer. + CVECFLAGS_VER := -march=znver3 + endif + endif + endif + CKVECFLAGS += -mfpmath=sse + CRVECFLAGS += -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),aocc) + ifeq ($(AOCC_OT_2_0_0),yes) # aocc versions older than 2.0. + CVECFLAGS_VER := -march=znver1 + else + ifeq ($(AOCC_OT_3_0_0),yes) # aocc versions 2.0 or newer, but older than 3.0. + CVECFLAGS_VER := -march=znver2 + else # aocc versions 3.0 or newer. + CVECFLAGS_VER := -march=znver3 + endif + endif + CKVECFLAGS += -mfpmath=sse + CRVECFLAGS += -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),nvc) + CVECFLAGS_VER := -march=znver3 + CRVECFLAGS += -fast +else + $(error gcc, clang, nvc or aocc is required for this configuration.)
+endif +endif +endif +endif +CKVECFLAGS += $(CVECFLAGS_VER) +CRVECFLAGS += $(CVECFLAGS_VER) + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) diff --git a/share/blis/config_registry b/share/blis/config_registry new file mode 100644 index 0000000000..f30c7d835d --- /dev/null +++ b/share/blis/config_registry @@ -0,0 +1,70 @@ +# +# config_registry +# +# Please refer to the BLIS wiki on configurations for information on the +# syntax and semantics of this file [1]. +# +# [1] https://github.com/flame/blis/blob/master/docs/ConfigurationHowTo.md +# + +# Processor families. +x86_64: intel64 amd64 amd64_legacy +intel64: skx knl haswell sandybridge penryn generic +amd64_legacy: excavator steamroller piledriver bulldozer generic +amd64: zen3 zen2 zen generic +arm64: armsve firestorm thunderx2 cortexa57 cortexa53 generic +arm32: cortexa15 cortexa9 generic +power: power10 power9 generic + +# Intel architectures. +skx: skx/skx/haswell/zen +knl: knl/knl/haswell/zen +haswell: haswell/haswell/zen +sandybridge: sandybridge +penryn: penryn + +# AMD architectures. +zen3: zen3/zen3/zen2/zen/haswell +zen2: zen2/zen2/zen/haswell +zen: zen/zen/haswell +excavator: excavator/piledriver +steamroller: steamroller/piledriver +piledriver: piledriver +bulldozer: bulldozer + +# ARM architectures. +armsve: armsve/armsve +a64fx: a64fx/armsve + +# ARM Neon64 (4 pipes x 128b) architectures. +altramax: altramax/armv8a +altra: altra/armv8a +firestorm: firestorm/armv8a + +# ARM (2 pipes x 128b) architectures. +thunderx2: thunderx2/armv8a +cortexa57: cortexa57/armv8a +cortexa53: cortexa53/armv8a + +# ARM Vintage architectures. +cortexa15: cortexa15/armv7a +cortexa9: cortexa9/armv7a + +# IBM architectures. +power10: power10 +power9: power9 +bgq: bgq + +# RISC-V architectures. Added! +rv32i: rv32i/rvi +rv64i: rv64i/rvi +rv32iv: rv32iv/rviv +rv64iv: rv64iv/rviv +rvv_sg2042: rvv_sg2042 + +# SiFive architectures. 
+sifive_rvv: sifive_rvv +sifive_x280: sifive_x280/sifive_rvv + +# Generic architectures. +generic: generic diff --git a/share/blis/configure-plugin b/share/blis/configure-plugin new file mode 100755 index 0000000000..c217c4da83 --- /dev/null +++ b/share/blis/configure-plugin @@ -0,0 +1,5551 @@ +#!/usr/bin/env bash +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# Copyright (C) 2020-2022, Advanced Micro Devices, Inc. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# +# shellcheck disable=2001,2249,2034,2154,2181,2312,2250,2292 + +# +# -- Helper functions ---------------------------------------------------------- +# + +print_usage() +{ + # Use the version string in the 'version' file since we don't have + # the patched version string yet. + if [ -z "${version}" ]; then + version=$(<"${version_filepath}") + fi + + # Echo usage info. + cat <