From be42876ad81e7de6db6e7f1aec3e0180278d98ad Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Thu, 12 May 2022 13:59:22 -0500 Subject: [PATCH 1/5] Expanded error handling options in configure. Details: - This commit makes changes pursuant to #479 that provides additional configure-time options for determining whether and how BLIS behaves when errors are detected at runtime. The options are presently not yet honored anywhere within the framework. However, this will change in the future. --- build/bli_config.h.in | 14 +++++++ configure | 92 ++++++++++++++++++++++++++++++++++++++----- 2 files changed, 97 insertions(+), 9 deletions(-) diff --git a/build/bli_config.h.in b/build/bli_config.h.in index fa6bbbe12e..a46258182b 100644 --- a/build/bli_config.h.in +++ b/build/bli_config.h.in @@ -79,6 +79,20 @@ #define BLIS_DISABLE_SBA_POOLS #endif +#if @enable_error_checking@ +#define BLIS_ENABLE_ERROR_CHECKING +#else +#define BLIS_DISABLE_ERROR_CHECKING +#endif + +#if @enable_error_return@ +#define BLIS_ENABLE_ERROR_RETURN +#endif + +#if @enable_error_abort@ +#define BLIS_ENABLE_ERROR_ABORT +#endif + #if @enable_mem_tracing@ #define BLIS_ENABLE_MEM_TRACING #else diff --git a/configure b/configure index 7e825f1dc3..ed5c2efd11 100755 --- a/configure +++ b/configure @@ -209,9 +209,30 @@ print_usage() echo " it no longer needs to call malloc() or free(), even" echo " across many separate level-3 operation invocations." echo " " + echo " --enable-error-checking, --disable-error-checking" + echo " " + echo " Enable (disabled by default) runtime error checking. This" + echo " includes checking for things such as inconsistent object" + echo " properties, memory allocation errors, and configuration" + echo " errors. When enabled, BLIS will report an error via the" + echo " method specified by the --error-handling-mode option." + echo " When disabled, any function that is set up to return an" + echo " error code will return \"success\" unconditionally." 
+ echo " " + echo " --error-handling-mode=[return|abort]" + echo " " + echo " Specify the way that BLIS reacts to errors. The 'return'" + echo " mode causes BLIS to return an error code all the way up" + echo " the function stack to the caller, which may then be used" + echo " to query a human-readable error string. The 'abort' mode" + echo " causes BLIS to output the aforementioned error string and" + echo " then call abort(), which facilitates debugging through a" + echo " a debugger's backtrace feature. By default, the 'abort'" + echo " mode is used." + echo " " echo " --enable-mem-tracing, --disable-mem-tracing" echo " " - echo " Enable (disable by default) output to stdout that traces" + echo " Enable (disabled by default) output to stdout that traces" echo " the allocation and freeing of memory, including the names" echo " of the functions that triggered the allocation/freeing." echo " Enabling this option WILL NEGATIVELY IMPACT PERFORMANCE." @@ -339,8 +360,8 @@ print_usage() echo " these division instructions within the microkernel will" echo " incur a performance penalty, but numerical robustness will" echo " improve for certain cases involving denormal numbers that" - echo " would otherwise result in overflow in the pre-inverted" - echo " values." + echo " would otherwise result in overflow if pre-inversion were" + echo " employed." echo " " echo " --force-version=STRING" echo " " @@ -356,14 +377,14 @@ print_usage() echo " a sanity check to make sure these lists are constituted" echo " as expected." echo " " - echo " --complex-return=gnu|intel" + echo " --complex-return=[gnu|intel]" echo " " echo " Specify the way in which complex numbers are returned" - echo " from Fortran functions, either \"gnu\" (return in" - echo " registers) or \"intel\" (return via hidden argument)." + echo " from Fortran functions, either 'gnu' (return in" + echo " registers) or 'intel' (return via hidden argument)." 
echo " If not specified and the environment variable FC is set," echo " attempt to determine the return type from the compiler." - echo " Otherwise, the default is \"gnu\"." + echo " Otherwise, the default is 'gnu'." echo " " echo " -q, --quiet Suppress informational output. By default, configure" echo " is verbose. (NOTE: -q is not yet implemented)" @@ -2451,6 +2472,10 @@ main() quiet_flag='' show_config_list='' + # Error-related flags. + enable_error_checking='yes' + error_handling_mode='abort' + # Additional flags. enable_verbose='no' enable_arg_max_hack='no' @@ -2602,6 +2627,15 @@ main() disable-system) enable_system='no' ;; + enable-error-checking) + enable_error_checking='yes' + ;; + disable-error-checking) + enable_error_checking='no' + ;; + error-handling-mode=*) + error_handling_mode=${OPTARG#*=} + ;; enable-threading=*) threading_model=${OPTARG#*=} ;; @@ -3461,7 +3495,7 @@ main() exit 1 fi - # Convert 'yes' and 'no' flags to booleans. + # Check if we are enabling memory pools for large or small blocks. if [ "x${enable_pba_pools}" = "xyes" ]; then echo "${script_name}: internal memory pools for packing blocks are enabled." enable_pba_pools_01=1 @@ -3476,6 +3510,31 @@ main() echo "${script_name}: internal memory pools for small blocks are disabled." enable_sba_pools_01=0 fi + + # Check if we are enabling error checking. + if [ "x${enable_error_checking}" = "xyes" ]; then + echo "${script_name}: error checking is enabled." + enable_error_checking_01=1 + else + echo "${script_name}: error checking is disabled." + enable_error_checking_01=0 + fi + + # Check the error handling mode. + enable_error_return_01=0 + enable_error_abort_01=0 + if [ "x${error_handling_mode}" = "xreturn" ]; then + echo "${script_name}: requesting that error codes be returned to caller." + enable_error_return_01=1 + elif [ "x${error_handling_mode}" = "xabort" ]; then + echo "${script_name}: requesting that errors trigger a message followed by abort()." 
+ enable_error_abort_01=1 + else + echo "${script_name}: *** Unsupported mode of error handling: ${error_handling_mode}." + exit 1 + fi + + # Check if we are enabling memory tracing output. if [ "x${enable_mem_tracing}" = "xyes" ]; then echo "${script_name}: memory tracing output is enabled." enable_mem_tracing_01=1 @@ -3483,6 +3542,8 @@ main() echo "${script_name}: memory tracing output is disabled." enable_mem_tracing_01=0 fi + + # Check if we are enabling support for libmemkind. if [ "x${has_memkind}" = "xyes" ]; then if [ "x${enable_memkind}" = "x" ]; then # If no explicit option was given for libmemkind one way or the other, @@ -3510,6 +3571,8 @@ main() enable_memkind="no" enable_memkind_01=0 fi + + # Check if we are enabling #pragma omp simd. if [ "x${pragma_omp_simd}" = "xyes" ]; then echo "${script_name}: compiler appears to support #pragma omp simd." enable_pragma_omp_simd_01=1 @@ -3517,6 +3580,8 @@ main() echo "${script_name}: compiler appears to not support #pragma omp simd." enable_pragma_omp_simd_01=0 fi + + # Check if we are enabling the BLAS/CBLAS compatibility layers. if [ "x${enable_blas}" = "xyes" ]; then echo "${script_name}: the BLAS compatibility layer is enabled." enable_blas_01=1 @@ -3533,6 +3598,8 @@ main() echo "${script_name}: the CBLAS compatibility layer is disabled." enable_cblas_01=0 fi + + # Check if we are enabling mixed datatype support. if [ "x${enable_mixed_dt}" = "xyes" ]; then echo "${script_name}: mixed datatype support is enabled." @@ -3551,6 +3618,8 @@ main() enable_mixed_dt_extra_mem_01=0 enable_mixed_dt_01=0 fi + + # Check if we are enabling skinny/unpacked (sup) matrix handling. if [ "x${enable_sup_handling}" = "xyes" ]; then echo "${script_name}: small matrix handling is enabled." enable_sup_handling_01=1 @@ -3558,6 +3627,8 @@ main() echo "${script_name}: small matrix handling is disabled." enable_sup_handling_01=0 fi + + # Check if we are enabling pre-inversion of diagonal elements for trsm. 
if [ "x${enable_trsm_preinversion}" = "xyes" ]; then echo "${script_name}: trsm diagonal element pre-inversion is enabled." enable_trsm_preinversion_01=1 @@ -3709,7 +3780,7 @@ main() exit 1 fi - echo "${script_name}: configuring complex return type as \"${complex_return}\"." + echo "${script_name}: configuring complex return type as '${complex_return}'." # Variables that may contain forward slashes, such as paths, need extra # escaping when used in sed commands. We insert those extra escape @@ -3891,6 +3962,9 @@ main() | sed -e "s/@enable_jrir_rr@/${enable_jrir_rr_01}/g" \ | sed -e "s/@enable_pba_pools@/${enable_pba_pools_01}/g" \ | sed -e "s/@enable_sba_pools@/${enable_sba_pools_01}/g" \ + | sed -e "s/@enable_error_checking@/${enable_error_checking_01}/g" \ + | sed -e "s/@enable_error_return@/${enable_error_return_01}/g" \ + | sed -e "s/@enable_error_abort@/${enable_error_abort_01}/g" \ | sed -e "s/@enable_mem_tracing@/${enable_mem_tracing_01}/g" \ | sed -e "s/@int_type_size@/${int_type_size}/g" \ | sed -e "s/@blas_int_type_size@/${blas_int_type_size}/g" \ From a1449967a3b272906b06db776091c9c15996ddc7 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Sun, 10 Jul 2022 10:53:20 -0500 Subject: [PATCH 2/5] Initial error handling infrastructure + level0. Details: - Fully updated frame/0 in accordance to new error-handling APIs and error-checking policies. This includes: a. all functions that might possibly generate an error now return a value of type err_t. b. any such function in (a) that is called will have its return value captured and inspected for further return. - Updated about half of the files within frame/base for err_t handling. - Partially updated frame/thread, as necessary to given the updated err_t return values for other code included in this commit. A key omission was the thread decorators, which do not yet handle err_t values. - Added a new file, bli_error_macro_defs.h, of error-related macros. 
These macros conveniently capture the logic that should be executed in several common situations, including checking a locally-determined error code for failure (and acting accordingly) as well as checking whether the err_t return value from a recently-called function needs to be returned up the function stack (in lieu of completing execution of the current function normally). Not all of these functions are used in the changes introduced in this commit, but they represent most situations that I foresee needing going forward. - Re-indexed the err_t enum values so that BLIS_SUCCESS is assigned 0 and BLIS_FAILURE (that is, generic failure) is assigned -1, instead of -1 and -2, respectively. This brings the BLIS error code behavior into closer conformality with many other C and Linux functions and tools. - Defined a new errmode_t enum with two values -- BLIS_ERROR_RETURN and BLIS_ERROR_ABORT. - Defined new static inline functions in bli_param_macro_defs.h for distinguishing ind_t values (e.g. BLIS_NAT and BLIS_1M). Did the same for dir_t values (e.g. BLIS_FWD and BLIS_BWD). - Replaced all instances in BLIS of if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } else { rntm_l = *rntm; rntm = &rntm_l; } with a call to a new static inline function that offers identical functionality: bli_rntm_init_if_null( &rntm, &rntm_l ); - Replaced all instances in BLIS of if ( cntx == NULL ) cntx = bli_gks_query_cntx(); with a call to a new static inline function that offers identical functionality: bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); - Moved frame/base/cast/bli_castnzm.c and .h to an 'old' sub-directory. - Removed 'restrict' qualifier from cntx_t* argument to scalv and axpyf kernels in 'zen' kernel set. - Updated hardware auto-detection code to reflect updated function signature to bli_arch_string(). - Updated the output of 'configure --help' to correctly indicate that error checking is enabled by default. 
- Updated testsuite source files to conform to above changes. - Updated documentation to reflect updated function signatures, including removal of 'restrict' qualifier from cntx_t* and auxinfo_t* arguments to various kernels APIs. --- addon/gemmd/attic/bli_gemm_ex.c | 5 +- addon/gemmd/bao_gemmd.c | 5 +- build/detect/config/config_detect.c | 6 +- configure | 6 +- docs/BLISObjectAPI.md | 20 +- docs/BLISTypedAPI.md | 42 +- docs/ConfigurationHowTo.md | 6 +- docs/KernelsHowTo.md | 116 ++--- frame/0/bli_l0_check.c | 182 +++---- frame/0/bli_l0_check.h | 23 +- frame/0/bli_l0_ft.h | 20 +- frame/0/bli_l0_oapi.c | 96 +++- frame/0/bli_l0_oapi.h | 14 +- frame/0/bli_l0_tapi.c | 72 ++- frame/0/bli_l0_tapi.h | 20 +- frame/0/copysc/bli_copysc.c | 31 +- frame/0/copysc/bli_copysc.h | 4 +- frame/1/bli_l1v_tapi.c | 21 +- frame/1d/bli_l1d_tapi.c | 14 +- frame/1f/bli_l1f_tapi.c | 10 +- frame/1m/bli_l1m_tapi.c | 14 +- frame/1m/packm/bli_packm_cntl.c | 2 +- frame/1m/unpackm/bli_unpackm_cntl.c | 1 + frame/2/bli_l2_tapi.c | 14 +- frame/3/bli_l3_oapi_ex.c | 70 +-- frame/3/bli_l3_sup.c | 20 +- frame/3/bli_l3_thrinfo.c | 16 +- frame/3/bli_l3_thrinfo.h | 4 +- frame/3/gemm/bli_gemm_md.c | 3 +- frame/base/bli_apool.c | 225 ++++++--- frame/base/bli_apool.h | 39 +- frame/base/bli_arch.c | 215 ++++++--- frame/base/bli_arch.h | 14 +- frame/base/bli_array.c | 82 ++-- frame/base/bli_array.h | 11 +- frame/base/bli_check.c | 98 ++++ frame/base/bli_check.h | 10 + frame/base/bli_cntl.c | 5 +- frame/base/bli_error.c | 160 +++++- frame/base/bli_error.h | 28 +- frame/base/bli_gks.c | 589 ++++++++++++++--------- frame/base/bli_gks.h | 64 ++- frame/base/bli_ind.c | 84 +++- frame/base/bli_ind.h | 32 +- frame/base/bli_info.c | 127 ++++- frame/base/bli_info.h | 31 +- frame/base/bli_init.c | 90 +++- frame/base/bli_init.h | 16 +- frame/base/bli_memsys.c | 64 --- frame/base/bli_memsys.h | 47 -- frame/base/bli_pack.c | 63 ++- frame/base/bli_pack.h | 18 +- frame/base/bli_pba.c | 129 +++-- frame/base/bli_pba.h | 25 +- 
frame/base/bli_pool.c | 226 ++++++--- frame/base/bli_pool.h | 22 +- frame/base/bli_rntm.c | 2 +- frame/base/bli_rntm.h | 22 +- frame/base/bli_sba.c | 109 ++++- frame/base/bli_sba.h | 49 +- frame/base/cast/{ => old}/bli_castnzm.c | 0 frame/base/cast/{ => old}/bli_castnzm.h | 0 frame/compat/amd/bla_gemv_amd.c | 3 +- frame/compat/extra/bla_gemm3m.c | 6 +- frame/include/bli_error_macro_defs.h | 84 ++++ frame/include/bli_param_macro_defs.h | 30 ++ frame/include/bli_type_defs.h | 98 ++-- frame/include/blis.h | 5 +- frame/thread/bli_l3_decor_openmp.c | 6 +- frame/thread/bli_l3_decor_pthreads.c | 6 +- frame/thread/bli_l3_decor_single.c | 6 +- frame/thread/bli_l3_sup_decor_openmp.c | 3 +- frame/thread/bli_l3_sup_decor_pthreads.c | 3 +- frame/thread/bli_l3_sup_decor_single.c | 3 +- frame/thread/bli_thrcomm.h | 10 +- frame/thread/bli_thrcomm_openmp.c | 15 +- frame/thread/bli_thrcomm_pthreads.c | 15 +- frame/thread/bli_thrcomm_single.c | 16 +- frame/thread/bli_thread.c | 64 ++- frame/thread/bli_thread.h | 12 +- frame/thread/bli_thrinfo.c | 240 ++++++--- frame/thread/bli_thrinfo.h | 65 +-- frame/thread/bli_thrinfo_sup.c | 247 ++++++---- frame/thread/bli_thrinfo_sup.h | 24 +- frame/util/bli_util_check.c | 2 +- frame/util/bli_util_tapi.c | 18 +- kernels/zen/1/bli_scalv_zen_int10.c | 6 +- kernels/zen/1f/bli_axpyf_zen_int_4.c | 4 +- kernels/zen/1f/bli_axpyf_zen_int_5.c | 14 +- sandbox/gemmlike/bli_gemm_ex.c | 5 +- sandbox/gemmlike/bls_gemm.c | 5 +- sandbox/old/ref99/bli_gemmnat.c | 5 +- sandbox/power10/bli_gemm_ex.c | 5 +- testsuite/src/test_axpy2v.c | 4 +- testsuite/src/test_axpyf.c | 4 +- testsuite/src/test_dotaxpyv.c | 4 +- testsuite/src/test_dotxaxpyf.c | 4 +- testsuite/src/test_dotxf.c | 4 +- testsuite/src/test_gemm_ukr.c | 4 +- testsuite/src/test_gemmtrsm_ukr.c | 10 +- testsuite/src/test_libblis.c | 268 +++++++---- testsuite/src/test_trsm_ukr.c | 10 +- 102 files changed, 3191 insertions(+), 1699 deletions(-) delete mode 100644 frame/base/bli_memsys.c delete mode 100644 
frame/base/bli_memsys.h rename frame/base/cast/{ => old}/bli_castnzm.c (100%) rename frame/base/cast/{ => old}/bli_castnzm.h (100%) diff --git a/addon/gemmd/attic/bli_gemm_ex.c b/addon/gemmd/attic/bli_gemm_ex.c index 0f40d1cb39..88f0e159cf 100644 --- a/addon/gemmd/attic/bli_gemm_ex.c +++ b/addon/gemmd/attic/bli_gemm_ex.c @@ -69,11 +69,10 @@ void bli_gemm_ex // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Obtain a valid (native) context from the gks if necessary. - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); \ // Check the operands. if ( bli_error_checking_is_enabled() ) diff --git a/addon/gemmd/bao_gemmd.c b/addon/gemmd/bao_gemmd.c index 01185a9d75..35653279a6 100644 --- a/addon/gemmd/bao_gemmd.c +++ b/addon/gemmd/bao_gemmd.c @@ -78,13 +78,12 @@ void bao_gemmd_ex // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Obtain a valid (native) context from the gks if necessary. // NOTE: This must be done before calling the _check() function, since // that function assumes the context pointer is valid. - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); \ // Check parameters. 
if ( bli_error_checking_is_enabled() ) diff --git a/build/detect/config/config_detect.c b/build/detect/config/config_detect.c index 5f1ea0f420..b501f6b23a 100644 --- a/build/detect/config/config_detect.c +++ b/build/detect/config/config_detect.c @@ -69,8 +69,10 @@ int main( int argc, char** argv ) { - arch_t id = bli_cpuid_query_id(); - const char* s = bli_arch_string( id ); + const char* s; + + arch_t id = bli_cpuid_query_id(); + err_t r_val = bli_arch_string( id, &s ); printf( "%s\n", s ); diff --git a/configure b/configure index 0a884261b1..43d16bd40c 100755 --- a/configure +++ b/configure @@ -211,7 +211,7 @@ print_usage() echo " " echo " --enable-error-checking, --disable-error-checking" echo " " - echo " Enable (disabled by default) runtime error checking. This" + echo " Disable (enabled by default) runtime error checking. This" echo " includes checking for things such as inconsistent object" echo " properties, memory allocation errors, and configuration" echo " errors. When enabled, BLIS will report an error via the" @@ -226,8 +226,8 @@ print_usage() echo " the function stack to the caller, which may then be used" echo " to query a human-readable error string. The 'abort' mode" echo " causes BLIS to output the aforementioned error string and" - echo " then call abort(), which facilitates debugging through a" - echo " a debugger's backtrace feature. By default, the 'abort'" + echo " then call abort(), which facilitates debugging (e.g. via" + echo " a debugger's backtrace feature). By default, the 'abort'" echo " mode is used." 
echo " " echo " --enable-mem-tracing, --disable-mem-tracing" diff --git a/docs/BLISObjectAPI.md b/docs/BLISObjectAPI.md index 5e8ed3d8fb..f618786abc 100644 --- a/docs/BLISObjectAPI.md +++ b/docs/BLISObjectAPI.md @@ -203,9 +203,9 @@ The expert interface contains two additional parameters: a `cntx_t*` and `rntm_t In general, it is permissible to pass in `NULL` for a `cntx_t*` parameter when calling an expert interface such as `bli_gemm_ex()`. However, there are cases where `NULL` values are not accepted and may result in a segmentation fault. Specifically, the `cntx_t*` argument appears in the interfaces to the `gemm`, `trsm`, and `gemmtrsm` [level-3 microkernels](KernelsHowTo.md#level-3) along with all [level-1v](KernelsHowTo.md#level-1v) and [level-1f](KernelsHowTo.md#level-1f) kernels. There, as a general rule, a valid pointer must be passed in. Whenever a valid context is needed, the developer may query a default context from the global kernel structure (if a context is not already available in the current scope): ```c -cntx_t* bli_gks_query_cntx( void ); +err_t bli_gks_query_cntx( cntx** cntx ); ``` -When BLIS is configured to target a configuration family (e.g. `intel64`, `x86_64`), `bli_gks_query_cntx()` will use `cpuid` or an equivalent heuristic to select and and return the appropriate context. When BLIS is configured to target a singleton sub-configuration (e.g. `haswell`, `skx`), `bli_gks_query_cntx()` will unconditionally return a pointer to the context appropriate for the targeted configuration. +When BLIS is configured to target a configuration family (e.g. `intel64`, `x86_64`), `bli_gks_query_cntx()` will use `cpuid` or an equivalent heuristic to provide the appropriate context. When BLIS is configured to target a singleton sub-configuration (e.g. `haswell`, `skx`), `bli_gks_query_cntx()` will unconditionally provide a pointer to the context appropriate for the targeted configuration. 
## Runtime type @@ -2288,15 +2288,15 @@ char* bli_info_get_version_str( void ); ## Specific configuration -The following routine returns a unique ID of type `arch_t` that identifies the current current active configuration: +The following routine determines a unique ID of type `arch_t` that identifies the current current active configuration: ```c -arch_t bli_arch_query_id( void ); +err_t bli_arch_query_id( arch_t* id ); ``` This is most useful when BLIS is configured with multiple configurations. (When linking to multi-configuration builds of BLIS, you don't know for sure which configuration will be used until runtime since the configuration-specific parameters are not loaded until after calling a hueristic to detect the hardware--usually based the `CPUID` instruction.) Once the configuration's ID is known, it can be used to query a string that contains the name of the configuration: ```c -char* bli_arch_string( arch_t id ); +err_t bli_arch_string( arch_t id, const char** str ); ``` ## General configuration @@ -2328,11 +2328,11 @@ gint_t bli_info_get_blas_int_type_size( void ); The following routines allow the caller to obtain a string that identifies the implementation type of each microkernel that is currently active (ie: part of the current active configuration, as identified bi `bli_arch_query_id()`). 
```c -char* bli_info_get_gemm_ukr_impl_string( ind_t method, num_t dt ) -char* bli_info_get_gemmtrsm_l_ukr_impl_string( ind_t method, num_t dt ) -char* bli_info_get_gemmtrsm_u_ukr_impl_string( ind_t method, num_t dt ) -char* bli_info_get_trsm_l_ukr_impl_string( ind_t method, num_t dt ) -char* bli_info_get_trsm_u_ukr_impl_string( ind_t method, num_t dt ) +err_t bli_info_get_gemm_ukr_impl_string( ind_t method, num_t dt, char** str ) +err_t bli_info_get_gemmtrsm_l_ukr_impl_string( ind_t method, num_t dt, char** str ) +err_t bli_info_get_gemmtrsm_u_ukr_impl_string( ind_t method, num_t dt, char** str ) +err_t bli_info_get_trsm_l_ukr_impl_string( ind_t method, num_t dt, char** str ) +err_t bli_info_get_trsm_u_ukr_impl_string( ind_t method, num_t dt, char** str ) ``` Possible implementation (ie: the `ind_t method` argument) types are: diff --git a/docs/BLISTypedAPI.md b/docs/BLISTypedAPI.md index 76d7ef8f63..4c087dbe32 100644 --- a/docs/BLISTypedAPI.md +++ b/docs/BLISTypedAPI.md @@ -154,11 +154,11 @@ The expert interface contains two additional parameters: a `cntx_t*` and `rntm_t ## Context type -In general, it is permissible to pass in `NULL` for a `cntx_t*` parameter when calling an expert interface such as `bli_dgemm_ex()`. However, there are cases where `NULL` values are not accepted and may result in a segmentation fault. Specifically, the `cntx_t*` argument appears in the interfaces to the `gemm`, `trsm`, and `gemmtrsm` [level-3 microkernels](KernelsHowTo.md#level-3) along with all [level-1v](KernelsHowTo.md#level-1v) and [level-1f](KernelsHowTo.md#level-1f) kernels. There, as a general rule, a valid pointer must be passed in. Whenever a valid context is needed, the developer may query a default context from the global kernel structure (if a context is not already available in the current scope): +In general, it is permissible to pass in `NULL` for a `cntx_t*` parameter when calling an expert interface such as `bli_gemm_ex()`. 
However, there are cases where `NULL` values are not accepted and may result in a segmentation fault. Specifically, the `cntx_t*` argument appears in the interfaces to the `gemm`, `trsm`, and `gemmtrsm` [level-3 microkernels](KernelsHowTo.md#level-3) along with all [level-1v](KernelsHowTo.md#level-1v) and [level-1f](KernelsHowTo.md#level-1f) kernels. There, as a general rule, a valid pointer must be passed in. Whenever a valid context is needed, the developer may query a default context from the global kernel structure (if a context is not already available in the current scope): ```c -cntx_t* bli_gks_query_cntx( void ); +err_t bli_gks_query_cntx( cntx** cntx ); ``` -When BLIS is configured to target a configuration family (e.g. `intel64`, `x86_64`), `bli_gks_query_cntx()` will use `cpuid` or an equivalent heuristic to select and and return the appropriate context. When BLIS is configured to target a singleton sub-configuration (e.g. `haswell`, `skx`), `bli_gks_query_cntx()` will unconditionally return a pointer to the context appropriate for the targeted configuration. +When BLIS is configured to target a configuration family (e.g. `intel64`, `x86_64`), `bli_gks_query_cntx()` will use `cpuid` or an equivalent heuristic to provide the appropriate context. When BLIS is configured to target a singleton sub-configuration (e.g. `haswell`, `skx`), `bli_gks_query_cntx()` will unconditionally provide a pointer to the context appropriate for the targeted configuration. ## Runtime type @@ -1967,15 +1967,15 @@ char* bli_info_get_version_str( void ); ## Specific configuration -The following routine returns a unique ID of type `arch_t` that identifies the current current active configuration: +The following routine determines a unique ID of type `arch_t` that identifies the current current active configuration: ```c -arch_t bli_arch_query_id( void ); +err_t bli_arch_query_id( arch_t* id ); ``` This is most useful when BLIS is configured with multiple configurations. 
(When linking to multi-configuration builds of BLIS, you don't know for sure which configuration will be used until runtime since the configuration-specific parameters are not loaded until after calling a hueristic to detect the hardware--usually based the `CPUID` instruction.) Once the configuration's ID is known, it can be used to query a string that contains the name of the configuration: ```c -char* bli_arch_string( arch_t id ); +err_t bli_arch_string( arch_t id, const char** str ); ``` ## General configuration @@ -2007,11 +2007,11 @@ gint_t bli_info_get_blas_int_type_size( void ); The following routines allow the caller to obtain a string that identifies the implementation type of each microkernel that is currently active (ie: part of the current active configuration, as identified bi `bli_arch_query_id()`). ```c -char* bli_info_get_gemm_ukr_impl_string( ind_t method, num_t dt ) -char* bli_info_get_gemmtrsm_l_ukr_impl_string( ind_t method, num_t dt ) -char* bli_info_get_gemmtrsm_u_ukr_impl_string( ind_t method, num_t dt ) -char* bli_info_get_trsm_l_ukr_impl_string( ind_t method, num_t dt ) -char* bli_info_get_trsm_u_ukr_impl_string( ind_t method, num_t dt ) +err_t bli_info_get_gemm_ukr_impl_string( ind_t method, num_t dt, char** str ) +err_t bli_info_get_gemmtrsm_l_ukr_impl_string( ind_t method, num_t dt, char** str ) +err_t bli_info_get_gemmtrsm_u_ukr_impl_string( ind_t method, num_t dt, char** str ) +err_t bli_info_get_trsm_l_ukr_impl_string( ind_t method, num_t dt, char** str ) +err_t bli_info_get_trsm_u_ukr_impl_string( ind_t method, num_t dt, char** str ) ``` Possible implementation (ie: the `ind_t method` argument) types are: @@ -2029,16 +2029,16 @@ Possible microkernel types (ie: the return values for `bli_info_get_*_ukr_impl_s The following routines allow the caller to obtain a string that identifies the implementation (`ind_t`) that is currently active (ie: implemented and enabled) for each level-3 operation. 
Possible implementation types are listed in the section above covering [microkernel implemenation query](BLISTypedAPI.md#microkernel-implementation-type-query). ```c -char* bli_info_get_gemm_impl_string( num_t dt ); -char* bli_info_get_hemm_impl_string( num_t dt ); -char* bli_info_get_herk_impl_string( num_t dt ); -char* bli_info_get_her2k_impl_string( num_t dt ); -char* bli_info_get_symm_impl_string( num_t dt ); -char* bli_info_get_syrk_impl_string( num_t dt ); -char* bli_info_get_syr2k_impl_string( num_t dt ); -char* bli_info_get_trmm_impl_string( num_t dt ); -char* bli_info_get_trmm3_impl_string( num_t dt ); -char* bli_info_get_trsm_impl_string( num_t dt ); +err_t bli_info_get_gemm_impl_string( num_t dt, char** str ); +err_t bli_info_get_hemm_impl_string( num_t dt, char** str ); +err_t bli_info_get_herk_impl_string( num_t dt, char** str ); +err_t bli_info_get_her2k_impl_string( num_t dt, char** str ); +err_t bli_info_get_symm_impl_string( num_t dt, char** str ); +err_t bli_info_get_syrk_impl_string( num_t dt, char** str ); +err_t bli_info_get_syr2k_impl_string( num_t dt, char** str ); +err_t bli_info_get_trmm_impl_string( num_t dt, char** str ); +err_t bli_info_get_trmm3_impl_string( num_t dt, char** str ); +err_t bli_info_get_trsm_impl_string( num_t dt, char** str ); ``` diff --git a/docs/ConfigurationHowTo.md b/docs/ConfigurationHowTo.md index cc12241823..f3ed2d956a 100644 --- a/docs/ConfigurationHowTo.md +++ b/docs/ConfigurationHowTo.md @@ -595,7 +595,7 @@ Adding support for a new umbrella configuration family in BLIS is fairly straigh ``` The `BLIS_FAMILY_INTELAVX` will automatically be defined by the build system whenever the family was targeted by `configure` is `intelavx`. (In general, if the user runs `./configure foobar`, the C preprocessor macro `BLIS_FAMILY_FOOBAR` will be defined.) - * **`frame/base/bli_arch.c`**. This file must be updated so that `bli_arch_query_id()` returns the correct `arch_t` microarchitecture ID value to the caller. 
This function is called when the framework is trying to choose which sub-configuration to use at runtime. For x86_64 architectures, this is supported via the `CPUID` instruction, as implemented via `bli_cpuid_query_id()`. Thus, you can simply mimic what is done for the `intel64` family by inserting lines such as: + * **`frame/base/bli_arch.c`**. This file must be updated so that `bli_arch_query_id()` determines the correct `arch_t` microarchitecture ID value for the caller. This function is called when the framework is trying to choose which sub-configuration to use at runtime. For x86_64 architectures, this is supported via the `CPUID` instruction, as implemented via `bli_cpuid_query_id()`. Thus, you can simply mimic what is done for the `intel64` family by inserting lines such as: ```c #ifdef BLIS_FAMILY_INTELAVX id = bli_cpuid_query_id(); @@ -718,13 +718,13 @@ Adding support for a new-subconfiguration to BLIS is similar to adding support f - * **`frame/base/bli_arch.c`**. This file must be updated so that `bli_arch_query_id()` returns the correct `arch_t` architecture ID value to the caller. `bli_arch_query_id()` is called when the framework is trying to choose which sub-configuration to use at runtime. When adding support for a sub-configuration as a singleton family, this amounts to adding a block of code such as: + * **`frame/base/bli_arch.c`**. This file must be updated so that `bli_arch_query_id()` determines the correct `arch_t` architecture ID value for the caller. This function is called when the framework is trying to choose which sub-configuration to use at runtime. When adding support for a sub-configuration as a singleton family, this amounts to adding a block of code such as: ```c #ifdef BLIS_FAMILY_KNL id = BLIS_ARCH_KNL; #endif ``` - The `BLIS_FAMILY_KNL` macro is automatically `#defined` by the build system if the `knl` sub-configuration was targeted directly (as a singleton family) at configure-time. 
Other ID values are returned only if their respective family macros are defined. (Recall that only one family is ever enabled at time.) If, however, the `knl` sub-configuration was enabled indirectly via an umbrella family, `bli_arch_query_id()` will return the `arch_t` ID value via the lines similar to the following: + The `BLIS_FAMILY_KNL` macro is automatically `#defined` by the build system if the `knl` sub-configuration was targeted directly (as a singleton family) at configure-time. Other ID values are returned only if their respective family macros are defined. (Recall that only one family is ever enabled at a time.) If, however, the `knl` sub-configuration was enabled indirectly via an umbrella family, `bli_arch_query_id()` will provide the `arch_t` ID value via lines similar to the following: ```c #ifdef BLIS_FAMILY_INTEL64 id = bli_cpuid_query_id(); diff --git a/docs/KernelsHowTo.md b/docs/KernelsHowTo.md index 6e84db8e76..c864becc73 100644 --- a/docs/KernelsHowTo.md +++ b/docs/KernelsHowTo.md @@ -118,29 +118,15 @@ not already available in your current scope, a default context for the hardware for which BLIS was configured (or, in the case of multi-configuration builds, the hardware on which BLIS is currently running) may be queried via: ```c -cntx_t* bli_gks_query_cntx( void ); +err_t bli_gks_query_cntx( const cntx_t** cntx ); ``` -Once this `cntx_t*` pointer is obtained, you may call one of three functions to query any of the computation kernels described in this document: +Once this `cntx_t*` pointer is obtained, you may call the following function to query any of the computation kernels described in this document: ```c -void* bli_cntx_get_l3_nat_ukr_dt +void_fp bli_cntx_get_ukr_dt ( - num_t dt, - l3ukr_t ker_id, - cntx_t* cntx - ); - -void* bli_cntx_get_l1f_ker_dt - ( - num_t dt, - l1fkr_t ker_id, - cntx_t* cntx - ); - -void* bli_cntx_get_l1v_ker_dt - ( - num_t dt, - l1vkr_t ker_id, - cntx_t* cntx + num_t dt, + ukr_t ker_id, + const cntx_t* cntx );
``` The `dt` and `ker_id` parameters specify the floating-point datatype and the @@ -152,30 +138,26 @@ Valid values for `ker_id` are given in the tables below. Also, note that the return values of `bli_cntx_get_l1v_ker_dt` `bli_cntx_get_l1f_ker_dt()`, and `bli_cntx_get_l3_nat_ukr_dt()`, -will be `void*` and must be typecast to typed function pointers before being called. +will be `void_fp` and must be typecast to typed function pointers before being called. As a convenience, BLIS defines function pointer types appropriate for usage in these situations. The function pointer type for each operation is given in the third columns of each table, with the `?` taking the place of one of the supported datatype characters. -| kernel operation | l3ukr_t | function pointer type | +| kernel operation | ukr_t | function pointer type | |:-----------------|:----------------------|:----------------------| | gemm | `BLIS_GEMM` | `?gemm_ukr_ft` | | trsm_l | `BLIS_TRSM_L_UKR` | `?trsm_ukr_ft` | | trsm_u | `BLIS_TRSM_U_UKR` | `?trsm_ukr_ft` | | gemmtrsm_l | `BLIS_GEMMTRSM_L_UKR` | `?gemmtrsm_ukr_ft` | | gemmtrsm_u | `BLIS_GEMMTRSM_U_UKR` | `?gemmtrsm_ukr_ft` | - -| kernel operation | l1fkr_t | function pointer type | -|:-----------------|:----------------------|:----------------------| +| | | | | axpy2v | `BLIS_AXPY2V_KER` | `?axpy2v_ft` | | dotaxpyv | `BLIS_DOTAXPYV_KER` | `?dotaxpyv_ft` | | axpyf | `BLIS_AXPYF_KER` | `?axpyf_ft` | | dotxf | `BLIS_DOTXF_KER` | `?dotxf_ft` | | dotxaxpyf | `BLIS_DOTXAXPYF_KER` | `?dotxaxpyf_ft` | - -| kernel operation | l1vkr_t | function pointer type | -|:-----------------|:----------------------|:----------------------| +| | | | | addv | `BLIS_ADDV_KER` | `?addv_ft` | | amaxv | `BLIS_AMAXV_KER` | `?amaxv_ft` | | axpyv | `BLIS_AXPYV_KER` | `?axpyv_ft` | @@ -256,8 +238,8 @@ void bli_?gemm_ ctype* restrict b1, ctype* restrict beta, ctype* restrict c11, inc_t rsc, inc_t csc, - auxinfo_t* restrict data, - cntx_t* restrict cntx + auxinfo_t* data, + cntx_t* 
cntx ); ``` @@ -274,8 +256,8 @@ void bli_?gemm_ukernel ctype* restrict b1, ctype* restrict beta, ctype* restrict c11, inc_t rsc, inc_t csc, - auxinfo_t* restrict data, - cntx_t* restrict cntx + auxinfo_t* data, + cntx_t* cntx ); ``` This function simply queries a microkernel function pointer from the context specified by `cntx`. Note that in the case of either method of calling the microkernel, `cntx` must be a valid pointer. (Passing in `NULL` will *not* result in a default context being used.) @@ -373,8 +355,8 @@ void bli_?trsm_l_ ctype* restrict a11, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, - auxinfo_t* restrict data, - cntx_t* restrict cntx + auxinfo_t* data, + cntx_t* cntx ); void bli_?trsm_u_ @@ -382,8 +364,8 @@ void bli_?trsm_u_ ctype* restrict a11, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, - auxinfo_t* restrict data, - cntx_t* restrict cntx + auxinfo_t* data, + cntx_t* cntx ); ``` @@ -395,8 +377,8 @@ void bli_?trsm_l_ukernel ctype* restrict a11, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, - auxinfo_t* restrict data, - cntx_t* restrict cntx + auxinfo_t* data, + cntx_t* cntx ); void bli_?trsm_u_ukernel @@ -404,8 +386,8 @@ void bli_?trsm_u_ukernel ctype* restrict a11, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, - auxinfo_t* restrict data, - cntx_t* restrict cntx + auxinfo_t* data, + cntx_t* cntx ); ``` @@ -473,8 +455,8 @@ void bli_?gemmtrsm_l_ ctype* restrict b01, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, - auxinfo_t* restrict data, - cntx_t* restrict cntx + auxinfo_t* data, + cntx_t* cntx ); void bli_?gemmtrsm_u_ @@ -488,8 +470,8 @@ void bli_?gemmtrsm_u_ ctype* restrict b21, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, - auxinfo_t* restrict data, - cntx_t* restrict cntx + auxinfo_t* data, + cntx_t* cntx ); ``` @@ -507,8 +489,8 @@ void bli_?gemmtrsm_l_ukernel ctype* restrict b01, ctype* restrict b11, ctype* restrict c11, inc_t rsc, 
inc_t csc, - auxinfo_t* restrict data, - cntx_t* restrict cntx + auxinfo_t* data, + cntx_t* cntx ); void bli_?gemmtrsm_u_ukernel @@ -522,8 +504,8 @@ void bli_?gemmtrsm_u_ukernel ctype* restrict b21, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, - auxinfo_t* restrict data, - cntx_t* restrict cntx + auxinfo_t* data, + cntx_t* cntx ); ``` @@ -655,7 +637,7 @@ void bli_?axpy2v_ ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, ctype* restrict z, inc_t incz, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -679,7 +661,7 @@ void bli_?dotaxpyv_ ctype* restrict y, inc_t incy, ctype* restrict rho, ctype* restrict z, inc_t incz, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -703,7 +685,7 @@ void bli_?axpyf_ ctype* restrict a, inc_t inca, inc_t lda, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -727,7 +709,7 @@ void bli_?dotxf_ ctype* restrict x, inc_t incx, ctype* restrict beta, ctype* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -757,7 +739,7 @@ void bli_?dotxaxpyf_ ctype* restrict beta, ctype* restrict y, inc_t incy, ctype* restrict z, inc_t incz, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -785,7 +767,7 @@ void bli_?addv_ dim_t n, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -803,7 +785,7 @@ void bli_?amaxv_ dim_t n, ctype* restrict x, inc_t incx, dim_t* restrict index, - cntx_t* restrict cntx + cntx_t* cntx ) ``` Given a vector of length _n_, this kernel returns the zero-based index `index` of the element of vector `x` that contains the largest absolute value (or, in the complex domain, the largest complex 
modulus). @@ -821,7 +803,7 @@ void bli_?axpyv_ ctype* restrict alpha, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -842,7 +824,7 @@ void bli_?axpbyv_ ctype* restrict x, inc_t incx, ctype* restrict beta, ctype* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -861,7 +843,7 @@ void bli_?copyv_ dim_t n, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -882,7 +864,7 @@ void bli_?dotv_ ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, ctype* restrict rho, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -905,7 +887,7 @@ void bli_?dotxv_ ctype* restrict y, inc_t incy, ctype* restrict beta, ctype* restrict rho, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -922,7 +904,7 @@ void bli_?invertv_ ( dim_t n, ctype* restrict x, inc_t incx, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel inverts all elements of an _n_-length vector `x`. 
@@ -937,7 +919,7 @@ void bli_?scalv_ dim_t n, ctype* restrict alpha, ctype* restrict x, inc_t incx, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -957,7 +939,7 @@ void bli_?scal2v_ ctype* restrict alpha, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -976,7 +958,7 @@ void bli_?setv_ dim_t n, ctype* restrict alpha, ctype* restrict x, inc_t incx, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -995,7 +977,7 @@ void bli_?subv_ dim_t n, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -1013,7 +995,7 @@ void bli_?swapv_ dim_t n, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel swaps corresponding elements of two _n_-length vectors `x` and `y` stored with strides `incx` and `incy`, respectively. 
@@ -1029,7 +1011,7 @@ void bli_?xpbyv_ ctype* restrict x, inc_t incx, ctype* restrict beta, ctype* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: diff --git a/frame/0/bli_l0_check.c b/frame/0/bli_l0_check.c index 02867a22d2..64c9777db9 100644 --- a/frame/0/bli_l0_check.c +++ b/frame/0/bli_l0_check.c @@ -41,13 +41,13 @@ #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC(opname,_check) \ +err_t PASTEMAC(opname,_check) \ ( \ const obj_t* chi, \ const obj_t* psi \ ) \ { \ - bli_l0_xxsc_check( chi, psi ); \ + return bli_l0_xx_check( chi, psi ); \ } GENFRONT( addsc ) @@ -61,12 +61,12 @@ GENFRONT( subsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC(opname,_check) \ +err_t PASTEMAC(opname,_check) \ ( \ const obj_t* chi \ ) \ { \ - bli_l0_xsc_check( chi ); \ + return bli_l0_x_check( chi ); \ } GENFRONT( invertsc ) @@ -75,13 +75,13 @@ GENFRONT( invertsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC(opname,_check) \ +err_t PASTEMAC(opname,_check) \ ( \ const obj_t* chi, \ const obj_t* norm \ ) \ { \ - bli_l0_xx2sc_check( chi, norm ); \ + return bli_l0_xx2_check( chi, norm ); \ } GENFRONT( absqsc ) @@ -89,7 +89,7 @@ GENFRONT( normfsc ) // ----------------------------------------------------------------------------- -void bli_getsc_check +err_t bli_getsc_check ( const obj_t* chi, const double* zeta_r, @@ -101,21 +101,23 @@ void bli_getsc_check // Check object datatypes. //e_val = bli_check_noninteger_object( chi ); - //bli_check_error_code( e_val ); + //bli_check_return_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object buffers (for non-NULLness). 
e_val = bli_check_object_buffer( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); + + return BLIS_SUCCESS; } -void bli_setsc_check +err_t bli_setsc_check ( double zeta_r, double zeta_i, @@ -127,21 +129,23 @@ void bli_setsc_check // Check object datatypes. //e_val = bli_check_floating_object( chi ); - //bli_check_error_code( e_val ); + //bli_check_return_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); + + return BLIS_SUCCESS; } -void bli_unzipsc_check +err_t bli_unzipsc_check ( const obj_t* chi, const obj_t* zeta_r, @@ -152,52 +156,54 @@ void bli_unzipsc_check // Check object datatypes. - e_val = bli_check_noninteger_object( chi ); - bli_check_error_code( e_val ); + e_val = bli_check_noninteger_object( chi ); + bli_check_return_error_code( e_val ); - e_val = bli_check_real_object( zeta_r ); - bli_check_error_code( e_val ); + e_val = bli_check_real_object( zeta_r ); + bli_check_return_error_code( e_val ); - e_val = bli_check_real_object( zeta_i ); - bli_check_error_code( e_val ); + e_val = bli_check_real_object( zeta_i ); + bli_check_return_error_code( e_val ); - e_val = bli_check_nonconstant_object( zeta_r ); - bli_check_error_code( e_val ); + e_val = bli_check_nonconstant_object( zeta_r ); + bli_check_return_error_code( e_val ); - e_val = bli_check_nonconstant_object( zeta_i ); - bli_check_error_code( e_val ); + e_val = bli_check_nonconstant_object( zeta_i ); + bli_check_return_error_code( e_val ); - e_val = bli_check_object_real_proj_of( chi, zeta_r ); - bli_check_error_code( e_val ); + e_val = bli_check_object_real_proj_of( chi, zeta_r ); + bli_check_return_error_code( e_val ); - e_val = bli_check_object_real_proj_of( chi, zeta_i ); - bli_check_error_code( e_val ); + 
e_val = bli_check_object_real_proj_of( chi, zeta_i ); + bli_check_return_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_scalar_object( zeta_r ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_scalar_object( zeta_i ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_object_buffer( zeta_r ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_object_buffer( zeta_i ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); + + return BLIS_SUCCESS; } -void bli_zipsc_check +err_t bli_zipsc_check ( const obj_t* zeta_r, const obj_t* zeta_i, @@ -208,51 +214,53 @@ void bli_zipsc_check // Check object datatypes. 
- e_val = bli_check_real_object( zeta_r ); - bli_check_error_code( e_val ); + e_val = bli_check_real_object( zeta_r ); + bli_check_return_error_code( e_val ); - e_val = bli_check_real_object( zeta_i ); - bli_check_error_code( e_val ); + e_val = bli_check_real_object( zeta_i ); + bli_check_return_error_code( e_val ); - e_val = bli_check_noninteger_object( chi ); - bli_check_error_code( e_val ); + e_val = bli_check_noninteger_object( chi ); + bli_check_return_error_code( e_val ); - e_val = bli_check_nonconstant_object( chi ); - bli_check_error_code( e_val ); + e_val = bli_check_nonconstant_object( chi ); + bli_check_return_error_code( e_val ); - e_val = bli_check_object_real_proj_of( chi, zeta_r ); - bli_check_error_code( e_val ); + e_val = bli_check_object_real_proj_of( chi, zeta_r ); + bli_check_return_error_code( e_val ); - e_val = bli_check_object_real_proj_of( chi, zeta_i ); - bli_check_error_code( e_val ); + e_val = bli_check_object_real_proj_of( chi, zeta_i ); + bli_check_return_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( zeta_r ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_scalar_object( zeta_i ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_scalar_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object buffers (for non-NULLness). 
e_val = bli_check_object_buffer( zeta_r ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_object_buffer( zeta_i ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_object_buffer( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -void bli_l0_xsc_check +err_t bli_l0_x_check ( const obj_t* chi ) @@ -262,23 +270,25 @@ void bli_l0_xsc_check // Check object datatypes. e_val = bli_check_noninteger_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_nonconstant_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); + + return BLIS_SUCCESS; } -void bli_l0_xxsc_check +err_t bli_l0_xx_check ( const obj_t* chi, const obj_t* psi @@ -289,32 +299,34 @@ void bli_l0_xxsc_check // Check object datatypes. e_val = bli_check_noninteger_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_noninteger_object( psi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_nonconstant_object( psi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_scalar_object( psi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object buffers (for non-NULLness). 
e_val = bli_check_object_buffer( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_object_buffer( psi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); + + return BLIS_SUCCESS; } -void bli_l0_xx2sc_check +err_t bli_l0_xx2_check ( const obj_t* chi, const obj_t* absq @@ -325,35 +337,37 @@ void bli_l0_xx2sc_check // Check object datatypes. e_val = bli_check_noninteger_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_nonconstant_object( absq ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_real_object( absq ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_object_real_proj_of( chi, absq ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_scalar_object( absq ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_object_buffer( absq ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); + + return BLIS_SUCCESS; } -void bli_l0_xxbsc_check +err_t bli_l0_xxbool_check ( const obj_t* chi, const obj_t* psi, @@ -365,25 +379,27 @@ void bli_l0_xxbsc_check // Check object datatypes. e_val = bli_check_noninteger_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_noninteger_object( psi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object dimensions. 
e_val = bli_check_scalar_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_scalar_object( psi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_object_buffer( psi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); + + return BLIS_SUCCESS; } diff --git a/frame/0/bli_l0_check.h b/frame/0/bli_l0_check.h index 1bbb4a7564..a113f6b467 100644 --- a/frame/0/bli_l0_check.h +++ b/frame/0/bli_l0_check.h @@ -40,7 +40,7 @@ #undef GENTPROT #define GENTPROT( opname ) \ \ -void PASTEMAC(opname,_check) \ +err_t PASTEMAC(opname,_check) \ ( \ const obj_t* chi, \ const obj_t* psi \ @@ -57,7 +57,7 @@ GENTPROT( subsc ) #undef GENTPROT #define GENTPROT( opname ) \ \ -void PASTEMAC(opname,_check) \ +err_t PASTEMAC(opname,_check) \ ( \ const obj_t* chi \ ); @@ -68,7 +68,7 @@ GENTPROT( invertsc ) #undef GENTPROT #define GENTPROT( opname ) \ \ -void PASTEMAC(opname,_check) \ +err_t PASTEMAC(opname,_check) \ ( \ const obj_t* chi, \ const obj_t* absq \ @@ -81,7 +81,7 @@ GENTPROT( normfsc ) #undef GENTPROT #define GENTPROT( opname ) \ \ -void PASTEMAC(opname,_check) \ +err_t PASTEMAC(opname,_check) \ ( \ const obj_t* chi, \ const double* zeta_r, \ @@ -94,7 +94,7 @@ GENTPROT( getsc ) #undef GENTPROT #define GENTPROT( opname ) \ \ -void PASTEMAC(opname,_check) \ +err_t PASTEMAC(opname,_check) \ ( \ double zeta_r, \ double zeta_i, \ @@ -107,7 +107,7 @@ GENTPROT( setsc ) #undef GENTPROT #define GENTPROT( opname ) \ \ -void PASTEMAC(opname,_check) \ +err_t PASTEMAC(opname,_check) \ ( \ const obj_t* chi, \ const obj_t* zeta_r, \ @@ -120,7 +120,7 @@ GENTPROT( unzipsc ) #undef GENTPROT #define GENTPROT( opname ) \ \ -void PASTEMAC(opname,_check) \ +err_t PASTEMAC(opname,_check) \ ( \ const obj_t* zeta_r, \ const obj_t* 
zeta_i, \ @@ -131,26 +131,27 @@ GENTPROT( zipsc ) // ----------------------------------------------------------------------------- -void bli_l0_xsc_check +err_t bli_l0_x_check ( const obj_t* chi ); -void bli_l0_xxsc_check +err_t bli_l0_xx_check ( const obj_t* chi, const obj_t* psi ); -void bli_l0_xx2sc_check +err_t bli_l0_xx2_check ( const obj_t* chi, const obj_t* norm ); -void bli_l0_xxbsc_check +err_t bli_l0_xxbool_check ( const obj_t* chi, const obj_t* psi, const bool* is_eq ); + diff --git a/frame/0/bli_l0_ft.h b/frame/0/bli_l0_ft.h index 01d90cc3bd..dfd420bfe8 100644 --- a/frame/0/bli_l0_ft.h +++ b/frame/0/bli_l0_ft.h @@ -42,7 +42,7 @@ #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ -typedef void (*PASTECH2(ch,opname,tsuf)) \ +typedef err_t (*PASTECH2(ch,opname,tsuf)) \ ( \ conj_t conjchi, \ const ctype* chi, \ @@ -58,7 +58,7 @@ INSERT_GENTDEF( subsc ) #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ -typedef void (*PASTECH2(ch,opname,tsuf)) \ +typedef err_t (*PASTECH2(ch,opname,tsuf)) \ ( \ conj_t conjchi, \ ctype* chi \ @@ -71,7 +71,7 @@ INSERT_GENTDEF( invertsc ) #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ -typedef void (*PASTECH2(ch,opname,tsuf)) \ +typedef err_t (*PASTECH2(ch,opname,tsuf)) \ ( \ conj_t conjchi, \ const ctype* chi, \ @@ -85,7 +85,7 @@ INSERT_GENTDEF( mulsc ) #undef GENTDEFR #define GENTDEFR( ctype, ctype_r, ch, chr, opname, tsuf ) \ \ -typedef void (*PASTECH2(ch,opname,tsuf)) \ +typedef err_t (*PASTECH2(ch,opname,tsuf)) \ ( \ const ctype* chi, \ ctype_r* absq \ @@ -98,7 +98,7 @@ INSERT_GENTDEFR( absqsc ) #undef GENTDEFR #define GENTDEFR( ctype, ctype_r, ch, chr, opname, tsuf ) \ \ -typedef void (*PASTECH2(ch,opname,tsuf)) \ +typedef err_t (*PASTECH2(ch,opname,tsuf)) \ ( \ const ctype* chi, \ ctype_r* norm \ @@ -111,7 +111,7 @@ INSERT_GENTDEFR( normfsc ) #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ -typedef void (*PASTECH2(ch,opname,tsuf)) \ +typedef err_t 
(*PASTECH2(ch,opname,tsuf)) \ ( \ const ctype* chi, \ ctype* psi \ @@ -124,7 +124,7 @@ INSERT_GENTDEF( sqrtsc ) #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ -typedef void (*PASTECH2(ch,opname,tsuf)) \ +typedef err_t (*PASTECH2(ch,opname,tsuf)) \ ( \ const ctype* chi, \ double* zeta_r, \ @@ -138,7 +138,7 @@ INSERT_GENTDEF( getsc ) #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ -typedef void (*PASTECH2(ch,opname,tsuf)) \ +typedef err_t (*PASTECH2(ch,opname,tsuf)) \ ( \ double zeta_r, \ double zeta_i, \ @@ -152,7 +152,7 @@ INSERT_GENTDEF( setsc ) #undef GENTDEFR #define GENTDEFR( ctype, ctype_r, ch, chr, opname, tsuf ) \ \ -typedef void (*PASTECH2(ch,opname,tsuf)) \ +typedef err_t (*PASTECH2(ch,opname,tsuf)) \ ( \ const ctype* chi, \ ctype_r* zeta_r, \ @@ -166,7 +166,7 @@ INSERT_GENTDEFR( unzipsc ) #undef GENTDEFR #define GENTDEFR( ctype, ctype_r, ch, chr, opname, tsuf ) \ \ -typedef void (*PASTECH2(ch,opname,tsuf)) \ +typedef err_t (*PASTECH2(ch,opname,tsuf)) \ ( \ const ctype_r* zeta_r, \ const ctype_r* zeta_i, \ diff --git a/frame/0/bli_l0_oapi.c b/frame/0/bli_l0_oapi.c index 0bfdbe3b33..e938fee546 100644 --- a/frame/0/bli_l0_oapi.c +++ b/frame/0/bli_l0_oapi.c @@ -41,22 +41,27 @@ #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC0(opname) \ +err_t PASTEMAC0(opname) \ ( \ const obj_t* chi, \ const obj_t* absq \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ num_t dt_chi; \ num_t dt_absq_c = bli_obj_dt_proj_to_complex( absq ); \ \ const void* buf_chi; \ void* buf_absq = bli_obj_buffer_at_off( absq ); \ +\ + err_t r_val; \ \ if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( chi, absq ); \ + { \ + r_val = PASTEMAC(opname,_check)( chi, absq ); \ + bli_check_return_if_failure( r_val ); \ + } \ \ /* If chi is a scalar constant, use dt_absq_c to extract the address of the corresponding constant value; otherwise, use the datatype encoded @@ -67,6 +72,7 @@ void PASTEMAC0(opname) \ void* for function arguments 
instead of typed pointers. */ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt_chi ); \ \ + return \ f \ ( \ buf_chi, \ @@ -81,13 +87,13 @@ GENFRONT( normfsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC0(opname) \ +err_t PASTEMAC0(opname) \ ( \ const obj_t* chi, \ const obj_t* psi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ num_t dt = bli_obj_dt( psi ); \ \ @@ -95,14 +101,20 @@ void PASTEMAC0(opname) \ \ void* buf_chi = bli_obj_buffer_for_1x1( dt, chi ); \ void* buf_psi = bli_obj_buffer_at_off( psi ); \ +\ + err_t r_val; \ \ if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( chi, psi ); \ + { \ + r_val = PASTEMAC(opname,_check)( chi, psi ); \ + bli_check_return_if_failure( r_val ); \ + } \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt ); \ \ + return \ f \ ( \ conjchi, \ @@ -120,26 +132,32 @@ GENFRONT( subsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC0(opname) \ +err_t PASTEMAC0(opname) \ ( \ const obj_t* chi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ num_t dt = bli_obj_dt( chi ); \ \ conj_t conjchi = bli_obj_conj_status( chi ); \ \ void* buf_chi = bli_obj_buffer_for_1x1( dt, chi ); \ +\ + err_t r_val; \ \ if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( chi ); \ + { \ + r_val = PASTEMAC(opname,_check)( chi ); \ + bli_check_return_if_failure( r_val ); \ + } \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt ); \ \ + return \ f \ ( \ conjchi, \ @@ -153,26 +171,32 @@ GENFRONT( invertsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC0(opname) \ +err_t PASTEMAC0(opname) \ ( \ const obj_t* chi, \ const obj_t* psi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ num_t dt = bli_obj_dt( psi ); \ \ void* buf_chi = bli_obj_buffer_for_1x1( dt, chi ); \ void* buf_psi = bli_obj_buffer_at_off( psi ); \ +\ + err_t r_val; \ \ if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( chi, psi ); \ + { \ + r_val = PASTEMAC(opname,_check)( chi, psi ); \ + bli_check_return_if_failure( r_val ); \ + } \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt ); \ \ + return \ f \ ( \ buf_chi, \ @@ -186,14 +210,14 @@ GENFRONT( sqrtsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC0(opname) \ +err_t PASTEMAC0(opname) \ ( \ const obj_t* chi, \ double* zeta_r, \ double* zeta_i \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ num_t dt_chi = bli_obj_dt( chi ); \ num_t dt_def = BLIS_DCOMPLEX; \ @@ -203,9 +227,14 @@ void PASTEMAC0(opname) \ value to maximize precision, and since we don't know if the caller needs just the real or the real and imaginary parts. */ \ void* buf_chi = bli_obj_buffer_for_1x1( dt_def, chi ); \ +\ + err_t r_val; \ \ if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \ + { \ + r_val = PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \ + bli_check_return_if_failure( r_val ); \ + } \ \ /* The _check() routine prevents integer types, so we know that chi is either a constant or an actual floating-point type. */ \ @@ -216,6 +245,7 @@ void PASTEMAC0(opname) \ void* for function arguments instead of typed pointers. 
*/ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt_use ); \ \ + return \ f \ ( \ buf_chi, \ @@ -230,26 +260,32 @@ GENFRONT( getsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC0(opname) \ +err_t PASTEMAC0(opname) \ ( \ double zeta_r, \ double zeta_i, \ const obj_t* chi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ num_t dt_chi = bli_obj_dt( chi ); \ \ void* buf_chi = bli_obj_buffer_at_off( chi ); \ +\ + err_t r_val; \ \ if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( zeta_r, zeta_i, chi ); \ + { \ + r_val = PASTEMAC(opname,_check)( zeta_r, zeta_i, chi ); \ + bli_check_return_if_failure( r_val ); \ + } \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt_chi ); \ \ + return \ f \ ( \ zeta_r, \ @@ -264,14 +300,14 @@ GENFRONT( setsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC0(opname) \ +err_t PASTEMAC0(opname) \ ( \ const obj_t* chi, \ const obj_t* zeta_r, \ const obj_t* zeta_i \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ num_t dt_chi; \ num_t dt_zeta_c = bli_obj_dt_proj_to_complex( zeta_r ); \ @@ -280,9 +316,14 @@ void PASTEMAC0(opname) \ \ void* buf_zeta_r = bli_obj_buffer_at_off( zeta_r ); \ void* buf_zeta_i = bli_obj_buffer_at_off( zeta_i ); \ +\ + err_t r_val; \ \ if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \ + { \ + r_val = PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \ + bli_check_return_if_failure( r_val ); \ + } \ \ /* If chi is a scalar constant, use dt_zeta_c to extract the address of the corresponding constant value; otherwise, use the datatype encoded @@ -293,6 +334,7 @@ void PASTEMAC0(opname) \ void* for function arguments instead of typed pointers. 
*/ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt_chi ); \ \ + return \ f \ ( \ buf_chi, \ @@ -307,14 +349,14 @@ GENFRONT( unzipsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC0(opname) \ +err_t PASTEMAC0(opname) \ ( \ const obj_t* zeta_r, \ const obj_t* zeta_i, \ const obj_t* chi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ num_t dt_chi = bli_obj_dt( chi ); \ \ @@ -322,14 +364,20 @@ void PASTEMAC0(opname) \ void* buf_zeta_i = bli_obj_buffer_for_1x1( dt_chi, zeta_i ); \ \ void* buf_chi = bli_obj_buffer_at_off( chi ); \ +\ + err_t r_val; \ \ if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \ + { \ + r_val = PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \ + bli_check_return_if_failure( r_val ); \ + } \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt_chi ); \ \ + return \ f \ ( \ buf_zeta_i, \ diff --git a/frame/0/bli_l0_oapi.h b/frame/0/bli_l0_oapi.h index a34252cf7c..a9b91d90da 100644 --- a/frame/0/bli_l0_oapi.h +++ b/frame/0/bli_l0_oapi.h @@ -40,7 +40,7 @@ #undef GENPROT #define GENPROT( opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC0(opname) \ ( \ const obj_t* chi, \ const obj_t* absq \ @@ -53,7 +53,7 @@ GENPROT( normfsc ) #undef GENPROT #define GENPROT( opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC0(opname) \ ( \ const obj_t* chi, \ const obj_t* psi \ @@ -69,7 +69,7 @@ GENPROT( subsc ) #undef GENPROT #define GENPROT( opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC0(opname) \ ( \ const obj_t* chi \ ); @@ -80,7 +80,7 @@ GENPROT( invertsc ) #undef GENPROT #define GENPROT( opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC0(opname) \ ( \ const obj_t* chi, \ double* zeta_r, \ @@ -93,7 +93,7 @@ GENPROT( 
getsc ) #undef GENPROT #define GENPROT( opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC0(opname) \ ( \ double zeta_r, \ double zeta_i, \ @@ -106,7 +106,7 @@ GENPROT( setsc ) #undef GENPROT #define GENPROT( opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC0(opname) \ ( \ const obj_t* chi, \ const obj_t* zeta_r, \ @@ -119,7 +119,7 @@ GENPROT( unzipsc ) #undef GENPROT #define GENPROT( opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC0(opname) \ ( \ const obj_t* zeta_r, \ const obj_t* zeta_i, \ diff --git a/frame/0/bli_l0_tapi.c b/frame/0/bli_l0_tapi.c index e0cdffcf34..2f87753b10 100644 --- a/frame/0/bli_l0_tapi.c +++ b/frame/0/bli_l0_tapi.c @@ -41,19 +41,21 @@ #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kername ) \ \ -void PASTEMAC(ch,opname) \ +err_t PASTEMAC(ch,opname) \ ( \ conj_t conjchi, \ const ctype* chi, \ ctype* psi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ ctype chi_conj; \ \ PASTEMAC(ch,copycjs)( conjchi, *chi, chi_conj ); \ PASTEMAC(ch,kername)( chi_conj, *psi ); \ +\ + return BLIS_SUCCESS; \ } INSERT_GENTFUNC_BASIC( addsc, adds ) @@ -64,19 +66,21 @@ INSERT_GENTFUNC_BASIC( subsc, subs ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kername ) \ \ -void PASTEMAC(ch,opname) \ +err_t PASTEMAC(ch,opname) \ ( \ conj_t conjchi, \ ctype* chi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ ctype chi_conj; \ \ PASTEMAC(ch,copycjs)( conjchi, *chi, chi_conj ); \ PASTEMAC(ch,kername)( chi_conj ); \ PASTEMAC(ch,copys)( chi_conj, *chi ); \ +\ + return BLIS_SUCCESS; \ } INSERT_GENTFUNC_BASIC( invertsc, inverts ) @@ -85,14 +89,14 @@ INSERT_GENTFUNC_BASIC( invertsc, inverts ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kername ) \ \ -void PASTEMAC(ch,opname) \ +err_t PASTEMAC(ch,opname) \ ( \ conj_t conjchi, \ const ctype* chi, \ ctype* psi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ if ( 
PASTEMAC(ch,eq0)( *chi ) ) \ { \ @@ -106,6 +110,8 @@ void PASTEMAC(ch,opname) \ PASTEMAC(ch,copycjs)( conjchi, *chi, chi_conj ); \ PASTEMAC(ch,kername)( chi_conj, *psi ); \ } \ +\ + return BLIS_SUCCESS; \ } INSERT_GENTFUNC_BASIC( mulsc, scals ) @@ -114,13 +120,13 @@ INSERT_GENTFUNC_BASIC( mulsc, scals ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \ \ -void PASTEMAC(ch,opname) \ +err_t PASTEMAC(ch,opname) \ ( \ const ctype* chi, \ ctype_r* absq \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ ctype_r chi_r; \ ctype_r chi_i; \ @@ -135,6 +141,8 @@ void PASTEMAC(ch,opname) \ PASTEMAC(ch,absq2ris)( chi_r, chi_i, *absq, absq_i ); \ \ ( void )chi_i; \ +\ + return BLIS_SUCCESS; \ } INSERT_GENTFUNCR_BASIC0( absqsc ) @@ -143,16 +151,18 @@ INSERT_GENTFUNCR_BASIC0( absqsc ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \ \ -void PASTEMAC(ch,opname) \ +err_t PASTEMAC(ch,opname) \ ( \ const ctype* chi, \ ctype_r* norm \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ /* norm = sqrt( chi_r * chi_r + chi_i * chi_i ); */ \ PASTEMAC2(ch,chr,abval2s)( *chi, *norm ); \ +\ + return BLIS_SUCCESS; \ } INSERT_GENTFUNCR_BASIC0( normfsc ) @@ -161,16 +171,18 @@ INSERT_GENTFUNCR_BASIC0( normfsc ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ -void PASTEMAC(ch,opname) \ +err_t PASTEMAC(ch,opname) \ ( \ const ctype* chi, \ ctype* psi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ /* NOTE: sqrtsc/sqrt2s differs from normfsc/abval2s in the complex domain. 
*/ \ PASTEMAC(ch,sqrt2s)( *chi, *psi ); \ +\ + return BLIS_SUCCESS; \ } INSERT_GENTFUNC_BASIC0( sqrtsc ) @@ -179,16 +191,18 @@ INSERT_GENTFUNC_BASIC0( sqrtsc ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ -void PASTEMAC(ch,opname) \ +err_t PASTEMAC(ch,opname) \ ( \ const ctype* chi, \ double* zeta_r, \ double* zeta_i \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ PASTEMAC2(ch,d,gets)( *chi, *zeta_r, *zeta_i ); \ +\ + return BLIS_SUCCESS; \ } INSERT_GENTFUNC_BASIC0( getsc ) @@ -197,16 +211,18 @@ INSERT_GENTFUNC_BASIC0( getsc ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ -void PASTEMAC(ch,opname) \ +err_t PASTEMAC(ch,opname) \ ( \ double zeta_r, \ double zeta_i, \ ctype* chi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ PASTEMAC2(d,ch,sets)( zeta_r, zeta_i, *chi ); \ +\ + return BLIS_SUCCESS; \ } INSERT_GENTFUNC_BASIC0( setsc ) @@ -215,16 +231,18 @@ INSERT_GENTFUNC_BASIC0( setsc ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \ \ -void PASTEMAC(ch,opname) \ +err_t PASTEMAC(ch,opname) \ ( \ const ctype* chi, \ ctype_r* zeta_r, \ ctype_r* zeta_i \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ PASTEMAC2(ch,chr,gets)( *chi, *zeta_r, *zeta_i ); \ +\ + return BLIS_SUCCESS; \ } INSERT_GENTFUNCR_BASIC0( unzipsc ) @@ -233,43 +251,49 @@ INSERT_GENTFUNCR_BASIC0( unzipsc ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \ \ -void PASTEMAC(ch,opname) \ +err_t PASTEMAC(ch,opname) \ ( \ const ctype_r* zeta_r, \ const ctype_r* zeta_i, \ ctype* chi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ PASTEMAC2(chr,ch,sets)( *zeta_r, *zeta_i, *chi ); \ +\ + return BLIS_SUCCESS; \ } INSERT_GENTFUNCR_BASIC0( zipsc ) // ----------------------------------------------------------------------------- -void bli_igetsc +err_t bli_igetsc ( const dim_t* chi, double* zeta_r, double* zeta_i ) { - bli_init_once(); + BLIS_INIT_ONCE(); PASTEMAC2(i,d,gets)( *chi, *zeta_r, *zeta_i ); + + return 
BLIS_SUCCESS; } -void bli_isetsc +err_t bli_isetsc ( double zeta_r, double zeta_i, dim_t* chi ) { - bli_init_once(); + BLIS_INIT_ONCE(); PASTEMAC2(d,i,sets)( zeta_r, zeta_i, *chi ); + + return BLIS_SUCCESS; } diff --git a/frame/0/bli_l0_tapi.h b/frame/0/bli_l0_tapi.h index b393034103..854604c3de 100644 --- a/frame/0/bli_l0_tapi.h +++ b/frame/0/bli_l0_tapi.h @@ -40,7 +40,7 @@ #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC(ch,opname) \ ( \ conj_t conjchi, \ const ctype* chi, \ @@ -56,7 +56,7 @@ INSERT_GENTPROT_BASIC0( subsc ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC(ch,opname) \ ( \ conj_t conjchi, \ ctype* chi \ @@ -68,7 +68,7 @@ INSERT_GENTPROT_BASIC0( invertsc ) #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC(ch,opname) \ ( \ const ctype* chi, \ ctype_r* absq \ @@ -81,7 +81,7 @@ INSERT_GENTPROTR_BASIC0( normfsc ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC(ch,opname) \ ( \ const ctype* chi, \ ctype* psi \ @@ -93,7 +93,7 @@ INSERT_GENTPROT_BASIC0( sqrtsc ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC(ch,opname) \ ( \ const ctype* chi, \ double* zeta_r, \ @@ -106,7 +106,7 @@ INSERT_GENTPROT_BASIC0( getsc ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC(ch,opname) \ ( \ double zeta_r, \ double zeta_i, \ @@ -119,7 +119,7 @@ INSERT_GENTPROT_BASIC0( setsc ) #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC(ch,opname) \ ( 
\ const ctype* chi, \ ctype_r* zeta_r, \ @@ -132,7 +132,7 @@ INSERT_GENTPROTR_BASIC0( unzipsc ) #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC(ch,opname) \ ( \ const ctype_r* zeta_r, \ const ctype_r* zeta_i, \ @@ -143,14 +143,14 @@ INSERT_GENTPROTR_BASIC0( zipsc ) // ----------------------------------------------------------------------------- -BLIS_EXPORT_BLIS void bli_igetsc +BLIS_EXPORT_BLIS err_t bli_igetsc ( const dim_t* chi, double* zeta_r, double* zeta_i ); -BLIS_EXPORT_BLIS void bli_isetsc +BLIS_EXPORT_BLIS err_t bli_isetsc ( double zeta_r, double zeta_i, diff --git a/frame/0/copysc/bli_copysc.c b/frame/0/copysc/bli_copysc.c index c2e01d07b0..769ad0faf5 100644 --- a/frame/0/copysc/bli_copysc.c +++ b/frame/0/copysc/bli_copysc.c @@ -39,7 +39,7 @@ // an operation that can be used to typecast (copy-cast) a scalar // of one datatype to a scalar of another datatype. -typedef void (*FUNCPTR_T) +typedef err_t (*FUNCPTR_T) ( conj_t conjchi, const void* chi, @@ -55,13 +55,13 @@ static FUNCPTR_T GENARRAY2_ALL(ftypes,copysc); #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC0(opname) \ +err_t PASTEMAC0(opname) \ ( \ const obj_t* chi, \ const obj_t* psi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ conj_t conjchi = bli_obj_conj_status( chi ); \ \ @@ -72,9 +72,14 @@ void PASTEMAC0(opname) \ void* buf_chi; \ \ FUNCPTR_T f; \ +\ + err_t r_val; \ \ if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( chi, psi ); \ + { \ + r_val = PASTEMAC(opname,_check)( chi, psi ); \ + bli_check_return_if_failure( r_val ); \ + } \ \ /* If chi is a scalar constant, use dt_psi to extract the address of the corresponding constant value; otherwise, use the datatype encoded @@ -86,11 +91,13 @@ void PASTEMAC0(opname) \ f = ftypes[dt_chi][dt_psi]; \ \ /* Invoke the void pointer-based function. 
*/ \ - f( \ - conjchi, \ - buf_chi, \ - buf_psi \ - ); \ + return \ + f \ + ( \ + conjchi, \ + buf_chi, \ + buf_psi \ + ); \ } GENFRONT( copysc ) @@ -103,14 +110,14 @@ GENFRONT( copysc ) #undef GENTFUNC2 #define GENTFUNC2( ctype_x, ctype_y, chx, chy, varname ) \ \ -void PASTEMAC2(chx,chy,varname) \ +err_t PASTEMAC2(chx,chy,varname) \ ( \ conj_t conjchi, \ const void* chi, \ void* psi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ const ctype_x* chi_cast = chi; \ ctype_y* psi_cast = psi; \ @@ -123,6 +130,8 @@ void PASTEMAC2(chx,chy,varname) \ { \ PASTEMAC2(chx,chy,copys)( *chi_cast, *psi_cast ); \ } \ +\ + return BLIS_SUCCESS; \ } INSERT_GENTFUNC2_BASIC0( copysc ) diff --git a/frame/0/copysc/bli_copysc.h b/frame/0/copysc/bli_copysc.h index cd5481e576..33f8816780 100644 --- a/frame/0/copysc/bli_copysc.h +++ b/frame/0/copysc/bli_copysc.h @@ -40,7 +40,7 @@ #undef GENFRONT #define GENFRONT( opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC0(opname) \ ( \ const obj_t* chi, \ const obj_t* psi \ @@ -55,7 +55,7 @@ GENFRONT( copysc ) #undef GENTPROT2 #define GENTPROT2( ctype_x, ctype_y, chx, chy, varname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC2(chx,chy,varname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC2(chx,chy,varname) \ ( \ conj_t conjchi, \ const void* chi, \ diff --git a/frame/1/bli_l1v_tapi.c b/frame/1/bli_l1v_tapi.c index 01e3356d5f..abff96f521 100644 --- a/frame/1/bli_l1v_tapi.c +++ b/frame/1/bli_l1v_tapi.c @@ -59,7 +59,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -96,7 +96,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. 
*/ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -133,7 +133,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -172,8 +172,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) \ - cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -213,7 +212,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -255,7 +254,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -293,7 +292,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -327,7 +326,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. 
*/ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -363,7 +362,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -398,7 +397,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ diff --git a/frame/1d/bli_l1d_tapi.c b/frame/1d/bli_l1d_tapi.c index 60916cd568..465255d51f 100644 --- a/frame/1d/bli_l1d_tapi.c +++ b/frame/1d/bli_l1d_tapi.c @@ -98,7 +98,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ } \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Query the context for the operation's kernel address. */ \ PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ @@ -178,7 +178,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ } \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Query the context for the operation's kernel address. */ \ PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ @@ -238,7 +238,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ x1 = x + offx; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Query the context for the operation's kernel address. 
*/ \ PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ @@ -296,7 +296,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ x1 = x + offx; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Query the context for the operation's kernel address. */ \ PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ @@ -373,7 +373,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ incx = 2*incx; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Query the context for the operation's kernel address. */ \ PASTECH2(chr,kername,_ker_ft) f = bli_cntx_get_ukr_dt( dt_r, kerid, cntx ); \ @@ -432,7 +432,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ x1 = x + offx; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Query the context for the operation's kernel address. */ \ PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ @@ -510,7 +510,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ } \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Query the context for the operation's kernel address. */ \ PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ diff --git a/frame/1f/bli_l1f_tapi.c b/frame/1f/bli_l1f_tapi.c index 04d100cb30..e6735b5863 100644 --- a/frame/1f/bli_l1f_tapi.c +++ b/frame/1f/bli_l1f_tapi.c @@ -63,7 +63,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. 
*/ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -107,7 +107,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -152,7 +152,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -202,7 +202,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -252,7 +252,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ diff --git a/frame/1m/bli_l1m_tapi.c b/frame/1m/bli_l1m_tapi.c index 6b802b9fef..88d73ca64d 100644 --- a/frame/1m/bli_l1m_tapi.c +++ b/frame/1m/bli_l1m_tapi.c @@ -63,7 +63,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. 
*/ \ @@ -128,7 +128,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ @@ -201,7 +201,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( PASTEMAC(ch,eq0)( *alpha ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ @@ -268,7 +268,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* If alpha is zero, then we set the output matrix to zero. This seemingly minor optimization is important because it will clear @@ -359,7 +359,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ @@ -406,7 +406,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* If beta is zero, then the operation reduces to copym. */ \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ @@ -493,7 +493,7 @@ void PASTEMAC3(chx,chy,opname,EX_SUF) \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. 
*/ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* If beta is zero, then the operation reduces to copym. */ \ if ( PASTEMAC(chy,eq0)( *beta ) ) \ diff --git a/frame/1m/packm/bli_packm_cntl.c b/frame/1m/packm/bli_packm_cntl.c index e99ed9cf3d..d9f0d323a4 100644 --- a/frame/1m/packm/bli_packm_cntl.c +++ b/frame/1m/packm/bli_packm_cntl.c @@ -57,7 +57,7 @@ BLIS_EXPORT_BLIS cntl_t* bli_packm_cntl_create_node #endif // Allocate a packm_params_t struct. - params = bli_sba_acquire( rntm, sizeof( packm_params_t ) ); + bli_sba_acquire( rntm, sizeof( packm_params_t ), ( void** )&params ); // Initialize the packm_params_t struct. params->size = sizeof( packm_params_t ); diff --git a/frame/1m/unpackm/bli_unpackm_cntl.c b/frame/1m/unpackm/bli_unpackm_cntl.c index 95d0545bec..f5b7dc31da 100644 --- a/frame/1m/unpackm/bli_unpackm_cntl.c +++ b/frame/1m/unpackm/bli_unpackm_cntl.c @@ -53,6 +53,7 @@ cntl_t* bli_unpackm_cntl_create_node // Allocate an unpackm_params_t struct. params = bli_malloc_intl( sizeof( unpackm_params_t ), &r_val ); + //r_val = bli_sba_acquire( rntm, sizeof( packm_params_t ), ( void** )&params ); // Initialize the unpackm_params_t struct. params->size = sizeof( unpackm_params_t ); diff --git a/frame/2/bli_l2_tapi.c b/frame/2/bli_l2_tapi.c index 4bef7c81a2..65dda73974 100644 --- a/frame/2/bli_l2_tapi.c +++ b/frame/2/bli_l2_tapi.c @@ -70,7 +70,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim1( m_y ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* If x has zero elements, or if alpha is zero, scale y by beta and return early. */ \ @@ -147,7 +147,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim2( m, n ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \ \ /* Obtain a valid context from the gks if necessary.
*/ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Declare a void function pointer for the current operation. */ \ PASTECH2(ch,ftname,_unb_ft) f; \ @@ -197,7 +197,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ BLIS_TAPI_EX_DECLS \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* If x has zero elements, or if alpha is zero, scale y by beta and return early. */ \ @@ -281,7 +281,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ PASTEMAC2(chr,ch,copys)( *alpha, alpha_local ); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Declare a void function pointer for the current operation. */ \ PASTECH2(ch,ftname,_unb_ft) f; \ @@ -338,7 +338,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim1( m ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Declare a void function pointer for the current operation. */ \ PASTECH2(ch,ftname,_unb_ft) f; \ @@ -397,7 +397,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim1( m ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Declare a void function pointer for the current operation. */ \ PASTECH2(ch,ftname,_unb_ft) f; \ @@ -458,7 +458,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim1( m ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* If alpha is zero, set x to zero and return early. 
*/ \ if ( PASTEMAC(ch,eq0)( *alpha ) ) \ diff --git a/frame/3/bli_l3_oapi_ex.c b/frame/3/bli_l3_oapi_ex.c index 20b0294eb0..4019560469 100644 --- a/frame/3/bli_l3_oapi_ex.c +++ b/frame/3/bli_l3_oapi_ex.c @@ -74,11 +74,11 @@ void PASTEMAC(gemm,BLIS_OAPI_EX_SUF) } } - // Initialize a local runtime with global settings if necessary. Note - // that in the case that a runtime is passed in, we make a local copy. + // Initialize a local runtime. Use the global settings if the caller passed + // in a rntm_t* that is NULL. Otherwise, copy that rntm_t's contents to the + // local rntm_t and use it (instead of the caller's) going forward. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Default to using native execution. num_t dt = bli_obj_dt( c ); @@ -102,7 +102,7 @@ void PASTEMAC(gemm,BLIS_OAPI_EX_SUF) // If necessary, obtain a valid context from the gks using the induced // method id determined above. - if ( cntx == NULL ) cntx = bli_gks_query_ind_cntx( im ); + bli_gks_query_ind_cntx_if_null( im, &cntx ); // Check the operands. if ( bli_error_checking_is_enabled() ) @@ -128,11 +128,11 @@ void PASTEMAC(gemmt,BLIS_OAPI_EX_SUF) { bli_init_once(); - // Initialize a local runtime with global settings if necessary. Note - // that in the case that a runtime is passed in, we make a local copy. + // Initialize a local runtime. Use the global settings if the caller passed + // in a rntm_t* that is NULL. Otherwise, copy that rntm_t's contents to the + // local rntm_t and use it (instead of the caller's) going forward. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Default to using native execution. 
num_t dt = bli_obj_dt( c ); @@ -153,7 +153,7 @@ void PASTEMAC(gemmt,BLIS_OAPI_EX_SUF) // If necessary, obtain a valid context from the gks using the induced // method id determined above. - if ( cntx == NULL ) cntx = bli_gks_query_ind_cntx( im ); + bli_gks_query_ind_cntx_if_null( im, &cntx ); // Check the operands. if ( bli_error_checking_is_enabled() ) @@ -256,11 +256,11 @@ void PASTEMAC(hemm,BLIS_OAPI_EX_SUF) { bli_init_once(); - // Initialize a local runtime with global settings if necessary. Note - // that in the case that a runtime is passed in, we make a local copy. + // Initialize a local runtime. Use the global settings if the caller passed + // in a rntm_t* that is NULL. Otherwise, copy that rntm_t's contents to the + // local rntm_t and use it (instead of the caller's) going forward. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Default to using native execution. num_t dt = bli_obj_dt( c ); @@ -281,7 +281,7 @@ void PASTEMAC(hemm,BLIS_OAPI_EX_SUF) // If necessary, obtain a valid context from the gks using the induced // method id determined above. - if ( cntx == NULL ) cntx = bli_gks_query_ind_cntx( im ); + bli_gks_query_ind_cntx_if_null( im, &cntx ); // Check the operands. if ( bli_error_checking_is_enabled() ) @@ -306,11 +306,11 @@ void PASTEMAC(symm,BLIS_OAPI_EX_SUF) { bli_init_once(); - // Initialize a local runtime with global settings if necessary. Note - // that in the case that a runtime is passed in, we make a local copy. + // Initialize a local runtime. Use the global settings if the caller passed + // in a rntm_t* that is NULL. Otherwise, copy that rntm_t's contents to the + // local rntm_t and use it (instead of the caller's) going forward. 
rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Default to using native execution. num_t dt = bli_obj_dt( c ); @@ -331,7 +331,7 @@ void PASTEMAC(symm,BLIS_OAPI_EX_SUF) // If necessary, obtain a valid context from the gks using the induced // method id determined above. - if ( cntx == NULL ) cntx = bli_gks_query_ind_cntx( im ); + bli_gks_query_ind_cntx_if_null( im, &cntx ); // Check the operands. if ( bli_error_checking_is_enabled() ) @@ -356,11 +356,11 @@ void PASTEMAC(trmm3,BLIS_OAPI_EX_SUF) { bli_init_once(); - // Initialize a local runtime with global settings if necessary. Note - // that in the case that a runtime is passed in, we make a local copy. + // Initialize a local runtime. Use the global settings if the caller passed + // in a rntm_t* that is NULL. Otherwise, copy that rntm_t's contents to the + // local rntm_t and use it (instead of the caller's) going forward. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Default to using native execution. num_t dt = bli_obj_dt( c ); @@ -381,7 +381,7 @@ void PASTEMAC(trmm3,BLIS_OAPI_EX_SUF) // If necessary, obtain a valid context from the gks using the induced // method id determined above. - if ( cntx == NULL ) cntx = bli_gks_query_ind_cntx( im ); + bli_gks_query_ind_cntx_if_null( im, &cntx ); // Check the operands. if ( bli_error_checking_is_enabled() ) @@ -463,11 +463,11 @@ void PASTEMAC(trmm,BLIS_OAPI_EX_SUF) { bli_init_once(); - // Initialize a local runtime with global settings if necessary. Note - // that in the case that a runtime is passed in, we make a local copy. + // Initialize a local runtime. Use the global settings if the caller passed + // in a rntm_t* that is NULL. 
Otherwise, copy that rntm_t's contents to the + // local rntm_t and use it (instead of the caller's) going forward. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Default to using native execution. num_t dt = bli_obj_dt( b ); @@ -487,7 +487,7 @@ void PASTEMAC(trmm,BLIS_OAPI_EX_SUF) // If necessary, obtain a valid context from the gks using the induced // method id determined above. - if ( cntx == NULL ) cntx = bli_gks_query_ind_cntx( im ); + bli_gks_query_ind_cntx_if_null( im, &cntx ); // Check the operands. if ( bli_error_checking_is_enabled() ) @@ -510,11 +510,11 @@ void PASTEMAC(trsm,BLIS_OAPI_EX_SUF) { bli_init_once(); - // Initialize a local runtime with global settings if necessary. Note - // that in the case that a runtime is passed in, we make a local copy. + // Initialize a local runtime. Use the global settings if the caller passed + // in a rntm_t* that is NULL. Otherwise, copy that rntm_t's contents to the + // local rntm_t and use it (instead of the caller's) going forward. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Default to using native execution. num_t dt = bli_obj_dt( b ); @@ -534,7 +534,7 @@ void PASTEMAC(trsm,BLIS_OAPI_EX_SUF) // If necessary, obtain a valid context from the gks using the induced // method id determined above. - if ( cntx == NULL ) cntx = bli_gks_query_ind_cntx( im ); + bli_gks_query_ind_cntx_if_null( im, &cntx ); // Check the operands. if ( bli_error_checking_is_enabled() ) diff --git a/frame/3/bli_l3_sup.c b/frame/3/bli_l3_sup.c index eedbd9ec51..eece7b744e 100644 --- a/frame/3/bli_l3_sup.c +++ b/frame/3/bli_l3_sup.c @@ -58,7 +58,7 @@ err_t bli_gemmsup // Obtain a valid (native) context from the gks if necessary. 
// NOTE: This must be done before calling the _check() function, since // that function assumes the context pointer is valid. - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( &cntx ); \ // Return early if a microkernel preference-induced transposition would // have been performed and shifted the dimensions outside of the space @@ -86,11 +86,11 @@ err_t bli_gemmsup return BLIS_FAILURE; } - // Initialize a local runtime with global settings if necessary. Note - // that in the case that a runtime is passed in, we make a local copy. + // Initialize a local runtime. Use the global settings if the caller passed + // in a rntm_t* that is NULL. Otherwise, copy that rntm_t's contents to the + // local rntm_t and use it (instead of the caller's) going forward. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); #if 0 const num_t dt = bli_obj_dt( c ); @@ -156,7 +156,7 @@ err_t bli_gemmtsup // Obtain a valid (native) context from the gks if necessary. // NOTE: This must be done before calling the _check() function, since // that function assumes the context pointer is valid. - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( &cntx ); \ // Return early if the problem dimensions exceed their sup thresholds. // Notice that we do not bother to check whether the microkernel @@ -171,11 +171,11 @@ err_t bli_gemmtsup return BLIS_FAILURE; } - // Initialize a local runtime with global settings if necessary. Note - // that in the case that a runtime is passed in, we make a local copy. + // Initialize a local runtime. Use the global settings if the caller passed + // in a rntm_t* that is NULL. Otherwise, copy that rntm_t's contents to the + // local rntm_t and use it (instead of the caller's) going forward. 
rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // We've now ruled out the possibility that the sup thresholds are // unsatisfied. diff --git a/frame/3/bli_l3_thrinfo.c b/frame/3/bli_l3_thrinfo.c index f866cfd4c5..04da3a4d32 100644 --- a/frame/3/bli_l3_thrinfo.c +++ b/frame/3/bli_l3_thrinfo.c @@ -64,7 +64,7 @@ void bli_l3_sup_thrinfo_free // ----------------------------------------------------------------------------- -void bli_l3_thrinfo_create_root +err_t bli_l3_thrinfo_create_root ( dim_t id, thrcomm_t* gl_comm, @@ -88,7 +88,8 @@ void bli_l3_thrinfo_create_root dim_t work_id = gl_comm_id / ( n_threads / xx_way ); // Create the root thrinfo_t node. - *thread = bli_thrinfo_create + return + bli_thrinfo_create ( rntm, gl_comm, @@ -97,13 +98,14 @@ void bli_l3_thrinfo_create_root work_id, TRUE, bszid, - NULL + NULL, + thread ); } // ----------------------------------------------------------------------------- -void bli_l3_sup_thrinfo_create_root +err_t bli_l3_sup_thrinfo_create_root ( dim_t id, thrcomm_t* gl_comm, @@ -130,7 +132,8 @@ void bli_l3_sup_thrinfo_create_root dim_t work_id = gl_comm_id / ( n_threads / xx_way ); // Create the root thrinfo_t node. 
- *thread = bli_thrinfo_create + return + bli_thrinfo_create ( rntm, gl_comm, @@ -139,7 +142,8 @@ void bli_l3_sup_thrinfo_create_root work_id, TRUE, bszid, - NULL + NULL, + thread ); } diff --git a/frame/3/bli_l3_thrinfo.h b/frame/3/bli_l3_thrinfo.h index 37a3909fd6..bb89875127 100644 --- a/frame/3/bli_l3_thrinfo.h +++ b/frame/3/bli_l3_thrinfo.h @@ -101,7 +101,7 @@ void bli_l3_sup_thrinfo_free // ----------------------------------------------------------------------------- -void bli_l3_thrinfo_create_root +err_t bli_l3_thrinfo_create_root ( dim_t id, thrcomm_t* gl_comm, @@ -110,7 +110,7 @@ void bli_l3_thrinfo_create_root thrinfo_t** thread ); -void bli_l3_sup_thrinfo_create_root +err_t bli_l3_sup_thrinfo_create_root ( dim_t id, thrcomm_t* gl_comm, diff --git a/frame/3/gemm/bli_gemm_md.c b/frame/3/gemm/bli_gemm_md.c index 1e23d058e0..7ae8ba2f2b 100644 --- a/frame/3/gemm/bli_gemm_md.c +++ b/frame/3/gemm/bli_gemm_md.c @@ -439,7 +439,8 @@ mddm_t bli_gemm_md_rcc // the target datatype. (The packm_blk_var1_md() function has "built-in" // support for packing to 1r (and 1e) schemas, whereas the // packm_blk_var1() function relies on packm kernels for packing to 1r. - const cntx_t* cntx_1m = bli_gks_query_ind_cntx( BLIS_1M ); + const cntx_t* cntx_1m; + bli_gks_query_ind_cntx( BLIS_1M, &cntx_1m ); const func_t* packm_1m_mr = bli_cntx_get_ukrs( BLIS_PACKM_MRXK_KER, cntx_1m ); const func_t* packm_1m_nr = bli_cntx_get_ukrs( BLIS_PACKM_NRXK_KER, cntx_1m ); diff --git a/frame/base/bli_apool.c b/frame/base/bli_apool.c index a42c7103e5..416cbe3e1b 100644 --- a/frame/base/bli_apool.c +++ b/frame/base/bli_apool.c @@ -34,7 +34,7 @@ #include "blis.h" -void bli_apool_init +err_t bli_apool_init ( apool_t* apool ) @@ -83,6 +83,11 @@ void bli_apool_init // ------------------------------------------------------------------------- + // Start off with a zeroed-out apool pool_t structure. 
+ // NOTE: This is especially important because it zeroes out the .block_ptrs + // field, which bli_apool_finalize() uses to decide whether to return early. + bli_pool_clear( pool ); + // Make sure that block_ptrs_len is at least num_blocks. block_ptrs_len = bli_max( block_ptrs_len, num_blocks ); @@ -94,25 +99,13 @@ void bli_apool_init // Allocate the block_ptrs array. array_t** block_ptrs = - bli_malloc_intl( block_ptrs_len * sizeof( array_t* ), &r_val ); + bli_calloc_intl( block_ptrs_len * sizeof( array_t* ), &r_val ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_apool_init(): allocating %d array_t.\n", ( int )num_blocks ); fflush( stdout ); #endif - // Allocate and initialize each entry in the block_ptrs array. - for ( dim_t i = 0; i < num_blocks; ++i ) - { - // Pass in num_elem so the function knows how many elements to - // initially have in each array_t. - bli_apool_alloc_block - ( - num_elem, - &(block_ptrs[i]) - ); - } - // NOTE: The semantics of top_index approximate a stack, where a "full" // stack (no blocks checked out) is one where top_index == 0 and an empty // stack (all blocks checked out) one where top_index == num_blocks. @@ -124,9 +117,16 @@ void bli_apool_init // number line in which blocks are checked out from lowest to highest, // and additional blocks are added at the higher end. + // If the allocation failed, return the error code immediately. + bli_check_return_if_failure( r_val ); + // Initialize the pool_t structure. // NOTE: We don't use the malloc_fp and free_fp fields at the apool_t // level. Nevertheless, we set them to NULL. + // NOTE: Given that the calloc() succeeded, we must set these fields so + // that if any of the below calls to bli_apool_alloc_block() fail, there + // will be enough information in the structure to allow bli_apool_finalize() + // to de-allocate what was allocated. 
bli_pool_set_block_ptrs( block_ptrs, pool ); bli_pool_set_block_ptrs_len( block_ptrs_len, pool ); bli_pool_set_top_index( 0, pool ); @@ -135,12 +135,29 @@ void bli_apool_init bli_pool_set_align_size( align_size, pool ); bli_pool_set_malloc_fp( NULL, pool ); bli_pool_set_free_fp( NULL, pool ); + + // Allocate and initialize each entry in the block_ptrs array. + for ( dim_t i = 0; i < num_blocks; ++i ) + { + // Pass in num_elem so the function knows how many elements to + // initially have in each array_t. + r_val = bli_apool_alloc_block + ( + num_elem, + &(block_ptrs[i]) + ); + + // If the allocation failed, finalize the apool and return the error. + bli_check_callthen_return_if_failure( bli_apool_finalize( apool ), r_val ); + } + + return BLIS_SUCCESS; } -void bli_apool_alloc_block +err_t bli_apool_alloc_block ( siz_t num_elem, - array_t** array_p + array_t** array ) { err_t r_val; @@ -153,16 +170,20 @@ void bli_apool_alloc_block printf( "bli_apool_alloc_block(): allocating array_t: " ); #endif - // Allocate the array_t via the bli_fmalloc_align() wrapper, which performs - // alignment logic and opaquely saves the original pointer so that it can - // be recovered when it's time to free the block. - array_t* array = bli_malloc_intl( block_size, &r_val ); + // Allocate the array structure. We use calloc() so that all fields are + // initialized to zero, or NULL. + *array = bli_calloc_intl( block_size, &r_val ); + + // If the allocation failed, return the error code immediately. + bli_check_return_if_failure( r_val ); // Initialize an array_t struct within the newly allocated memory region. - bli_array_init( num_elem, sizeof( pool_t* ), array ); + r_val = bli_array_init( num_elem, sizeof( pool_t* ), *array ); + + // If the allocation failed, return the error code immediately. + bli_check_return_if_failure( r_val ); - // Save the pointer in the caller's array_t*. 
- *array_p = array; + return BLIS_SUCCESS; } void bli_apool_free_block @@ -170,55 +191,64 @@ void bli_apool_free_block array_t* array ) { + // Return early if the pointer to the array_t is NULL. + if ( array == NULL ) return; + const siz_t num_elem = bli_array_num_elem( array ); pool_t** buf = bli_array_buf( array ); - // Step through the array and finalize each pool_t. - for ( dim_t i = 0; i < num_elem; ++i ) + // Skip iterating over the buffer if it was never allocated. + if ( buf != NULL ) { - pool_t* pool = buf[ i ]; - - #ifdef BLIS_ENABLE_MEM_TRACING - printf( "bli_apool_free_block(): freeing pool_t %d within array_t.\n", - ( int )i ); - fflush( stdout ); - #endif - - // Finalize and free the current pool_t, if it was created/allocated. - if ( pool != NULL ) + // Step through the array and finalize each pool_t. + for ( dim_t i = 0; i < num_elem; ++i ) { - // Finalize the pool. - bli_pool_finalize( pool ); + pool_t* pool = buf[ i ]; #ifdef BLIS_ENABLE_MEM_TRACING - printf( "bli_apool_free_block(): pool_t %d: ", ( int )i ); + printf( "bli_apool_free_block(): freeing pool_t %d within array_t.\n", + ( int )i ); + fflush( stdout ); #endif - // Free the pool_t struct. - bli_free_intl( pool ); + // Finalize and free the current pool_t, if it was created/allocated. + if ( pool != NULL ) + { + // Finalize the pool. + bli_pool_finalize( pool ); + + #ifdef BLIS_ENABLE_MEM_TRACING + printf( "bli_apool_free_block(): pool_t %d: ", ( int )i ); + #endif + + // Free the pool_t struct. + bli_free_intl( pool ); + } } - } - #ifdef BLIS_ENABLE_MEM_TRACING - printf( "bli_apool_free_block(): " ); - #endif + #ifdef BLIS_ENABLE_MEM_TRACING + printf( "bli_apool_free_block(): " ); + #endif - // Free the array buffer. - bli_array_finalize( array ); + // Free the array buffer. + bli_array_finalize( array ); + } #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_apool_free_block(): freeing array_t: " ); #endif - // Free the array. + // Free the array structure. 
bli_free_intl( array ); } -void bli_apool_finalize +err_t bli_apool_finalize ( apool_t* apool ) { + err_t r_val; + // NOTE: Since the apool_t's mutex is now initialized statically, we no // longer need to explicitly destroy it. @@ -236,14 +266,23 @@ void bli_apool_finalize // Query the block_ptrs array. array_t** block_ptrs = bli_pool_block_ptrs( pool ); + // Return early if the block_ptrs array is NULL. This would typically + // indicate that the pool structure was cleared but never initialized. + if ( block_ptrs == NULL ) return BLIS_SUCCESS; + // Query the total number of blocks currently allocated. siz_t num_blocks = bli_pool_num_blocks( pool ); // Query the top_index of the pool. siz_t top_index = bli_pool_top_index( pool ); - // Sanity check: The top_index should be zero. - if ( top_index != 0 ) bli_abort(); + // Sanity check: The top_index should be zero. If it's not, then at + // least one block is still checked out to a thread. + if ( bli_error_checking_is_enabled() ) + { + r_val = bli_check_outstanding_mem_pool_blocks( top_index ); + bli_check_return_if_failure( r_val ); + } // Free the individual blocks (each an array_t) currently in the pool. for ( dim_t i = 0; i < num_blocks; ++i ) @@ -264,14 +303,22 @@ void bli_apool_finalize // Free the block_ptrs array. bli_free_intl( block_ptrs ); + + // Clear the pool structure. + bli_pool_clear( pool ); + + return BLIS_SUCCESS; } -array_t* bli_apool_checkout_array +err_t bli_apool_checkout_array ( - siz_t n_threads, - apool_t* apool + siz_t n_threads, + const array_t** array, + apool_t* apool ) { + err_t r_val; + // Acquire the apool_t's mutex. bli_apool_lock( apool ); @@ -290,7 +337,10 @@ array_t* bli_apool_checkout_array fflush( stdout ); #endif - bli_apool_grow( 1, apool ); + r_val = bli_apool_grow( 1, apool ); + + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); } // At this point, at least one array_t is guaranteed to be available.
@@ -311,7 +361,7 @@ array_t* bli_apool_checkout_array #endif // Select the array_t* at top_index to return to the caller. - array_t* array = block_ptrs[ top_index ]; + array_t* array_p = block_ptrs[ top_index ]; // Increment the pool's top_index. bli_pool_set_top_index( top_index + 1, pool ); @@ -323,10 +373,12 @@ array_t* bli_apool_checkout_array // Resize the array_t according to the number of threads specified by the // caller. (We need one element in the array_t per thread.) - bli_array_resize( n_threads, array ); + bli_array_resize( n_threads, array_p ); - // Return the selected array_t*. - return array; + // Set the array pointer to the selected array_t*. + *array = array_p; + + return BLIS_SUCCESS; } void bli_apool_checkin_array @@ -372,10 +424,11 @@ void bli_apool_checkin_array bli_apool_unlock( apool ); } -pool_t* bli_apool_array_elem +err_t bli_apool_array_elem ( siz_t index, - array_t* array + array_t* array, + pool_t** pool ) { err_t r_val; @@ -389,12 +442,12 @@ pool_t* bli_apool_array_elem // stores in the array_t are pool_t*, that means that the function is // actually returning the address of a pool_t*, or pool_t**, hence the // dereferencing below. - pool_t** pool_p = bli_array_elem( index, array ); - pool_t* pool = *pool_p; + pool_t** pool_pp = bli_array_elem( index, array ); + pool_t* pool_p = *pool_pp; // If the element is NULL, then it means a pool_t has not yet been created // and allocated for the given index (thread id). - if ( pool == NULL ) + if ( pool_p == NULL ) { // Settle on the parameters to use when initializing the pool_t for // the current index within the array_t. @@ -429,10 +482,13 @@ pool_t* bli_apool_array_elem #endif // Allocate the pool_t. - pool = bli_malloc_intl( sizeof( pool_t ), &r_val ); + pool_p = bli_malloc_intl( sizeof( pool_t ), &r_val ); + + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); // Initialize the pool_t. 
- bli_pool_init + r_val = bli_pool_init ( num_blocks, block_ptrs_len, @@ -441,25 +497,31 @@ pool_t* bli_apool_array_elem offset_size, malloc_fp, free_fp, - pool + pool_p ); + // If the previous function failed, free the pool_t we just allocated + // and return the error. + bli_check_callthen_return_if_failure( bli_free_intl( pool_p ), r_val ); + // Update the array element with the address to the new pool_t. // NOTE: We pass in the address of the pool_t* since the bli_array // API is generalized for arbitrarily-sized elements, and therefore - // it must always take the address of the data, rather than the - // value (which it can only do if the elem size were fixed). - bli_array_set_elem( &pool, index, array ); + // it must always take the address of the data, rather than the value + // (which it would only be able to do if the elem size were fixed). + bli_array_set_elem( &pool_p, index, array ); } // The array element is now guaranteed to refer to an allocated and // initialized pool_t. - // Return the array element. - return pool; + // Set the pool pointer to the newly allocated and initialized pool_t. + *pool = pool_p; + + return BLIS_SUCCESS; } -void bli_apool_grow +err_t bli_apool_grow ( siz_t num_blocks_add, apool_t* apool @@ -468,7 +530,7 @@ void bli_apool_grow err_t r_val; // If the requested increase is zero, return early. - if ( num_blocks_add == 0 ) return; + if ( num_blocks_add == 0 ) return BLIS_SUCCESS; // Query the underlying pool_t from the apool_t. pool_t* pool = bli_apool_pool( apool ); @@ -507,7 +569,10 @@ void bli_apool_grow // Allocate a new block_ptrs array. array_t** block_ptrs_new = - bli_malloc_intl( block_ptrs_len_new * sizeof( array_t* ), &r_val ); + bli_calloc_intl( block_ptrs_len_new * sizeof( array_t* ), &r_val ); + + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); // Query the top_index of the pool. 
const siz_t top_index = bli_pool_top_index( pool ); @@ -547,19 +612,27 @@ void bli_apool_grow fflush( stdout ); #endif + dim_t i; + // Allocate the requested additional blocks in the resized array. - for ( dim_t i = num_blocks_cur; i < num_blocks_new; ++i ) + for ( i = num_blocks_cur; i < num_blocks_new; ++i ) { - bli_apool_alloc_block + r_val = bli_apool_alloc_block ( num_elem, &(block_ptrs[i]) ); + + // If the previous function failed, update the number of blocks in the + // pool to reflect the number that were added and then return the error. + bli_check_callthen_return_if_failure( bli_pool_set_num_blocks( i, pool ), r_val ); } // Update the pool_t struct with the new number of allocated blocks. // Notice that top_index remains unchanged, as do the block_size and // align_size fields. bli_pool_set_num_blocks( num_blocks_new, pool ); + + return BLIS_SUCCESS; } diff --git a/frame/base/bli_apool.h b/frame/base/bli_apool.h index d06f79207b..052d8a45d2 100644 --- a/frame/base/bli_apool.h +++ b/frame/base/bli_apool.h @@ -92,19 +92,30 @@ BLIS_INLINE void bli_apool_set_def_array_len( siz_t def_array_len, apool_t* pool // ----------------------------------------------------------------------------- -void bli_apool_init +err_t bli_apool_init ( apool_t* apool ); -void bli_apool_finalize +err_t bli_apool_finalize ( apool_t* apool ); -array_t* bli_apool_checkout_array +err_t bli_apool_alloc_block ( - siz_t n_threads, - apool_t* apool + siz_t num_elem, + array_t** array + ); +void bli_apool_free_block + ( + array_t* array + ); + +err_t bli_apool_checkout_array + ( + siz_t n_threads, + const array_t** array, + apool_t* apool ); void bli_apool_checkin_array ( @@ -112,28 +123,18 @@ void bli_apool_checkin_array apool_t* apool ); -pool_t* bli_apool_array_elem +err_t bli_apool_array_elem ( siz_t index, - array_t* array + array_t* array, + pool_t** pool ); -void bli_apool_grow +err_t bli_apool_grow ( siz_t num_blocks_add, apool_t* apool ); -void bli_apool_alloc_block - ( - siz_t 
num_elem, - array_t** array_p - ); -void bli_apool_free_block - ( - array_t* array - ); - - #endif diff --git a/frame/base/bli_arch.c b/frame/base/bli_arch.c index 48b50a7748..1084c43b0e 100644 --- a/frame/base/bli_arch.c +++ b/frame/base/bli_arch.c @@ -67,32 +67,74 @@ // The arch_t id for the currently running hardware. We initialize to -1, // which will be overwritten upon calling bli_arch_set_id(). -static arch_t id = -1; +static arch_t the_id = -1; -arch_t bli_arch_query_id( void ) +err_t bli_arch_query_id( arch_t* id ) { +#ifndef BLIS_CONFIGURETIME_CPUID + + // Make sure the arch_t id has been set. + err_t r_val = bli_arch_set_id_once(); + bli_check_return_if_failure( r_val ); + +#else + + // configure's cpuid driver doesn't look at error codes, so we don't + // bother returning it. bli_arch_set_id_once(); - // Simply return the id that was previously cached. - return id; +#endif + + // Set the arch_t pointer. + *id = the_id; + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -// A pthread structure used in pthread_once(). pthread_once() is guaranteed to -// execute exactly once among all threads that pass in this control object. -static bli_pthread_once_t once_id = BLIS_PTHREAD_ONCE_INIT; +static bli_pthread_mutex_t arch_id_mutex = BLIS_PTHREAD_MUTEX_INITIALIZER; -void bli_arch_set_id_once( void ) +err_t bli_arch_set_id_once( void ) { #ifndef BLIS_CONFIGURETIME_CPUID - bli_pthread_once( &once_id, bli_arch_set_id ); + + err_t r_val = BLIS_SUCCESS; + + // If the arch_t id hasn't already been set, continue to the + // mutex acquisition step. + if ( the_id == -1 ) + { + // Acquire the mutex protecting initialization. + bli_pthread_mutex_lock( &arch_id_mutex ); + + // Check the arch_t id again now that we've obtained the lock. + if ( the_id == -1 ) + { + r_val = bli_arch_set_id(); + } + + // Release the mutex protecting initialization. 
+ bli_pthread_mutex_unlock( &arch_id_mutex ); + + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); + } + +#else + + // configure's cpuid driver doesn't look at error codes, so we don't + // bother returning it. + bli_arch_set_id(); + #endif + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -void bli_arch_set_id( void ) +err_t bli_arch_set_id( void ) { // Check the environment variable BLIS_ARCH_DEBUG to see if the user // requested that we echo the result of the subconfiguration selection. @@ -104,38 +146,10 @@ void bli_arch_set_id( void ) dim_t req_id = bli_env_get_var( "BLIS_ARCH_TYPE", -1 ); #ifndef BLIS_CONFIGURETIME_CPUID - if ( req_id != -1 ) - { - // BLIS_ARCH_TYPE was set. Cautiously check whether its value is usable. - - // If req_id was set to an invalid arch_t value (ie: outside the range - // [0,BLIS_NUM_ARCHS-1]), output an error message and abort. - if ( bli_error_checking_is_enabled() ) - { - err_t e_val = bli_check_valid_arch_id( req_id ); - bli_check_error_code( e_val ); - } - - // At this point, we know that req_id is in the valid range, but we - // don't yet know if it refers to a context that was actually - // initialized. Query the address of an internal context data structure - // corresponding to req_id. This pointer will be NULL if the associated - // subconfig is not available. - const cntx_t* const * req_cntx = bli_gks_lookup_id( req_id ); - - // This function checks the context pointer and aborts with a useful - // error message if the pointer is found to be NULL. - if ( bli_error_checking_is_enabled() ) - { - err_t e_val = bli_check_initialized_gks_cntx( req_cntx ); - bli_check_error_code( e_val ); - } - - // Finally, we can be confident that req_id (1) is in range and (2) - // refers to a context that has been initialized. - id = req_id; - } - else + // If req_id is -1, it indicates that BLIS_ARCH_TYPE was NOT set. 
This means + // we must query the hardware for the id (or read whatever value was hard- + // coded at configure-time). + if ( req_id == -1 ) #endif { // BLIS_ARCH_TYPE was unset. Proceed with normal subconfiguration @@ -147,104 +161,144 @@ void bli_arch_set_id( void ) defined BLIS_FAMILY_X86_64 || \ defined BLIS_FAMILY_ARM64 || \ defined BLIS_FAMILY_ARM32 - id = bli_cpuid_query_id(); + req_id = bli_cpuid_query_id(); #endif // Intel microarchitectures. #ifdef BLIS_FAMILY_SKX - id = BLIS_ARCH_SKX; + req_id = BLIS_ARCH_SKX; #endif #ifdef BLIS_FAMILY_KNL - id = BLIS_ARCH_KNL; + req_id = BLIS_ARCH_KNL; #endif #ifdef BLIS_FAMILY_KNC - id = BLIS_ARCH_KNC; + req_id = BLIS_ARCH_KNC; #endif #ifdef BLIS_FAMILY_HASWELL - id = BLIS_ARCH_HASWELL; + req_id = BLIS_ARCH_HASWELL; #endif #ifdef BLIS_FAMILY_SANDYBRIDGE - id = BLIS_ARCH_SANDYBRIDGE; + req_id = BLIS_ARCH_SANDYBRIDGE; #endif #ifdef BLIS_FAMILY_PENRYN - id = BLIS_ARCH_PENRYN; + req_id = BLIS_ARCH_PENRYN; #endif // AMD microarchitectures. #ifdef BLIS_FAMILY_ZEN3 - id = BLIS_ARCH_ZEN3; + req_id = BLIS_ARCH_ZEN3; #endif #ifdef BLIS_FAMILY_ZEN2 - id = BLIS_ARCH_ZEN2; + req_id = BLIS_ARCH_ZEN2; #endif #ifdef BLIS_FAMILY_ZEN - id = BLIS_ARCH_ZEN; + req_id = BLIS_ARCH_ZEN; #endif #ifdef BLIS_FAMILY_EXCAVATOR - id = BLIS_ARCH_EXCAVATOR; + req_id = BLIS_ARCH_EXCAVATOR; #endif #ifdef BLIS_FAMILY_STEAMROLLER - id = BLIS_ARCH_STEAMROLLER; + req_id = BLIS_ARCH_STEAMROLLER; #endif #ifdef BLIS_FAMILY_PILEDRIVER - id = BLIS_ARCH_PILEDRIVER; + req_id = BLIS_ARCH_PILEDRIVER; #endif #ifdef BLIS_FAMILY_BULLDOZER - id = BLIS_ARCH_BULLDOZER; + req_id = BLIS_ARCH_BULLDOZER; #endif // ARM microarchitectures. 
#ifdef BLIS_FAMILY_ARMSVE - id = BLIS_ARCH_ARMSVE; + req_id = BLIS_ARCH_ARMSVE; #endif #ifdef BLIS_FAMILY_A64FX - id = BLIS_ARCH_A64FX; + req_id = BLIS_ARCH_A64FX; #endif #ifdef BLIS_FAMILY_FIRESTORM - id = BLIS_ARCH_FIRESTORM; + req_id = BLIS_ARCH_FIRESTORM; #endif #ifdef BLIS_FAMILY_THUNDERX2 - id = BLIS_ARCH_THUNDERX2; + req_id = BLIS_ARCH_THUNDERX2; #endif #ifdef BLIS_FAMILY_CORTEXA57 - id = BLIS_ARCH_CORTEXA57; + req_id = BLIS_ARCH_CORTEXA57; #endif #ifdef BLIS_FAMILY_CORTEXA53 - id = BLIS_ARCH_CORTEXA53; + req_id = BLIS_ARCH_CORTEXA53; #endif #ifdef BLIS_FAMILY_CORTEXA15 - id = BLIS_ARCH_CORTEXA15; + req_id = BLIS_ARCH_CORTEXA15; #endif #ifdef BLIS_FAMILY_CORTEXA9 - id = BLIS_ARCH_CORTEXA9; + req_id = BLIS_ARCH_CORTEXA9; #endif // IBM microarchitectures. #ifdef BLIS_FAMILY_POWER10 - id = BLIS_ARCH_POWER10; + req_id = BLIS_ARCH_POWER10; #endif #ifdef BLIS_FAMILY_POWER9 - id = BLIS_ARCH_POWER9; + req_id = BLIS_ARCH_POWER9; #endif #ifdef BLIS_FAMILY_POWER7 - id = BLIS_ARCH_POWER7; + req_id = BLIS_ARCH_POWER7; #endif #ifdef BLIS_FAMILY_BGQ - id = BLIS_ARCH_BGQ; + req_id = BLIS_ARCH_BGQ; #endif // Generic microarchitecture. #ifdef BLIS_FAMILY_GENERIC - id = BLIS_ARCH_GENERIC; + req_id = BLIS_ARCH_GENERIC; #endif } - if ( bli_arch_get_logging() ) - fprintf( stderr, "libblis: selecting sub-configuration '%s'.\n", - bli_arch_string( id ) ); +#ifndef BLIS_CONFIGURETIME_CPUID + // If req_id was set to an invalid arch_t value (ie: outside the range + // [0,BLIS_NUM_ARCHS-1]), output an error message and abort. + if ( bli_error_checking_is_enabled() ) + { + err_t e_val = bli_check_valid_arch_id( req_id ); + bli_check_return_error_code( e_val ); + } + + // At this point, we know that req_id is in the valid range, but we + // don't yet know if it refers to a context that was actually + // initialized. Query the address of an internal context data structure + // corresponding to req_id. This pointer will be NULL if the associated + // subconfig is not available. 
+ const cntx_t* const * req_cntx = bli_gks_lookup_id( req_id ); + + // This function checks the context pointer and aborts with a useful + // error message if the pointer is found to be NULL. + if ( bli_error_checking_is_enabled() ) + { + err_t e_val = bli_check_initialized_gks_cntx( req_cntx ); + bli_check_return_error_code( e_val ); + } +#endif - //printf( "blis_arch_query_id(): id = %u\n", id ); + // Finally, we can be confident that req_id (1) is in range and (2) + // refers to a context that has been initialized. So we cache it to + // the local static variable. + the_id = req_id; + + //printf( "blis_arch_query_id(): the_id = %u\n", id ); //exit(1); + + if ( bli_arch_get_logging() ) + { + // Query the string associated with the detected/chosen arch_t id. + // Note that we don't need to check the error code returned by + // bli_arch_string() since we've already confirmed that the arch_t + // id is valid. + const char* str; + bli_arch_string( the_id, &str ); + + fprintf( stderr, "libblis: selecting sub-configuration '%s'.\n", str ); + } + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- @@ -287,9 +341,20 @@ static const char* config_name[ BLIS_NUM_ARCHS ] = "generic" }; -const char* bli_arch_string( arch_t id ) +err_t bli_arch_string( arch_t id, const char** str ) { - return config_name[ id ]; +#ifndef BLIS_CONFIGURETIME_CPUID + err_t r_val = BLIS_SUCCESS; + + r_val = bli_check_valid_arch_id( id ); + bli_check_return_error_code( r_val ); +#endif + + // If the caller passed in a valid id, index into the string array and + // "return" the appropriate pointer. 
+ *str = config_name[ id ]; + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- diff --git a/frame/base/bli_arch.h b/frame/base/bli_arch.h index 08af7ae79d..dfb84b0666 100644 --- a/frame/base/bli_arch.h +++ b/frame/base/bli_arch.h @@ -35,16 +35,16 @@ #ifndef BLIS_ARCH_H #define BLIS_ARCH_H -BLIS_EXPORT_BLIS arch_t bli_arch_query_id( void ); +BLIS_EXPORT_BLIS err_t bli_arch_query_id( arch_t* id ); -void bli_arch_set_id_once( void ); -void bli_arch_set_id( void ); +err_t bli_arch_set_id_once( void ); +err_t bli_arch_set_id( void ); -BLIS_EXPORT_BLIS const char* bli_arch_string( arch_t id ); +BLIS_EXPORT_BLIS err_t bli_arch_string( arch_t id, const char** str ); -void bli_arch_set_logging( bool dolog ); -bool bli_arch_get_logging( void ); -void bli_arch_log( const char*, ... ); +void bli_arch_set_logging( bool dolog ); +bool bli_arch_get_logging( void ); +void bli_arch_log( const char*, ... ); #endif diff --git a/frame/base/bli_array.c b/frame/base/bli_array.c index ea47a0024c..716eb0c6ce 100644 --- a/frame/base/bli_array.c +++ b/frame/base/bli_array.c @@ -36,7 +36,7 @@ //#define BLIS_ENABLE_MEM_TRACING -void bli_array_init +err_t bli_array_init ( siz_t num_elem, siz_t elem_size, @@ -45,6 +45,9 @@ void bli_array_init { err_t r_val; + // Start off with a zeroed-out array_t structure. + bli_array_clear( array ); + #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_array_init(): allocating array [%d * %d]: ", ( int )num_elem, ( int )elem_size ); @@ -53,22 +56,29 @@ void bli_array_init // Compute the total size (in bytes) of the array. const size_t array_size = num_elem * elem_size; - // Allocate the array buffer. - void* buf = bli_malloc_intl( array_size, &r_val ); + // Allocate the array buffer. We use calloc() so that all elements are + // initialized to zero, or NULL. This allows us to deallocate only those + // blocks that were allocated in the event of a failure. 
+ void* buf = bli_calloc_intl( array_size, &r_val ); - // Initialize the array elements to zero. THIS IS IMPORANT because - // consumer threads will use the NULL-ness of the array elements to - // determine if the corresponding block (data structure) needs to be - // created/allocated and initialized. - memset( buf, 0, array_size ); + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); // Initialize the array_t structure. bli_array_set_buf( buf, array ); bli_array_set_num_elem( num_elem, array ); bli_array_set_elem_size( elem_size, array ); + + // Initialize the array elements to zero. THIS IS IMPORTANT because + // consumer threads will use the NULL-ness of the array elements to + // determine if the corresponding block (data structure) needs to be + // created/allocated and initialized. + //memset( buf, 0, array_size ); + + return BLIS_SUCCESS; } -void bli_array_resize +err_t bli_array_resize ( siz_t num_elem_new, array_t* array @@ -81,7 +91,7 @@ void bli_array_resize // If the new requested size (number of elements) is less than or equal to // the current size, no action is needed; return early. - if ( num_elem_new <= num_elem_prev ) return; + if ( num_elem_new <= num_elem_prev ) return BLIS_SUCCESS; // At this point, we know that num_elem_prev < num_elem_new, which means // we need to proceed with the resizing. @@ -104,6 +114,9 @@ void bli_array_resize // Allocate a new array buffer. char* buf_new = bli_malloc_intl( array_size_new, &r_val ); + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); + // Copy the previous array contents to the new array. memcpy( buf_new, buf_prev, array_size_prev ); @@ -125,9 +138,11 @@ void bli_array_resize // NOTE: The array elem_size field does not need updating.
bli_array_set_buf( buf_new, array ); bli_array_set_num_elem( num_elem_new, array ); + + return BLIS_SUCCESS; } -void bli_array_finalize +err_t bli_array_finalize ( array_t* array ) @@ -142,6 +157,8 @@ void bli_array_finalize // Free the buffer. bli_free_intl( buf ); + + return BLIS_SUCCESS; } void* bli_array_elem @@ -151,10 +168,10 @@ void* bli_array_elem ) { // Query the number of elements in the array. - const siz_t num_elem = bli_array_num_elem( array ); + //const siz_t num_elem = bli_array_num_elem( array ); // Sanity check: disallow access beyond the bounds of the array. - if ( num_elem <= index ) bli_abort(); + //if ( num_elem <= index ) bli_abort(); // Query the size of each element in the array. const siz_t elem_size = bli_array_elem_size( array ); @@ -183,31 +200,18 @@ void bli_array_set_elem // Query the buffer from the array as a char*. char* buf = bli_array_buf( array ); -// memcpy() is the only safe way to copy data of unknown type -#if 0 - if ( elem_size == sizeof( void* ) ) - { - #ifdef BLIS_ENABLE_MEM_TRACING - printf( "bli_array_set_elem(): elem_size is %d; setting index %d.\n", - ( int )elem_size, ( int )index ); - fflush( stdout ); - #endif - - // Special case: Handle elem_size = sizeof( void* ) without calling - // memcpy(). - void** buf_vvp = ( void** )buf; - void** elem_vvp = ( void** )elem; - - buf_vvp[ index ] = *elem_vvp; - } - else - { -#endif - // General case: Copy the elem_size bytes from elem to buf at the - // element index specified by index. - memcpy( &buf[ index * elem_size ], elem, ( size_t )elem_size ); -#if 0 - } -#endif + // Copy the elem_size bytes from elem to buf at the element index specified + // by index. 
+ memcpy( &buf[ index * elem_size ], elem, ( size_t )elem_size ); +} + +void bli_array_clear + ( + array_t* array + ) +{ + bli_array_set_buf( NULL, array ); + bli_array_set_num_elem( 0, array ); + bli_array_set_elem_size( 0, array ); } diff --git a/frame/base/bli_array.h b/frame/base/bli_array.h index c1e6ce038a..19912892bf 100644 --- a/frame/base/bli_array.h +++ b/frame/base/bli_array.h @@ -85,18 +85,18 @@ BLIS_INLINE void bli_array_set_elem_size( siz_t elem_size, array_t* array ) \ // ----------------------------------------------------------------------------- -void bli_array_init +err_t bli_array_init ( siz_t num_elem, siz_t elem_size, array_t* array ); -void bli_array_resize +err_t bli_array_resize ( siz_t num_elem_new, array_t* array ); -void bli_array_finalize +err_t bli_array_finalize ( array_t* array ); @@ -113,5 +113,10 @@ void bli_array_set_elem array_t* array ); +void bli_array_clear + ( + array_t* array + ); + #endif diff --git a/frame/base/bli_check.c b/frame/base/bli_check.c index 16c418b49e..2a3a109581 100644 --- a/frame/base/bli_check.c +++ b/frame/base/bli_check.c @@ -37,6 +37,7 @@ // -- General stuff ------------------------------------------------------------ +#if 1 err_t bli_check_error_code_helper( gint_t code, const char* file, guint_t line ) { if ( code == BLIS_SUCCESS ) return code; @@ -56,6 +57,7 @@ err_t bli_check_error_code_helper( gint_t code, const char* file, guint_t line ) return code; } +#endif err_t bli_check_valid_error_level( errlev_t level ) { @@ -68,6 +70,17 @@ err_t bli_check_valid_error_level( errlev_t level ) return e_val; } +err_t bli_check_valid_error_mode( errmode_t mode ) +{ + err_t e_val = BLIS_SUCCESS; + + if ( mode != BLIS_ERROR_RETURN && + mode != BLIS_ERROR_ABORT ) + e_val = BLIS_INVALID_ERROR_HANDLING_MODE; + + return e_val; +} + err_t bli_check_null_pointer( const void* ptr ) { err_t e_val = BLIS_SUCCESS; @@ -677,8 +690,32 @@ err_t bli_check_upper_or_lower_object( const obj_t* a ) return e_val; } +// -- Induced 
method-related checks -------------------------------------------- + +err_t bli_check_valid_ind( ind_t im ) +{ + err_t e_val = BLIS_SUCCESS; + + if ( !bli_is_1m( im ) && + !bli_is_nat( im ) ) + e_val = BLIS_INVALID_IND; + + return e_val; +} + // -- Partitioning-related checks ---------------------------------------------- +err_t bli_check_valid_direct( dir_t direct ) +{ + err_t e_val = BLIS_SUCCESS; + + if ( !bli_is_fwd( direct ) && + !bli_is_bwd( direct ) ) + e_val = BLIS_INVALID_DIRECTION; + + return e_val; +} + err_t bli_check_valid_3x1_subpart( subpart_t part ) { err_t e_val = BLIS_SUCCESS; @@ -729,6 +766,42 @@ err_t bli_check_valid_3x3_subpart( subpart_t part ) return e_val; } +err_t bli_check_valid_row_offset( dim_t i, obj_t* a ) +{ + err_t e_val = BLIS_SUCCESS; + + const dim_t m = bli_obj_length( a ); + + if ( i < 0 ) e_val = BLIS_ROW_OFFSET_LESS_THAN_ZERO; + else if ( m <= i ) e_val = BLIS_ROW_OFFSET_EXCEEDS_NUM_ROWS; + + return e_val; +} + +err_t bli_check_valid_col_offset( dim_t j, obj_t* a ) +{ + err_t e_val = BLIS_SUCCESS; + + const dim_t n = bli_obj_width( a ); + + if ( j < 0 ) e_val = BLIS_COL_OFFSET_LESS_THAN_ZERO; + else if ( n <= j ) e_val = BLIS_COL_OFFSET_EXCEEDS_NUM_COLS; + + return e_val; +} + +err_t bli_check_valid_vector_offset( dim_t i, obj_t* x ) +{ + err_t e_val = BLIS_SUCCESS; + + const dim_t n = bli_obj_vector_dim( x ); + + if ( i < 0 ) e_val = BLIS_VECTOR_OFFSET_LESS_THAN_ZERO; + else if ( n <= i ) e_val = BLIS_VECTOR_OFFSET_EXCEEDS_NUM_ELEM; + + return e_val; +} + // -- Control tree-related checks ---------------------------------------------- err_t bli_check_valid_cntl( const void* cntl ) @@ -871,6 +944,18 @@ err_t bli_check_alignment_is_mult_of_ptr_size( size_t align_size ) return e_val; } +err_t bli_check_outstanding_mem_pool_blocks( siz_t top_index ) +{ + err_t e_val = BLIS_SUCCESS; + + // This function returns an error code if the top_index is not zero. 
+ + if ( top_index != 0 ) + e_val = BLIS_MEM_POOL_BLOCKS_OUTSTANDING; + + return e_val; +} + // -- Object-related errors ---------------------------------------------------- err_t bli_check_object_alias_of( const obj_t* a, const obj_t* b ) @@ -958,3 +1043,16 @@ err_t bli_check_valid_kc_mod_mult( const blksz_t* kc, const blksz_t* kr ) return BLIS_SUCCESS; } +// -- Thread-related errors ---------------------------------------------------- + +err_t bli_check_num_threads_created( dim_t nt_req, dim_t nt_actual ) +{ + err_t e_val = BLIS_SUCCESS; + + if ( nt_req != nt_actual ) + if ( nt_actual != 1 ) + e_val = BLIS_EXPECTED_DIFF_NUM_THREADS; + + return e_val; +} + diff --git a/frame/base/bli_check.h b/frame/base/bli_check.h index f1e2201a7e..07506152b2 100644 --- a/frame/base/bli_check.h +++ b/frame/base/bli_check.h @@ -37,6 +37,7 @@ BLIS_EXPORT_BLIS err_t bli_check_error_code_helper( gint_t code, const char* file, guint_t line ); err_t bli_check_valid_error_level( errlev_t level ); +err_t bli_check_valid_error_mode( errmode_t mode ); err_t bli_check_null_pointer( const void* ptr ); @@ -88,9 +89,15 @@ err_t bli_check_object_struc( const obj_t* a, struc_t struc ); err_t bli_check_upper_or_lower_object( const obj_t* a ); +err_t bli_check_valid_ind( ind_t im ); + +err_t bli_check_valid_direct( dir_t direct ); err_t bli_check_valid_3x1_subpart( subpart_t part ); err_t bli_check_valid_1x3_subpart( subpart_t part ); err_t bli_check_valid_3x3_subpart( subpart_t part ); +err_t bli_check_valid_row_offset( dim_t i, obj_t* a ); +err_t bli_check_valid_col_offset( dim_t j, obj_t* a ); +err_t bli_check_valid_vector_offset( dim_t i, obj_t* x ); err_t bli_check_valid_cntl( const void* cntl ); @@ -106,6 +113,7 @@ err_t bli_check_if_exhausted_pool( const pool_t* pool ); err_t bli_check_sufficient_stack_buf_size( const cntx_t* cntx ); err_t bli_check_alignment_is_power_of_two( size_t align_size ); err_t bli_check_alignment_is_mult_of_ptr_size( size_t align_size ); +err_t 
bli_check_outstanding_mem_pool_blocks( siz_t top_index ); err_t bli_check_object_alias_of( const obj_t* a, const obj_t* b ); @@ -116,3 +124,5 @@ err_t bli_check_valid_mc_mod_mult( const blksz_t* mc, const blksz_t* mr ); err_t bli_check_valid_nc_mod_mult( const blksz_t* nc, const blksz_t* nr ); err_t bli_check_valid_kc_mod_mult( const blksz_t* kc, const blksz_t* kr ); +err_t bli_check_num_threads_created( dim_t nt_req, dim_t nt_actual ); + diff --git a/frame/base/bli_cntl.c b/frame/base/bli_cntl.c index b22ddbee0b..69a7c43e1c 100644 --- a/frame/base/bli_cntl.c +++ b/frame/base/bli_cntl.c @@ -53,7 +53,7 @@ cntl_t* bli_cntl_create_node #endif // Allocate the cntl_t struct. - cntl = bli_sba_acquire( rntm, sizeof( cntl_t ) ); + bli_sba_acquire( rntm, sizeof( cntl_t ), ( void** )&cntl ); bli_cntl_set_family( family, cntl ); bli_cntl_set_bszid( bszid, cntl ); @@ -273,7 +273,8 @@ cntl_t* bli_cntl_copy // struct. uint64_t params_size = bli_cntl_params_size( cntl ); void* params_orig = bli_cntl_params( cntl ); - void* params_copy = bli_sba_acquire( rntm, ( size_t )params_size ); + void* params_copy; + bli_sba_acquire( rntm, ( size_t )params_size, ( void** )&params_copy ); // Copy the original params struct to the new memory region. memcpy( params_copy, params_orig, params_size ); diff --git a/frame/base/bli_error.c b/frame/base/bli_error.c index f4933d9629..9704fe7113 100644 --- a/frame/base/bli_error.c +++ b/frame/base/bli_error.c @@ -39,9 +39,11 @@ static const char *bli_error_string[-BLIS_ERROR_CODE_MAX] = { [-BLIS_INVALID_ERROR_CHECKING_LEVEL] = "Invalid error checking level.", + [-BLIS_INVALID_ERROR_HANDLING_MODE] = "Invalid error handling mode.", [-BLIS_UNDEFINED_ERROR_CODE] = "Undefined error code.", [-BLIS_NULL_POINTER] = "Encountered unexpected null pointer.", [-BLIS_NOT_YET_IMPLEMENTED] = "Requested functionality not yet implemented.", + [-BLIS_REJECT_EXEC] = "Execution path rejected.
NOTE: This error message should never be displayed.", [-BLIS_INVALID_SIDE] = "Invalid side parameter value.", [-BLIS_INVALID_UPLO] = "Invalid uplo_t parameter value.", @@ -83,9 +85,18 @@ static const char *bli_error_string[-BLIS_ERROR_CODE_MAX] = [-BLIS_EXPECTED_UPPER_OR_LOWER_OBJECT] = "Expected upper or lower triangular object.", + [-BLIS_INVALID_IND] = "Invalid ind_t parameter value.", + + [-BLIS_INVALID_DIRECTION] = "Invalid dir_t parameter value.", [-BLIS_INVALID_3x1_SUBPART] = "Encountered invalid 3x1 (vertical) subpartition label.", [-BLIS_INVALID_1x3_SUBPART] = "Encountered invalid 1x3 (horizontal) subpartition label.", [-BLIS_INVALID_3x3_SUBPART] = "Encountered invalid 3x3 (diagonal) subpartition label.", + [-BLIS_ROW_OFFSET_LESS_THAN_ZERO] = "Encountered row offset less than zero.", + [-BLIS_ROW_OFFSET_EXCEEDS_NUM_ROWS] = "Encountered row offset that exceeds the number of matrix rows (m dimension).", + [-BLIS_COL_OFFSET_LESS_THAN_ZERO] = "Encountered column offset less than zero.", + [-BLIS_COL_OFFSET_EXCEEDS_NUM_COLS] = "Encountered column offset that exceeds the number of matrix columns (n dimension).", + [-BLIS_VECTOR_OFFSET_LESS_THAN_ZERO] = "Encountered vector offset less than zero.", + [-BLIS_VECTOR_OFFSET_EXCEEDS_NUM_ELEM] = "Encountered vector offset that exceeds the number of vector elements.", [-BLIS_UNEXPECTED_NULL_CONTROL_TREE] = "Encountered unexpected null control tree node.", @@ -100,6 +111,7 @@ static const char *bli_error_string[-BLIS_ERROR_CODE_MAX] = [-BLIS_INSUFFICIENT_STACK_BUF_SIZE] = "Configured maximum stack buffer size is insufficient for register blocksizes currently in use.", [-BLIS_ALIGNMENT_NOT_POWER_OF_TWO] = "Encountered memory alignment value that is either zero or not a power of two.", [-BLIS_ALIGNMENT_NOT_MULT_OF_PTR_SIZE] = "Encountered memory alignment value that is not a multiple of sizeof(void*).", + [-BLIS_MEM_POOL_BLOCKS_OUTSTANDING] = "One or more blocks still checked out at the time a memory pool was finalized.", 
[-BLIS_EXPECTED_OBJECT_ALIAS] = "Expected object to be alias.", @@ -112,52 +124,166 @@ static const char *bli_error_string[-BLIS_ERROR_CODE_MAX] = [-BLIS_NC_MAX_NONMULTIPLE_OF_NR] = "Maximum NC is non-multiple of NR for one or more datatypes.", [-BLIS_KC_DEF_NONMULTIPLE_OF_KR] = "Default KC is non-multiple of KR for one or more datatypes.", [-BLIS_KC_MAX_NONMULTIPLE_OF_KR] = "Maximum KC is non-multiple of KR for one or more datatypes.", + + [-BLIS_EXPECTED_DIFF_NUM_THREADS] = "A different number of threads was created than was requested.", }; // ----------------------------------------------------------------------------- -void bli_print_msg( const char* str, const char* file, guint_t line ) +// A mutex to allow synchronous access to the variable controlling the error +// checking level. +static bli_pthread_mutex_t err_level_mutex = BLIS_PTHREAD_MUTEX_INITIALIZER; + +// Set the default (initial) error checking level based on how BLIS was +// configured. Note that we declare the variable as thread-local so that +// application threads can operate BLIS under different error handling +// regimes. +#ifdef BLIS_ENABLE_ERROR_CHECKING +static BLIS_THREAD_LOCAL errlev_t bli_err_chk_level = BLIS_FULL_ERROR_CHECKING; +#else +static BLIS_THREAD_LOCAL errlev_t bli_err_chk_level = BLIS_NO_ERROR_CHECKING; +#endif + +// Primary user APIs. 
+ +bool bli_error_checking_is_enabled( void ) { - fprintf( stderr, "\n" ); - fprintf( stderr, "libblis: %s (line %lu):\n", file, ( long unsigned int )line ); - fprintf( stderr, "libblis: %s\n", str ); - fflush( stderr ); + return bli_error_checking_level() != BLIS_NO_ERROR_CHECKING; } -void bli_abort( void ) +err_t bli_error_checking_enable( void ) { - fprintf( stderr, "libblis: Aborting.\n" ); - //raise( SIGABRT ); - abort(); + return bli_error_checking_level_set( BLIS_FULL_ERROR_CHECKING ); } -// ----------------------------------------------------------------------------- +err_t bli_error_checking_disable( void ) +{ + return bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); +} -// Current error checking level. -static BLIS_THREAD_LOCAL errlev_t bli_err_chk_level = BLIS_FULL_ERROR_CHECKING; +// Lower-level APIs. errlev_t bli_error_checking_level( void ) { return bli_err_chk_level; } -void bli_error_checking_level_set( errlev_t new_level ) +err_t bli_error_checking_level_set( errlev_t new_level ) { err_t e_val; e_val = bli_check_valid_error_level( new_level ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); + + // Acquire the mutex protecting bli_err_chk_level. + bli_pthread_mutex_lock( &err_level_mutex ); + + // BEGIN CRITICAL SECTION + { + bli_err_chk_level = new_level; + } + // END CRITICAL SECTION + + // Release the mutex protecting bli_err_chk_level. + bli_pthread_mutex_unlock( &err_level_mutex ); - bli_err_chk_level = new_level; + return BLIS_SUCCESS; } -bool bli_error_checking_is_enabled( void ) +// ----------------------------------------------------------------------------- + +// A mutex to allow synchronous access to the variable controlling the error +// handling mode. +static bli_pthread_mutex_t err_mode_mutex = BLIS_PTHREAD_MUTEX_INITIALIZER; + +// Set the default (initial) error handling mode based on how BLIS was +// configured. 
Note that we declare the variable as thread-local so that +// application threads can operate BLIS under different error handling +// regimes. +#ifdef BLIS_ENABLE_ERROR_RETURN +static BLIS_THREAD_LOCAL errmode_t bli_err_hand_mode = BLIS_ERROR_RETURN; +#else // #ifdef BLIS_ENABLE_ERROR_ABORT +static BLIS_THREAD_LOCAL errmode_t bli_err_hand_mode = BLIS_ERROR_ABORT; +#endif + +// Primary user APIs. + +bool bli_error_mode_is_return( void ) { - return bli_error_checking_level() != BLIS_NO_ERROR_CHECKING; + return bli_error_mode() == BLIS_ERROR_RETURN; +} + +bool bli_error_mode_is_abort( void ) +{ + return bli_error_mode() == BLIS_ERROR_ABORT; +} + +err_t bli_error_mode_set_return( void ) +{ + return bli_error_mode_set( BLIS_ERROR_RETURN ); } +err_t bli_error_mode_set_abort( void ) +{ + return bli_error_mode_set( BLIS_ERROR_ABORT ); +} + +// Lower-level APIs. + +errmode_t bli_error_mode( void ) +{ + return bli_err_hand_mode; +} + +err_t bli_error_mode_set( errmode_t new_mode ) +{ + err_t e_val; + + e_val = bli_check_valid_error_mode( new_mode ); + bli_check_return_error_code( e_val ); + + // Acquire the mutex protecting bli_err_hand_mode. + bli_pthread_mutex_lock( &err_mode_mutex ); + + // BEGIN CRITICAL SECTION + { + bli_err_hand_mode = new_mode; + } + // END CRITICAL SECTION + + // Release the mutex protecting bli_err_hand_mode. + bli_pthread_mutex_unlock( &err_mode_mutex ); + + return BLIS_SUCCESS; +} + +// ----------------------------------------------------------------------------- + const char* bli_error_string_for_code( gint_t code ) { + // If the caller's error code is out of range, use a special error code to + // signify this. + if ( code <= BLIS_ERROR_CODE_MIN || BLIS_ERROR_CODE_MAX <= code ) + code = BLIS_UNDEFINED_ERROR_CODE; + + // Return the address of the string corresponding to the chosen error code.
return bli_error_string[-code]; } +// ----------------------------------------------------------------------------- + +void bli_print_msg( const char* str, const char* file, guint_t line ) +{ + fprintf( stderr, "\n" ); + fprintf( stderr, "libblis: %s (line %lu):\n", file, ( long unsigned int )line ); + fprintf( stderr, "libblis: %s\n", str ); + fflush( stderr ); +} + +void bli_abort( void ) +{ + fprintf( stderr, "libblis: Aborting.\n" ); + //raise( SIGABRT ); + abort(); +} + diff --git a/frame/base/bli_error.h b/frame/base/bli_error.h index f3037e2c21..71f87073c8 100644 --- a/frame/base/bli_error.h +++ b/frame/base/bli_error.h @@ -34,13 +34,29 @@ */ -BLIS_EXPORT_BLIS errlev_t bli_error_checking_level( void ); -BLIS_EXPORT_BLIS void bli_error_checking_level_set( errlev_t new_level ); +BLIS_EXPORT_BLIS bool bli_error_checking_is_enabled( void ); +BLIS_EXPORT_BLIS err_t bli_error_checking_enable( void ); +BLIS_EXPORT_BLIS err_t bli_error_checking_disable( void ); -BLIS_EXPORT_BLIS bool bli_error_checking_is_enabled( void ); +BLIS_EXPORT_BLIS errlev_t bli_error_checking_level( void ); +BLIS_EXPORT_BLIS err_t bli_error_checking_level_set( errlev_t new_level ); -void bli_print_msg( const char* str, const char* file, guint_t line ); -BLIS_EXPORT_BLIS void bli_abort( void ); +// ----------------------------------------------------------------------------- -const char* bli_error_string_for_code( gint_t code ); +BLIS_EXPORT_BLIS bool bli_error_mode_is_return( void ); +BLIS_EXPORT_BLIS bool bli_error_mode_is_abort( void ); +BLIS_EXPORT_BLIS err_t bli_error_mode_set_return( void ); +BLIS_EXPORT_BLIS err_t bli_error_mode_set_abort( void ); + +BLIS_EXPORT_BLIS errmode_t bli_error_mode( void ); +BLIS_EXPORT_BLIS err_t bli_error_mode_set( errmode_t new_mode ); + +// ----------------------------------------------------------------------------- + +BLIS_EXPORT_BLIS const char* bli_error_string_for_code( gint_t code ); + +// 
----------------------------------------------------------------------------- + +void bli_print_msg( const char* str, const char* file, guint_t line ); +BLIS_EXPORT_BLIS void bli_abort( void ); diff --git a/frame/base/bli_gks.c b/frame/base/bli_gks.c index 094810d9d2..5a1cb29bd9 100644 --- a/frame/base/bli_gks.c +++ b/frame/base/bli_gks.c @@ -52,184 +52,268 @@ typedef void (*nat_cntx_init_ft)( cntx_t* cntx ); typedef void (*ref_cntx_init_ft)( cntx_t* cntx ); typedef void (*ind_cntx_init_ft)( ind_t method, cntx_t* cntx ); +// A boolean that tracks whether bli_gks_init() has completed successfully. +static bool gks_is_init = FALSE; + // ----------------------------------------------------------------------------- -void bli_gks_init( void ) +bool bli_gks_is_init( void ) { - { - // Initialize the internal data structure we use to track registered - // contexts. - bli_gks_init_index(); + return gks_is_init; +} + +void bli_gks_mark_init( void ) +{ + gks_is_init = TRUE; +} - // Register a context for each architecture that was #define'd in - // bli_config.h. +void bli_gks_mark_uninit( void ) +{ + gks_is_init = FALSE; +} - // -- Intel architectures ---------------------------------------------- +// ----------------------------------------------------------------------------- + +err_t bli_gks_init( void ) +{ + err_t r_val; + + // NOTE: We assume this function is only called by one thread. + + // Sanity check: Return early if the API is already initialized. + if ( bli_gks_is_init() ) return BLIS_SUCCESS; + + // Initialize the internal data structure we use to track registered + // contexts. + bli_gks_init_index(); + + // Register a context for each architecture that was #define'd in + // bli_config.h. If any registration fails, finalize the gks before + // returning the error code. 
+ + // -- Intel architectures ---------------------------------------------- #ifdef BLIS_CONFIG_SKX - bli_gks_register_cntx( BLIS_ARCH_SKX, bli_cntx_init_skx, - bli_cntx_init_skx_ref, - bli_cntx_init_skx_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_SKX, bli_cntx_init_skx, + bli_cntx_init_skx_ref, + bli_cntx_init_skx_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_KNL - bli_gks_register_cntx( BLIS_ARCH_KNL, bli_cntx_init_knl, - bli_cntx_init_knl_ref, - bli_cntx_init_knl_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_KNL, bli_cntx_init_knl, + bli_cntx_init_knl_ref, + bli_cntx_init_knl_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_KNC - bli_gks_register_cntx( BLIS_ARCH_KNC, bli_cntx_init_knc, - bli_cntx_init_knc_ref, - bli_cntx_init_knc_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_KNC, bli_cntx_init_knc, + bli_cntx_init_knc_ref, + bli_cntx_init_knc_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_HASWELL - bli_gks_register_cntx( BLIS_ARCH_HASWELL, bli_cntx_init_haswell, - bli_cntx_init_haswell_ref, - bli_cntx_init_haswell_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_HASWELL, bli_cntx_init_haswell, + bli_cntx_init_haswell_ref, + bli_cntx_init_haswell_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_SANDYBRIDGE - bli_gks_register_cntx( BLIS_ARCH_SANDYBRIDGE, bli_cntx_init_sandybridge, - bli_cntx_init_sandybridge_ref, - bli_cntx_init_sandybridge_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_SANDYBRIDGE, bli_cntx_init_sandybridge, + bli_cntx_init_sandybridge_ref, + bli_cntx_init_sandybridge_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_PENRYN - bli_gks_register_cntx( BLIS_ARCH_PENRYN, bli_cntx_init_penryn, - bli_cntx_init_penryn_ref, - bli_cntx_init_penryn_ind ); + r_val = 
+ bli_gks_register_cntx( BLIS_ARCH_PENRYN, bli_cntx_init_penryn, + bli_cntx_init_penryn_ref, + bli_cntx_init_penryn_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif - // -- AMD architectures ------------------------------------------------ + // -- AMD architectures ------------------------------------------------ #ifdef BLIS_CONFIG_ZEN3 - bli_gks_register_cntx( BLIS_ARCH_ZEN3, bli_cntx_init_zen3, - bli_cntx_init_zen3_ref, - bli_cntx_init_zen3_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_ZEN3, bli_cntx_init_zen3, + bli_cntx_init_zen3_ref, + bli_cntx_init_zen3_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_ZEN2 - bli_gks_register_cntx( BLIS_ARCH_ZEN2, bli_cntx_init_zen2, - bli_cntx_init_zen2_ref, - bli_cntx_init_zen2_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_ZEN2, bli_cntx_init_zen2, + bli_cntx_init_zen2_ref, + bli_cntx_init_zen2_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_ZEN - bli_gks_register_cntx( BLIS_ARCH_ZEN, bli_cntx_init_zen, - bli_cntx_init_zen_ref, - bli_cntx_init_zen_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_ZEN, bli_cntx_init_zen, + bli_cntx_init_zen_ref, + bli_cntx_init_zen_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_EXCAVATOR - bli_gks_register_cntx( BLIS_ARCH_EXCAVATOR, bli_cntx_init_excavator, - bli_cntx_init_excavator_ref, - bli_cntx_init_excavator_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_EXCAVATOR, bli_cntx_init_excavator, + bli_cntx_init_excavator_ref, + bli_cntx_init_excavator_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_STEAMROLLER - bli_gks_register_cntx( BLIS_ARCH_STEAMROLLER, bli_cntx_init_steamroller, - bli_cntx_init_steamroller_ref, - bli_cntx_init_steamroller_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_STEAMROLLER, 
bli_cntx_init_steamroller, + bli_cntx_init_steamroller_ref, + bli_cntx_init_steamroller_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_PILEDRIVER - bli_gks_register_cntx( BLIS_ARCH_PILEDRIVER, bli_cntx_init_piledriver, - bli_cntx_init_piledriver_ref, - bli_cntx_init_piledriver_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_PILEDRIVER, bli_cntx_init_piledriver, + bli_cntx_init_piledriver_ref, + bli_cntx_init_piledriver_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_BULLDOZER - bli_gks_register_cntx( BLIS_ARCH_BULLDOZER, bli_cntx_init_bulldozer, - bli_cntx_init_bulldozer_ref, - bli_cntx_init_bulldozer_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_BULLDOZER, bli_cntx_init_bulldozer, + bli_cntx_init_bulldozer_ref, + bli_cntx_init_bulldozer_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif - // -- ARM architectures ------------------------------------------------ + // -- ARM architectures ------------------------------------------------ - // -- ARM-SVE -- + // -- ARM-SVE -- #ifdef BLIS_CONFIG_ARMSVE - bli_gks_register_cntx( BLIS_ARCH_ARMSVE, bli_cntx_init_armsve, - bli_cntx_init_armsve_ref, - bli_cntx_init_armsve_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_ARMSVE, bli_cntx_init_armsve, + bli_cntx_init_armsve_ref, + bli_cntx_init_armsve_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_A64FX - bli_gks_register_cntx( BLIS_ARCH_A64FX, bli_cntx_init_a64fx, - bli_cntx_init_a64fx_ref, - bli_cntx_init_a64fx_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_A64FX, bli_cntx_init_a64fx, + bli_cntx_init_a64fx_ref, + bli_cntx_init_a64fx_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif - // -- ARM-NEON (4 pipes x 128-bit vectors) -- + // -- ARM-NEON (4 pipes x 128-bit vectors) -- #ifdef BLIS_CONFIG_FIRESTORM - bli_gks_register_cntx( 
BLIS_ARCH_FIRESTORM, bli_cntx_init_firestorm, - bli_cntx_init_firestorm_ref, - bli_cntx_init_firestorm_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_FIRESTORM, bli_cntx_init_firestorm, + bli_cntx_init_firestorm_ref, + bli_cntx_init_firestorm_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif - // -- ARM (2 pipes x 128-bit vectors) -- + // -- ARM (2 pipes x 128-bit vectors) -- #ifdef BLIS_CONFIG_THUNDERX2 - bli_gks_register_cntx( BLIS_ARCH_THUNDERX2, bli_cntx_init_thunderx2, - bli_cntx_init_thunderx2_ref, - bli_cntx_init_thunderx2_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_THUNDERX2, bli_cntx_init_thunderx2, + bli_cntx_init_thunderx2_ref, + bli_cntx_init_thunderx2_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_CORTEXA57 - bli_gks_register_cntx( BLIS_ARCH_CORTEXA57, bli_cntx_init_cortexa57, - bli_cntx_init_cortexa57_ref, - bli_cntx_init_cortexa57_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_CORTEXA57, bli_cntx_init_cortexa57, + bli_cntx_init_cortexa57_ref, + bli_cntx_init_cortexa57_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_CORTEXA53 - bli_gks_register_cntx( BLIS_ARCH_CORTEXA53, bli_cntx_init_cortexa53, - bli_cntx_init_cortexa53_ref, - bli_cntx_init_cortexa53_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_CORTEXA53, bli_cntx_init_cortexa53, + bli_cntx_init_cortexa53_ref, + bli_cntx_init_cortexa53_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif - // -- ARM (older 32-bit microarchitectures) -- + // -- ARM (older 32-bit microarchitectures) -- #ifdef BLIS_CONFIG_CORTEXA15 - bli_gks_register_cntx( BLIS_ARCH_CORTEXA15, bli_cntx_init_cortexa15, - bli_cntx_init_cortexa15_ref, - bli_cntx_init_cortexa15_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_CORTEXA15, bli_cntx_init_cortexa15, + bli_cntx_init_cortexa15_ref, + bli_cntx_init_cortexa15_ind ); + 
bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_CORTEXA9 - bli_gks_register_cntx( BLIS_ARCH_CORTEXA9, bli_cntx_init_cortexa9, - bli_cntx_init_cortexa9_ref, - bli_cntx_init_cortexa9_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_CORTEXA9, bli_cntx_init_cortexa9, + bli_cntx_init_cortexa9_ref, + bli_cntx_init_cortexa9_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif - // -- IBM architectures ------------------------------------------------ + // -- IBM architectures ------------------------------------------------ #ifdef BLIS_CONFIG_POWER10 - bli_gks_register_cntx( BLIS_ARCH_POWER10, bli_cntx_init_power10, - bli_cntx_init_power10_ref, - bli_cntx_init_power10_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_POWER10, bli_cntx_init_power10, + bli_cntx_init_power10_ref, + bli_cntx_init_power10_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_POWER9 - bli_gks_register_cntx( BLIS_ARCH_POWER9, bli_cntx_init_power9, - bli_cntx_init_power9_ref, - bli_cntx_init_power9_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_POWER9, bli_cntx_init_power9, + bli_cntx_init_power9_ref, + bli_cntx_init_power9_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_POWER7 - bli_gks_register_cntx( BLIS_ARCH_POWER7, bli_cntx_init_power7, - bli_cntx_init_power7_ref, - bli_cntx_init_power7_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_POWER7, bli_cntx_init_power7, + bli_cntx_init_power7_ref, + bli_cntx_init_power7_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_BGQ - bli_gks_register_cntx( BLIS_ARCH_BGQ, bli_cntx_init_bgq, - bli_cntx_init_bgq_ref, - bli_cntx_init_bgq_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_BGQ, bli_cntx_init_bgq, + bli_cntx_init_bgq_ref, + bli_cntx_init_bgq_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), 
r_val ); #endif - // -- Generic architectures -------------------------------------------- + // -- Generic architectures -------------------------------------------- #ifdef BLIS_CONFIG_GENERIC - bli_gks_register_cntx( BLIS_ARCH_GENERIC, bli_cntx_init_generic, - bli_cntx_init_generic_ref, - bli_cntx_init_generic_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_GENERIC, bli_cntx_init_generic, + bli_cntx_init_generic_ref, + bli_cntx_init_generic_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif - } + + // Mark the API as initialized. + bli_gks_mark_init(); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -void bli_gks_finalize( void ) +err_t bli_gks_finalize( void ) { arch_t id; ind_t ind; - // BEGIN CRITICAL SECTION - // NOTE: This critical section is implicit. We assume this function is only - // called from within the critical section within bli_finalize(). - { + // NOTE: We assume this function is only called by one thread. + // Sanity check: Return early if the API is uninitialized. + if ( !bli_gks_is_init() ) return BLIS_SUCCESS; + + { // Iterate over the architectures in the gks array. for ( id = 0; id < BLIS_NUM_ARCHS; ++id ) { @@ -263,10 +347,18 @@ void bli_gks_finalize( void ) // Free the array of BLIS_NUM_IND_METHODS cntx* elements. bli_free_intl( gks_id ); } - } + // Set gks[ id ] to NULL. Not necessary, since bli_gks_init_index() + // will reset all elements of the gks array to zero (NULL) the next + // time the bli_gks_init() is called, but also doesn't hurt. + gks[ id ] = NULL; + } } - // END CRITICAL SECTION + + // Mark the API as uninitialized. + bli_gks_mark_uninit(); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- @@ -276,8 +368,8 @@ void bli_gks_init_index( void ) // This function is called by bli_gks_init(). It simply initializes all // architecture id elements of the internal arrays to NULL. 
- const size_t gks_size = sizeof( cntx_t* ) * BLIS_NUM_ARCHS; - const size_t fpa_size = sizeof( void_fp ) * BLIS_NUM_ARCHS; + const size_t gks_size = sizeof( cntx_t** ) * BLIS_NUM_ARCHS; + const size_t fpa_size = sizeof( void_fp ) * BLIS_NUM_ARCHS; // Set every entry in gks and context init function pointer arrays to // zero/NULL. This is done so that later on we know which ones were @@ -289,23 +381,24 @@ void bli_gks_init_index( void ) // ----------------------------------------------------------------------------- -const cntx_t* bli_gks_lookup_nat_cntx +err_t bli_gks_lookup_nat_cntx ( - arch_t id + arch_t id, + const cntx_t** cntx ) { // Return the address of the (native) context for a given architecture id. // This function assumes the architecture has already been registered. - - return bli_gks_lookup_ind_cntx( id, BLIS_NAT ); + return bli_gks_lookup_ind_cntx( id, BLIS_NAT, cntx ); } // ----------------------------------------------------------------------------- -const cntx_t* bli_gks_lookup_ind_cntx +err_t bli_gks_lookup_ind_cntx ( - arch_t id, - ind_t ind + arch_t id, + ind_t ind, + const cntx_t** cntx ) { // Return the address of the context for a given architecture id and @@ -318,7 +411,7 @@ const cntx_t* bli_gks_lookup_ind_cntx if ( bli_error_checking_is_enabled() ) { err_t e_val = bli_check_valid_arch_id( id ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); } // Index into the array of context pointers for the given architecture id, @@ -327,7 +420,9 @@ const cntx_t* bli_gks_lookup_ind_cntx cntx_t* gks_id_ind = gks_id[ ind ]; // Return the context pointer at gks_id_ind. 
- return gks_id_ind; + *cntx = gks_id_ind; + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- @@ -351,7 +446,7 @@ const cntx_t* const * bli_gks_lookup_id // ----------------------------------------------------------------------------- -void bli_gks_register_cntx +err_t bli_gks_register_cntx ( arch_t id, void_fp nat_fp, @@ -380,11 +475,9 @@ void bli_gks_register_cntx if ( bli_error_checking_is_enabled() ) { err_t e_val = bli_check_valid_arch_id( id ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); } - nat_cntx_init_ft f = nat_fp; - // First, store the function pointers to the context initialization // functions for reference kernels and induced method execution. The // former will be used whenever we need to obtain reference kernels and @@ -399,7 +492,7 @@ void bli_gks_register_cntx // This is really just a safety feature to prevent memory leaks; this // early return should never occur, because the caller should never try // to register with an architecture id that has already been registered. - if ( gks[ id ] != NULL ) return; + if ( gks[ id ] != NULL ) return BLIS_SUCCESS; #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_gks_register_cntx(): " ); @@ -410,6 +503,7 @@ void bli_gks_register_cntx // zeros/NULL, storing the address of the alloacted memory at the element // for the current architecture id. gks[ id ] = bli_calloc_intl( sizeof( cntx_t* ) * BLIS_NUM_IND_METHODS, &r_val ); + bli_check_return_if_failure( r_val ); // Alias the allocated array for readability. cntx_t** gks_id = gks[ id ]; @@ -418,14 +512,16 @@ void bli_gks_register_cntx printf( "bli_gks_register_cntx(): " ); #endif - // Allocate memory for a single context and store the address at - // the element in the gks[ id ] array that is reserved for native - // execution. + // Allocate memory for a single context and store the address at the element + // in the gks[ id ] array that is reserved for native execution. 
gks_id[ BLIS_NAT ] = bli_calloc_intl( sizeof( cntx_t ), &r_val ); + bli_check_return_if_failure( r_val ); // Alias the allocated context address for readability. cntx_t* gks_id_nat = gks_id[ BLIS_NAT ]; + nat_cntx_init_ft f = nat_fp; + // Call the context initialization function on the element of the newly // allocated array corresponding to native execution. f( gks_id_nat ); @@ -454,58 +550,70 @@ void bli_gks_register_cntx const blksz_t* nr = bli_cntx_get_blksz( BLIS_NR, gks_id_nat ); const blksz_t* kr = bli_cntx_get_blksz( BLIS_KR, gks_id_nat ); - e_val = bli_check_valid_mc_mod_mult( mc, mr ); bli_check_error_code( e_val ); - e_val = bli_check_valid_nc_mod_mult( nc, nr ); bli_check_error_code( e_val ); - e_val = bli_check_valid_kc_mod_mult( kc, kr ); bli_check_error_code( e_val ); + e_val = bli_check_valid_mc_mod_mult( mc, mr ); bli_check_return_error_code( e_val ); + e_val = bli_check_valid_nc_mod_mult( nc, nr ); bli_check_return_error_code( e_val ); + e_val = bli_check_valid_kc_mod_mult( kc, kr ); bli_check_return_error_code( e_val ); #ifndef BLIS_RELAX_MCNR_NCMR_CONSTRAINTS - e_val = bli_check_valid_mc_mod_mult( mc, nr ); bli_check_error_code( e_val ); - e_val = bli_check_valid_nc_mod_mult( nc, mr ); bli_check_error_code( e_val ); + e_val = bli_check_valid_mc_mod_mult( mc, nr ); bli_check_return_error_code( e_val ); + e_val = bli_check_valid_nc_mod_mult( nc, mr ); bli_check_return_error_code( e_val ); #endif // Verify that the register blocksizes in the context are sufficiently large // relative to the maximum stack buffer size defined at configure-time. 
e_val = bli_check_sufficient_stack_buf_size( gks_id_nat ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -const cntx_t* bli_gks_query_cntx( void ) +err_t bli_gks_query_cntx( const cntx_t** cntx ) { - return bli_gks_query_nat_cntx(); + return bli_gks_query_nat_cntx( cntx ); } -const cntx_t* bli_gks_query_nat_cntx( void ) +err_t bli_gks_query_nat_cntx( const cntx_t** cntx ) { - bli_init_once(); + BLIS_INIT_ONCE(); + + arch_t id; + err_t r_val; // Return the address of the native context for the architecture id // corresponding to the current hardware, as determined by // bli_arch_query_id(). // Query the architecture id. - arch_t id = bli_arch_query_id(); + r_val = bli_arch_query_id( &id ); + bli_check_return_if_failure( r_val ); // Use the architecture id to look up a pointer to its context. - const cntx_t* cntx = bli_gks_lookup_nat_cntx( id ); + r_val = bli_gks_lookup_nat_cntx( id, cntx ); + bli_check_return_if_failure( r_val ); - return cntx; + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -const cntx_t* bli_gks_query_cntx_noinit( void ) +err_t bli_gks_query_cntx_noinit( const cntx_t** cntx ) { + arch_t id; + err_t r_val; + // This function is identical to bli_gks_query_cntx(), except that it // does not call bli_init_once(). // Query the architecture id. - arch_t id = bli_arch_query_id(); + r_val = bli_arch_query_id( &id ); + bli_check_return_if_failure( r_val ); // Use the architecture id to look up a pointer to its context. 
- const cntx_t* cntx = bli_gks_lookup_nat_cntx( id ); + r_val = bli_gks_lookup_nat_cntx( id, cntx ); + bli_check_return_if_failure( r_val ); - return cntx; + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- @@ -514,39 +622,41 @@ const cntx_t* bli_gks_query_cntx_noinit( void ) // with a new entry corresponding to a context for an ind_t value. static bli_pthread_mutex_t gks_mutex = BLIS_PTHREAD_MUTEX_INITIALIZER; -const cntx_t* bli_gks_query_ind_cntx +err_t bli_gks_query_ind_cntx ( - ind_t ind + ind_t ind, + const cntx_t** cntx ) { - bli_init_once(); + BLIS_INIT_ONCE(); + arch_t id; cntx_t* gks_id_ind; - err_t r_val; + err_t r_val; // Return the address of a context that will be suited for executing a - // level-3 operation via the requested induced method (and datatype) for - // the architecture id corresponding to the current hardware, as - // determined by bli_arch_query_id(). - - // This function is called when a level-3 operation via induced method is - // called, e.g. bli_gemm1m(). If this is the first time that induced method - // is being executed since bli_gks_init(), the necessary context structure - // is allocated and initialized. If this is not the first time, then the - // address of a previously-allocated and initialized (cached) context is - // returned. Note that much of this must be done with mutual exclusion to - // ensure thread safety and deterministic behavior. + // level-3 operation via the requested induced method for the arch_t id + // corresponding to the current hardware, as determined by + // bli_arch_query_id(). - // Query the architecture id. - arch_t id = bli_arch_query_id(); + // If this is the first time that induced method is being executed since + // bli_gks_init(), the necessary context structure is allocated and + // initialized. If this is not the first time, then the address of a + // previously-allocated and initialized (cached) context is returned. 
+ // Note that much of this must be done with mutual exclusion to ensure + // thread safety and deterministic behavior. - // Sanity check: verify that the arch_t id is valid. + // Sanity check: verify that the induced method id is valid. if ( bli_error_checking_is_enabled() ) { - err_t e_val = bli_check_valid_arch_id( id ); - bli_check_error_code( e_val ); + err_t e_val = bli_check_valid_ind( ind ); + bli_check_return_error_code( e_val ); } + // Query the architecture id. + r_val = bli_arch_query_id( &id ); + bli_check_return_if_failure( r_val ); + // NOTE: These initial statements can reside outside of the critical section // because gks[ id ] should have already been allocated, and the native // context in that array should have already been allocated/initialized. @@ -558,7 +668,7 @@ const cntx_t* bli_gks_query_ind_cntx // If for some reason the native context was requested, we can return // its address early. - if ( ind == BLIS_NAT ) return gks_id_nat; + if ( ind == BLIS_NAT ) { *cntx = gks_id_nat; return BLIS_SUCCESS; } // This function assumes that the architecture idenified by id has // already been registered with the gks (which guarantees that @@ -581,24 +691,28 @@ const cntx_t* bli_gks_query_ind_cntx // If gks_id_ind is NULL, then we know we must allocate and then // initialize the context, storing its address back to // gks_id[ ind ]. - gks_id_ind = bli_calloc_intl( sizeof( cntx_t ), &r_val ); - gks_id[ ind ] = gks_id_ind; - - // Before we can call the induced method context initialization - // function on the newly allocated structure, we must first copy - // over the contents of the native context. - *gks_id_ind = *gks_id_nat; - - // Use the architecture id to look up the function pointer to the - // context initialization function for induced methods. 
- ind_cntx_init_ft f = cntx_ind_init[ id ]; - - // Now we modify the context (so that it contains the proper values - // for its induced method) by calling the context initialization - // function for the current induced method. (That function assumes - // that the context is pre- initialized with values for native - // execution.) - f( ind, gks_id_ind ); + gks_id_ind = bli_calloc_intl( sizeof( cntx_t ), &r_val ); + + if ( bli_is_success( r_val ) ) + { + gks_id[ ind ] = gks_id_ind; + + // Before we can call the induced method context initialization + // function on the newly allocated structure, we must first copy + // over the contents of the native context. + *gks_id_ind = *gks_id_nat; + + // Use the architecture id to look up the function pointer to the + // context initialization function for induced methods. + ind_cntx_init_ft f = cntx_ind_init[ id ]; + + // Now we modify the context (so that it contains the proper values + // for its induced method) by calling the context initialization + // function for the current induced method. (That function assumes + // that the context is pre- initialized with values for native + // execution.) + f( ind, gks_id_ind ); + } } } // END CRITICAL SECTION @@ -606,27 +720,29 @@ const cntx_t* bli_gks_query_ind_cntx // Release the mutex protecting the gks. bli_pthread_mutex_unlock( &gks_mutex ); + // Now that we're out of the critical section, we can return if + // bli_calloc_intl() failed. + bli_check_return_if_failure( r_val ); + // Return the address of the newly-allocated/initialized context. - return gks_id_ind; + *cntx = gks_id_ind; + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -void bli_gks_init_ref_cntx +err_t bli_gks_init_ref_cntx ( cntx_t* cntx ) { - // Query the architecture id. - arch_t id = bli_arch_query_id(); + arch_t id; + err_t r_val; - // Sanity check: verify that the arch_t id is valid. 
- if ( bli_error_checking_is_enabled() ) - { - err_t e_val = bli_check_valid_arch_id( id ); - bli_check_error_code( e_val ); - } + // Query the architecture id. + r_val = bli_arch_query_id( &id ); + bli_check_return_if_failure( r_val ); // Obtain the function pointer to the context initialization function for // reference kernels. @@ -634,22 +750,26 @@ void bli_gks_init_ref_cntx // Initialize the caller's context with reference kernels and related values. f( cntx ); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -bool bli_gks_cntx_l3_nat_ukr_is_ref +err_t bli_gks_cntx_l3_nat_ukr_is_ref ( num_t dt, ukr_t ukr_id, - const cntx_t* cntx + const cntx_t* cntx, + bool* is_ref ) { cntx_t ref_cntx; + err_t r_val; - // Initialize a context with reference kernels for the arch_t id queried - // via bli_arch_query_id(). - bli_gks_init_ref_cntx( &ref_cntx ); + // Initialize a context with reference kernels. + r_val = bli_gks_init_ref_cntx( &ref_cntx ); + bli_check_return_if_failure( r_val ); // Query each context for the micro-kernel function pointer for the // specified datatype. @@ -657,7 +777,9 @@ bool bli_gks_cntx_l3_nat_ukr_is_ref void_fp fp = bli_cntx_get_ukr_dt( dt, ukr_id, cntx ); // Return the result. - return fp == ref_fp; + *is_ref = ( fp == ref_fp ); + + return BLIS_SUCCESS; } // @@ -674,15 +796,23 @@ static const char* bli_gks_l3_ukr_impl_str[BLIS_NUM_UKR_IMPL_TYPES] = // ----------------------------------------------------------------------------- -const char* bli_gks_l3_ukr_impl_string( ukr_t ukr, ind_t method, num_t dt ) +err_t bli_gks_l3_ukr_impl_string( ukr_t ukr, ind_t method, num_t dt, const char** str ) { - kimpl_t ki; + BLIS_INIT_ONCE(); + + err_t r_val; + kimpl_t ki; + const cntx_t* cntx; + void_fp fp; // Query the context for the current induced method and datatype, and // then query the ukernel function pointer for the given datatype from // that context. 
- const cntx_t* cntx = bli_gks_query_ind_cntx( method ); - void_fp fp = bli_cntx_get_ukr_dt( dt, ukr, cntx ); + r_val = bli_gks_query_ind_cntx( method, &cntx ); + bli_check_return_if_failure( r_val ); + + fp = bli_cntx_get_ukr_dt( dt, ukr, cntx ); + //bli_check_return_if_failure( r_val ); // Check whether the ukernel function pointer is NULL for the given // datatype. If it is NULL, return the string for not applicable. @@ -691,17 +821,23 @@ const char* bli_gks_l3_ukr_impl_string( ukr_t ukr, ind_t method, num_t dt ) if ( fp == NULL ) ki = BLIS_NOTAPPLIC_UKERNEL; else - ki = bli_gks_l3_ukr_impl_type( ukr, method, dt ); + { + r_val = bli_gks_l3_ukr_impl_type( ukr, method, dt, &ki ); + bli_check_return_if_failure( r_val ); + } + + *str = bli_gks_l3_ukr_impl_str[ ki ]; - return bli_gks_l3_ukr_impl_str[ ki ]; + return BLIS_SUCCESS; } #if 0 -char* bli_gks_l3_ukr_avail_impl_string( ukr_t ukr, num_t dt ) +err_t bli_gks_l3_ukr_avail_impl_string( ukr_t ukr, num_t dt, const char** str ) { opid_t oper; ind_t method; kimpl_t ki; + err_t r_val; // We need to decide which operation we will use to query the // current available induced method. If the ukr type given is @@ -716,44 +852,49 @@ char* bli_gks_l3_ukr_avail_impl_string( ukr_t ukr, num_t dt ) // Query the ukernel implementation type using the current // available method. - ki = bli_gks_l3_ukr_impl_type( ukr, method, dt ); + r_val = bli_gks_l3_ukr_impl_type( ukr, method, dt, ki ); + bli_check_return_if_failure( r_val ); + + *str = bli_ukr_impl_str[ ki ]; - return bli_ukr_impl_str[ ki ]; + return BLIS_SUCCESS; } #endif -kimpl_t bli_gks_l3_ukr_impl_type( ukr_t ukr, ind_t method, num_t dt ) +err_t bli_gks_l3_ukr_impl_type( ukr_t ukr, ind_t method, num_t dt, kimpl_t* ki ) { // If the current available induced method is not native, it // must be virtual. 
- if ( method != BLIS_NAT ) return BLIS_VIRTUAL_UKERNEL; + if ( method != BLIS_NAT ) *ki = BLIS_VIRTUAL_UKERNEL; else { - // If the current available induced method for the gemm - // operation is native, then it might be reference or - // optimized. To determine which, we compare the - // datatype-specific function pointer within the ukrs - // object corresponding to the current available induced - // method to the typed function pointer within the known - // reference ukrs object. + // If the current available induced method for the gemm operation + // is native, then it might be reference or optimized. To determine + // which, we compare the datatype-specific function pointer within + // the ukrs object corresponding to the current available induced + // method to the typed function pointer within the known reference + // ukrs object. - // Query the architecture id. - arch_t id = bli_arch_query_id(); + arch_t id; + err_t r_val; - // Sanity check: verify that the arch_t id is valid. - if ( bli_error_checking_is_enabled() ) - { - err_t e_val = bli_check_valid_arch_id( id ); - bli_check_error_code( e_val ); - } + // Query the architecture id. + r_val = bli_arch_query_id( &id ); + bli_check_return_if_failure( r_val ); // Query the native context from the gks. 
- const cntx_t* nat_cntx = bli_gks_lookup_nat_cntx( id ); + const cntx_t* nat_cntx; + r_val = bli_gks_lookup_nat_cntx( id, &nat_cntx ); + bli_check_return_if_failure( r_val ); - if ( bli_gks_cntx_l3_nat_ukr_is_ref( dt, ukr, nat_cntx ) ) - return BLIS_REFERENCE_UKERNEL; - else - return BLIS_OPTIMIZED_UKERNEL; + bool is_ref; + r_val = bli_gks_cntx_l3_nat_ukr_is_ref( dt, ukr, nat_cntx, &is_ref ); + bli_check_return_if_failure( r_val ); + + if ( is_ref ) *ki = BLIS_REFERENCE_UKERNEL; + else *ki = BLIS_OPTIMIZED_UKERNEL; } + + return BLIS_SUCCESS; } diff --git a/frame/base/bli_gks.h b/frame/base/bli_gks.h index 3a93fd59e8..6910c7d716 100644 --- a/frame/base/bli_gks.h +++ b/frame/base/bli_gks.h @@ -35,31 +35,63 @@ #ifndef BLIS_GKS_H #define BLIS_GKS_H -void bli_gks_init( void ); -void bli_gks_finalize( void ); +bool bli_gks_is_init( void ); +void bli_gks_mark_init( void ); +void bli_gks_mark_uninit( void ); -void bli_gks_init_index( void ); +err_t bli_gks_init( void ); +err_t bli_gks_finalize( void ); -const cntx_t* bli_gks_lookup_nat_cntx( arch_t id ); -const cntx_t* bli_gks_lookup_ind_cntx( arch_t id, ind_t ind ); -const cntx_t* const * bli_gks_lookup_id( arch_t id ); -void bli_gks_register_cntx( arch_t id, void_fp nat_fp, void_fp ref_fp, void_fp ind_fp ); +void bli_gks_init_index( void ); -BLIS_EXPORT_BLIS const cntx_t* bli_gks_query_cntx( void ); -BLIS_EXPORT_BLIS const cntx_t* bli_gks_query_nat_cntx( void ); +err_t bli_gks_lookup_nat_cntx( arch_t id, const cntx_t** cntx ); +err_t bli_gks_lookup_ind_cntx( arch_t id, ind_t ind, const cntx_t** cntx ); +const cntx_t* const * bli_gks_lookup_id( arch_t id ); +err_t bli_gks_register_cntx( arch_t id, void_fp nat_fp, void_fp ref_fp, void_fp ind_fp ); -const cntx_t* bli_gks_query_cntx_noinit( void ); +BLIS_EXPORT_BLIS err_t bli_gks_query_cntx( const cntx_t** cntx ); +BLIS_EXPORT_BLIS err_t bli_gks_query_nat_cntx( const cntx_t** cntx ); -BLIS_EXPORT_BLIS const cntx_t* bli_gks_query_ind_cntx( ind_t ind ); +err_t 
bli_gks_query_cntx_noinit( const cntx_t** cntx ); -BLIS_EXPORT_BLIS void bli_gks_init_ref_cntx( cntx_t* cntx ); +BLIS_EXPORT_BLIS err_t bli_gks_query_ind_cntx( ind_t ind, const cntx_t** cntx ); -bool bli_gks_cntx_l3_nat_ukr_is_ref( num_t dt, ukr_t ukr_id, const cntx_t* cntx ); +BLIS_EXPORT_BLIS err_t bli_gks_init_ref_cntx( cntx_t* cntx ); -BLIS_EXPORT_BLIS const char* bli_gks_l3_ukr_impl_string( ukr_t ukr, ind_t method, num_t dt ); -BLIS_EXPORT_BLIS kimpl_t bli_gks_l3_ukr_impl_type( ukr_t ukr, ind_t method, num_t dt ); +err_t bli_gks_cntx_l3_nat_ukr_is_ref( num_t dt, ukr_t ukr_id, const cntx_t* cntx, bool* is_ref ); -//char* bli_gks_l3_ukr_avail_impl_string( ukr_t ukr, num_t dt ); +BLIS_EXPORT_BLIS err_t bli_gks_l3_ukr_impl_string( ukr_t ukr, ind_t method, num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_gks_l3_ukr_impl_type( ukr_t ukr, ind_t method, num_t dt, kimpl_t* ki ); + +// +// -- cntx_t* query convenience wrapper ---------------------------------------- +// + +BLIS_INLINE err_t bli_gks_query_cntx_if_null( const cntx_t** cntx ) +{ + err_t r_val; + + if ( *cntx == NULL ) + { + r_val = bli_gks_query_nat_cntx( cntx ); + bli_check_return_if_failure( r_val ); + } + + return BLIS_SUCCESS; +} + +BLIS_INLINE err_t bli_gks_query_ind_cntx_if_null( ind_t im, const cntx_t** cntx ) +{ + err_t r_val; + + if ( *cntx == NULL ) + { + r_val = bli_gks_query_ind_cntx( im, cntx ); + bli_check_return_if_failure( r_val ); + } + + return BLIS_SUCCESS; +} #endif diff --git a/frame/base/bli_ind.c b/frame/base/bli_ind.c index fbe7404654..fd3376f4e0 100644 --- a/frame/base/bli_ind.c +++ b/frame/base/bli_ind.c @@ -40,13 +40,45 @@ static const char* bli_ind_impl_str[BLIS_NUM_IND_METHODS] = /* nat */ "native", }; +// A boolean that tracks whether bli_ind_init() has completed successfully. 
+static bool ind_is_init = FALSE; + // ----------------------------------------------------------------------------- -void bli_ind_init( void ) +bool bli_ind_is_init( void ) +{ + return ind_is_init; +} + +void bli_ind_mark_init( void ) { + ind_is_init = TRUE; +} + +void bli_ind_mark_uninit( void ) +{ + ind_is_init = FALSE; +} + +// ----------------------------------------------------------------------------- + +err_t bli_ind_init( void ) +{ + const cntx_t* cntx; + err_t r_val; + + // NOTE: We assume this function is only called by one thread. + + // Sanity check: Return early if the API is already initialized. + if ( bli_ind_is_init() ) return BLIS_SUCCESS; + // NOTE: Instead of calling bli_gks_query_cntx(), we call // bli_gks_query_cntx_noinit() to avoid the call to bli_init_once(). - const cntx_t* cntx = bli_gks_query_cntx_noinit(); + r_val = bli_gks_query_cntx_noinit( &cntx ); + bli_check_return_if_failure( r_val ); + + bool s_is_ref, c_is_ref, + d_is_ref, z_is_ref; // For each precision, enable the default induced method (1m) if both of // the following conditions are met: @@ -55,17 +87,38 @@ void bli_ind_init( void ) // The second condition means that BLIS will not bother to use an induced // method if both the real and complex domain kernels are reference. 
- bool s_is_ref = bli_gks_cntx_l3_nat_ukr_is_ref( BLIS_FLOAT, BLIS_GEMM_UKR, cntx ); - bool d_is_ref = bli_gks_cntx_l3_nat_ukr_is_ref( BLIS_DOUBLE, BLIS_GEMM_UKR, cntx ); - bool c_is_ref = bli_gks_cntx_l3_nat_ukr_is_ref( BLIS_SCOMPLEX, BLIS_GEMM_UKR, cntx ); - bool z_is_ref = bli_gks_cntx_l3_nat_ukr_is_ref( BLIS_DCOMPLEX, BLIS_GEMM_UKR, cntx ); + r_val = bli_gks_cntx_l3_nat_ukr_is_ref( BLIS_FLOAT, BLIS_GEMM_UKR, cntx, &s_is_ref ); + bli_check_return_if_failure( r_val ); + + r_val = bli_gks_cntx_l3_nat_ukr_is_ref( BLIS_DOUBLE, BLIS_GEMM_UKR, cntx, &d_is_ref ); + bli_check_return_if_failure( r_val ); + + r_val = bli_gks_cntx_l3_nat_ukr_is_ref( BLIS_SCOMPLEX, BLIS_GEMM_UKR, cntx, &c_is_ref ); + bli_check_return_if_failure( r_val ); + + r_val = bli_gks_cntx_l3_nat_ukr_is_ref( BLIS_DCOMPLEX, BLIS_GEMM_UKR, cntx, &z_is_ref ); + bli_check_return_if_failure( r_val ); if ( c_is_ref && !s_is_ref ) bli_ind_enable_dt( BLIS_1M, BLIS_SCOMPLEX ); if ( z_is_ref && !d_is_ref ) bli_ind_enable_dt( BLIS_1M, BLIS_DCOMPLEX ); + + // Mark the API as initialized. + bli_ind_mark_init(); + + return BLIS_SUCCESS; } -void bli_ind_finalize( void ) +err_t bli_ind_finalize( void ) { + // NOTE: We assume this function is only called by one thread. + + // Sanity check: Return early if the API is uninitialized. + if ( !bli_ind_is_init() ) return BLIS_SUCCESS; + + // Mark the API as uninitialized. 
+ bli_ind_mark_uninit(); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- @@ -176,11 +229,17 @@ ind_t bli_ind_oper_find_avail( opid_t oper, num_t dt ) return method; } -const char* bli_ind_oper_get_avail_impl_string( opid_t oper, num_t dt ) +// ----------------------------------------------------------------------------- + +err_t bli_ind_oper_get_avail_impl_string( opid_t oper, num_t dt, const char** str ) { + BLIS_INIT_ONCE(); + ind_t method = bli_ind_oper_find_avail( oper, dt ); - return bli_ind_get_impl_string( method ); + *str = bli_ind_get_impl_string( method ); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- @@ -192,10 +251,9 @@ const char* bli_ind_get_impl_string( ind_t method ) num_t bli_ind_map_cdt_to_index( num_t dt ) { - // A non-complex datatype should never be passed in. - if ( !bli_is_complex( dt ) ) bli_abort(); - - // Map the complex datatype to a zero-based index. + // Map the complex datatype to a zero-based index that matches up with + // the expectations of the induced-method-per-operation state array in + // bli_l3_ind.c. 
if ( bli_is_scomplex( dt ) ) return 0; else /* if ( bli_is_dcomplex( dt ) ) */ return 1; } diff --git a/frame/base/bli_ind.h b/frame/base/bli_ind.h index e162c5809b..3cf4b24596 100644 --- a/frame/base/bli_ind.h +++ b/frame/base/bli_ind.h @@ -38,26 +38,30 @@ // level-3 induced method management #include "bli_l3_ind.h" -void bli_ind_init( void ); -void bli_ind_finalize( void ); +bool bli_ind_is_init( void ); +void bli_ind_mark_init( void ); +void bli_ind_mark_uninit( void ); -BLIS_EXPORT_BLIS void bli_ind_enable( ind_t method ); -BLIS_EXPORT_BLIS void bli_ind_disable( ind_t method ); -BLIS_EXPORT_BLIS void bli_ind_disable_all( void ); +err_t bli_ind_init( void ); +err_t bli_ind_finalize( void ); -BLIS_EXPORT_BLIS void bli_ind_enable_dt( ind_t method, num_t dt ); -BLIS_EXPORT_BLIS void bli_ind_disable_dt( ind_t method, num_t dt ); -BLIS_EXPORT_BLIS void bli_ind_disable_all_dt( num_t dt ); +BLIS_EXPORT_BLIS void bli_ind_enable( ind_t method ); +BLIS_EXPORT_BLIS void bli_ind_disable( ind_t method ); +BLIS_EXPORT_BLIS void bli_ind_disable_all( void ); -BLIS_EXPORT_BLIS void bli_ind_oper_enable_only( opid_t oper, ind_t method, num_t dt ); +BLIS_EXPORT_BLIS void bli_ind_enable_dt( ind_t method, num_t dt ); +BLIS_EXPORT_BLIS void bli_ind_disable_dt( ind_t method, num_t dt ); +BLIS_EXPORT_BLIS void bli_ind_disable_all_dt( num_t dt ); -BLIS_EXPORT_BLIS bool bli_ind_oper_is_impl( opid_t oper, ind_t method ); -BLIS_EXPORT_BLIS ind_t bli_ind_oper_find_avail( opid_t oper, num_t dt ); -BLIS_EXPORT_BLIS const char* bli_ind_oper_get_avail_impl_string( opid_t oper, num_t dt ); +BLIS_EXPORT_BLIS void bli_ind_oper_enable_only( opid_t oper, ind_t method, num_t dt ); -const char* bli_ind_get_impl_string( ind_t method ); -num_t bli_ind_map_cdt_to_index( num_t dt ); +BLIS_EXPORT_BLIS bool bli_ind_oper_is_impl( opid_t oper, ind_t method ); +BLIS_EXPORT_BLIS ind_t bli_ind_oper_find_avail( opid_t oper, num_t dt ); +BLIS_EXPORT_BLIS err_t bli_ind_oper_get_avail_impl_string( opid_t oper, num_t 
dt, const char** str ); + +const char* bli_ind_get_impl_string( ind_t method ); +num_t bli_ind_map_cdt_to_index( num_t dt ); #endif diff --git a/frame/base/bli_info.c b/frame/base/bli_info.c index 72b54ca20c..8b691908cf 100644 --- a/frame/base/bli_info.c +++ b/frame/base/bli_info.c @@ -162,29 +162,112 @@ gint_t bli_info_get_enable_sandbox( void ) // -- Level-3 kernel definitions -- -const char* bli_info_get_gemm_ukr_impl_string( ind_t method, num_t dt ) -{ bli_init_once(); return bli_gks_l3_ukr_impl_string( BLIS_GEMM_UKR, method, dt ); } -const char* bli_info_get_gemmtrsm_l_ukr_impl_string( ind_t method, num_t dt ) -{ bli_init_once(); return bli_gks_l3_ukr_impl_string( BLIS_GEMMTRSM_L_UKR, method, dt ); } -const char* bli_info_get_gemmtrsm_u_ukr_impl_string( ind_t method, num_t dt ) -{ bli_init_once(); return bli_gks_l3_ukr_impl_string( BLIS_GEMMTRSM_U_UKR, method, dt ); } -const char* bli_info_get_trsm_l_ukr_impl_string( ind_t method, num_t dt ) -{ bli_init_once(); return bli_gks_l3_ukr_impl_string( BLIS_TRSM_L_UKR, method, dt ); } -const char* bli_info_get_trsm_u_ukr_impl_string( ind_t method, num_t dt ) -{ bli_init_once(); return bli_gks_l3_ukr_impl_string( BLIS_TRSM_U_UKR, method, dt ); } +err_t bli_info_get_gemm_ukr_impl_string( ind_t method, num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_gks_l3_ukr_impl_string( BLIS_GEMM_UKR, method, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t bli_info_get_gemmtrsm_l_ukr_impl_string( ind_t method, num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_gks_l3_ukr_impl_string( BLIS_GEMMTRSM_L_UKR, method, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t bli_info_get_gemmtrsm_u_ukr_impl_string( ind_t method, num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_gks_l3_ukr_impl_string( BLIS_GEMMTRSM_U_UKR, method, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t 
bli_info_get_trsm_l_ukr_impl_string( ind_t method, num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_gks_l3_ukr_impl_string( BLIS_TRSM_L_UKR, method, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t bli_info_get_trsm_u_ukr_impl_string( ind_t method, num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_gks_l3_ukr_impl_string( BLIS_TRSM_U_UKR, method, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} // -- BLIS implementation query (level-3) -------------------------------------- -const char* bli_info_get_gemm_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_GEMM, dt ); } -const char* bli_info_get_gemmt_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_GEMMT, dt ); } -const char* bli_info_get_hemm_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_HEMM, dt ); } -const char* bli_info_get_herk_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_GEMMT, dt ); } -const char* bli_info_get_her2k_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_GEMMT, dt ); } -const char* bli_info_get_symm_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_SYMM, dt ); } -const char* bli_info_get_syrk_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_GEMMT, dt ); } -const char* bli_info_get_syr2k_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_GEMMT, dt ); } -const char* bli_info_get_trmm_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_TRMM, dt ); } -const char* bli_info_get_trmm3_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_TRMM3, dt ); } -const char* bli_info_get_trsm_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_TRSM, dt ); } - +err_t bli_info_get_gemm_impl_string( num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + 
err_t r_val = bli_ind_oper_get_avail_impl_string( BLIS_GEMM, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t bli_info_get_hemm_impl_string( num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_ind_oper_get_avail_impl_string( BLIS_HEMM, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t bli_info_get_herk_impl_string( num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_ind_oper_get_avail_impl_string( BLIS_HERK, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t bli_info_get_her2k_impl_string( num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_ind_oper_get_avail_impl_string( BLIS_HER2K, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t bli_info_get_symm_impl_string( num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_ind_oper_get_avail_impl_string( BLIS_SYMM, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t bli_info_get_syrk_impl_string( num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_ind_oper_get_avail_impl_string( BLIS_SYRK, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t bli_info_get_syr2k_impl_string( num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_ind_oper_get_avail_impl_string( BLIS_SYR2K, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t bli_info_get_trmm_impl_string( num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_ind_oper_get_avail_impl_string( BLIS_TRMM, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t bli_info_get_trmm3_impl_string( num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_ind_oper_get_avail_impl_string( BLIS_TRMM3, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t 
bli_info_get_trsm_impl_string( num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_ind_oper_get_avail_impl_string( BLIS_TRSM, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} diff --git a/frame/base/bli_info.h b/frame/base/bli_info.h index 250504c231..6f16d7e547 100644 --- a/frame/base/bli_info.h +++ b/frame/base/bli_info.h @@ -81,24 +81,23 @@ BLIS_EXPORT_BLIS gint_t bli_info_get_enable_sandbox( void ); // -- Level-3 kernel definitions -- -BLIS_EXPORT_BLIS const char* bli_info_get_gemm_ukr_impl_string( ind_t method, num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_gemmtrsm_l_ukr_impl_string( ind_t method, num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_gemmtrsm_u_ukr_impl_string( ind_t method, num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_trsm_l_ukr_impl_string( ind_t method, num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_trsm_u_ukr_impl_string( ind_t method, num_t dt ); +BLIS_EXPORT_BLIS err_t bli_info_get_gemm_ukr_impl_string( ind_t method, num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_gemmtrsm_l_ukr_impl_string( ind_t method, num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_gemmtrsm_u_ukr_impl_string( ind_t method, num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_trsm_l_ukr_impl_string( ind_t method, num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_trsm_u_ukr_impl_string( ind_t method, num_t dt, const char** str ); // -- BLIS implementation query (level-3) -------------------------------------- -BLIS_EXPORT_BLIS const char* bli_info_get_gemm_impl_string( num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_gemmt_impl_string( num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_hemm_impl_string( num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_herk_impl_string( num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_her2k_impl_string( num_t dt ); -BLIS_EXPORT_BLIS const char* 
bli_info_get_symm_impl_string( num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_syrk_impl_string( num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_syr2k_impl_string( num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_trmm_impl_string( num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_trmm3_impl_string( num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_trsm_impl_string( num_t dt ); +BLIS_EXPORT_BLIS err_t bli_info_get_gemm_impl_string( num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_hemm_impl_string( num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_herk_impl_string( num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_her2k_impl_string( num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_symm_impl_string( num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_syrk_impl_string( num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_syr2k_impl_string( num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_trmm_impl_string( num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_trmm3_impl_string( num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_trsm_impl_string( num_t dt, const char** str ); diff --git a/frame/base/bli_init.c b/frame/base/bli_init.c index f1baa2c217..6c1ec3c80d 100644 --- a/frame/base/bli_init.c +++ b/frame/base/bli_init.c @@ -37,68 +37,114 @@ // ----------------------------------------------------------------------------- -void bli_init( void ) +err_t bli_init( void ) { - bli_init_once(); + BLIS_INIT_ONCE(); + + return BLIS_SUCCESS; } -void bli_finalize( void ) +err_t bli_finalize( void ) { - bli_finalize_once(); + BLIS_FINALIZE_ONCE(); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -void bli_init_auto( void ) +err_t bli_init_auto( void ) { - bli_init_once(); + // NOTE: Most callers of this function (e.g. 
the BLAS compatibility layer) + // will ignore the return value of this function since those functions can't + // return error codes. + BLIS_INIT_ONCE(); + + return BLIS_SUCCESS; } -void bli_finalize_auto( void ) +err_t bli_finalize_auto( void ) { // The _auto() functions are used when initializing the BLAS compatibility // layer. It would not make much sense to automatically initialize and // finalize for every BLAS routine call; therefore, we remain initialized // unless and until the application explicitly calls bli_finalize(). + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- static bli_pthread_switch_t lib_state = BLIS_PTHREAD_SWITCH_INIT; -void bli_init_once( void ) +err_t bli_init_once( void ) { - bli_pthread_switch_on( &lib_state, bli_init_apis ); + // We can typecast from the return value of bli_pthread_switch_on() + // (which is of type 'int') directly to 'err_t' since they share the same + // basic semantics: 0 indicates success while all other values represent + // some kind of error. + return ( err_t )bli_pthread_switch_on( &lib_state, bli_init_apis ); } -void bli_finalize_once( void ) +err_t bli_finalize_once( void ) { - bli_pthread_switch_off( &lib_state, bli_finalize_apis ); + // We can typecast from the return value of bli_pthread_switch_off() + // (which is of type 'int') directly to 'err_t' since they share the same + // basic semantics: 0 indicates success while all other values represent + // some kind of error. + return ( err_t )bli_pthread_switch_off( &lib_state, bli_finalize_apis ); } // ----------------------------------------------------------------------------- int bli_init_apis( void ) { - // Initialize various sub-APIs. 
- bli_gks_init(); - bli_ind_init(); - bli_thread_init(); - bli_pack_init(); - bli_memsys_init(); - - return 0; + err_t r_val = BLIS_SUCCESS; + + // NOTE: Each of the sub-APIs should either (a) fully initialize into a good + // state (ie: a state in which a subsequent call to the corresponding + // _finalize() function would fully de-allocate whatever was allocated and + // thereby avoid a memory leak), or (b) not initialize at all. + + // NOTE: The bli_check_return_if_failure() macro will return r_val when + // the variable holds a value indicating failure. Since r_val is + // declared with type 'err_t' and the function returns a value of type + // 'int', an implicit conversion will occur if/when the macro detects failure. + + r_val = bli_gks_init(); bli_check_return_if_failure( r_val ); + r_val = bli_ind_init(); bli_check_return_if_failure( r_val ); + r_val = bli_thread_init(); bli_check_return_if_failure( r_val ); + r_val = bli_pack_init(); bli_check_return_if_failure( r_val ); + r_val = bli_pba_init(); bli_check_return_if_failure( r_val ); + r_val = bli_sba_init(); bli_check_return_if_failure( r_val ); + + return ( int )BLIS_SUCCESS; } int bli_finalize_apis( void ) { + err_t r_val = BLIS_SUCCESS; + // Finalize various sub-APIs. - bli_memsys_finalize(); + r_val = bli_sba_finalize(); bli_check_return_if_failure( r_val ); + r_val = bli_pba_finalize(); bli_check_return_if_failure( r_val ); + r_val = bli_pack_finalize(); bli_check_return_if_failure( r_val ); + r_val = bli_thread_finalize(); bli_check_return_if_failure( r_val ); + r_val = bli_ind_finalize(); bli_check_return_if_failure( r_val ); + r_val = bli_gks_finalize(); bli_check_return_if_failure( r_val ); + + return ( int )BLIS_SUCCESS; +} + +#if 0 +void bli_finalize_apis_fast( void ) +{ + // Finalize all APIs but skip the error checking. 
+ bli_sba_finalize(); + bli_pba_finalize(); bli_pack_finalize(); bli_thread_finalize(); bli_ind_finalize(); bli_gks_finalize(); - - return 0; } +#endif diff --git a/frame/base/bli_init.h b/frame/base/bli_init.h index d1bea0cb34..230ae15846 100644 --- a/frame/base/bli_init.h +++ b/frame/base/bli_init.h @@ -32,15 +32,15 @@ */ -BLIS_EXPORT_BLIS void bli_init( void ); -BLIS_EXPORT_BLIS void bli_finalize( void ); +BLIS_EXPORT_BLIS err_t bli_init( void ); +BLIS_EXPORT_BLIS err_t bli_finalize( void ); -void bli_init_auto( void ); -void bli_finalize_auto( void ); +err_t bli_init_auto( void ); +err_t bli_finalize_auto( void ); -void bli_init_once( void ); -void bli_finalize_once( void ); +err_t bli_init_once( void ); +err_t bli_finalize_once( void ); -int bli_init_apis( void ); -int bli_finalize_apis( void ); +int bli_init_apis( void ); +int bli_finalize_apis( void ); diff --git a/frame/base/bli_memsys.c b/frame/base/bli_memsys.c deleted file mode 100644 index 7b62ded5c7..0000000000 --- a/frame/base/bli_memsys.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2016, Hewlett Packard Enterprise Development LP - Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -void bli_memsys_init( void ) -{ - // Query a native context so we have something to pass into - // bli_pba_init_pools(). We use BLIS_DOUBLE for the datatype, - // but the dt argument is actually only used when initializing - // contexts for induced methods. - // NOTE: Instead of calling bli_gks_query_cntx(), we call - // bli_gks_query_cntx_noinit() to avoid the call to bli_init_once(). - const cntx_t* cntx_p = bli_gks_query_cntx_noinit(); - - // Initialize the packing block allocator and its data structures. - bli_pba_init( cntx_p ); - - // Initialize the small block allocator and its data structures. - bli_sba_init(); -} - -void bli_memsys_finalize( void ) -{ - // Finalize the small block allocator and its data structures. - bli_sba_finalize(); - - // Finalize the packing block allocator and its data structures. 
- bli_pba_finalize(); -} - diff --git a/frame/base/bli_memsys.h b/frame/base/bli_memsys.h deleted file mode 100644 index be0d48e35b..0000000000 --- a/frame/base/bli_memsys.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2016, Hewlett Packard Enterprise Development LP - Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_MEMSYS_H -#define BLIS_MEMSYS_H - -// ----------------------------------------------------------------------------- - -void bli_memsys_init( void ); -void bli_memsys_finalize( void ); - - -#endif - diff --git a/frame/base/bli_pack.c b/frame/base/bli_pack.c index c5ce9cc6c9..919d44aaac 100644 --- a/frame/base/bli_pack.c +++ b/frame/base/bli_pack.c @@ -42,45 +42,84 @@ extern rntm_t global_rntm; // resides in bli_rntm.c.) extern bli_pthread_mutex_t global_rntm_mutex; +// A boolean that tracks whether bli_pack_init() has completed successfully. +static bool pack_is_init = FALSE; + // ----------------------------------------------------------------------------- -void bli_pack_init( void ) +bool bli_pack_is_init( void ) +{ + return pack_is_init; +} + +void bli_pack_mark_init( void ) { + pack_is_init = TRUE; +} + +void bli_pack_mark_uninit( void ) +{ + pack_is_init = FALSE; +} + +// ----------------------------------------------------------------------------- + +err_t bli_pack_init( void ) +{ + // Sanity check: Return early if the API is already initialized. + if ( bli_pack_is_init() ) return BLIS_SUCCESS; + // Read the environment variables and use them to initialize the // global runtime object. bli_pack_init_rntm_from_env( &global_rntm ); + + // Mark the API as initialized. + bli_pack_mark_init(); + + return BLIS_SUCCESS; } -void bli_pack_finalize( void ) +err_t bli_pack_finalize( void ) { + // Sanity check: Return early if the API is uninitialized. + if ( !bli_pack_is_init() ) return BLIS_SUCCESS; + + // Mark the API as uninitialized. + bli_pack_mark_uninit(); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -void bli_pack_get_pack_a( bool* pack_a ) +err_t bli_pack_get_pack_a( bool* pack_a ) { // We must ensure that global_rntm has been initialized. 
- bli_init_once(); + BLIS_INIT_ONCE(); *pack_a = bli_rntm_pack_a( &global_rntm ); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -void bli_pack_get_pack_b( bool* pack_b ) +err_t bli_pack_get_pack_b( bool* pack_b ) { // We must ensure that global_rntm has been initialized. - bli_init_once(); + BLIS_INIT_ONCE(); *pack_b = bli_rntm_pack_b( &global_rntm ); + + return BLIS_SUCCESS; } // ---------------------------------------------------------------------------- -void bli_pack_set_pack_a( bool pack_a ) +err_t bli_pack_set_pack_a( bool pack_a ) { // We must ensure that global_rntm has been initialized. - bli_init_once(); + BLIS_INIT_ONCE(); // Acquire the mutex protecting global_rntm. bli_pthread_mutex_lock( &global_rntm_mutex ); @@ -89,14 +128,16 @@ void bli_pack_set_pack_a( bool pack_a ) // Release the mutex protecting global_rntm. bli_pthread_mutex_unlock( &global_rntm_mutex ); + + return BLIS_SUCCESS; } // ---------------------------------------------------------------------------- -void bli_pack_set_pack_b( bool pack_b ) +err_t bli_pack_set_pack_b( bool pack_b ) { // We must ensure that global_rntm has been initialized. - bli_init_once(); + BLIS_INIT_ONCE(); // Acquire the mutex protecting global_rntm. bli_pthread_mutex_lock( &global_rntm_mutex ); @@ -105,6 +146,8 @@ void bli_pack_set_pack_b( bool pack_b ) // Release the mutex protecting global_rntm. 
bli_pthread_mutex_unlock( &global_rntm_mutex ); + + return BLIS_SUCCESS; } // ---------------------------------------------------------------------------- diff --git a/frame/base/bli_pack.h b/frame/base/bli_pack.h index c12740148c..8c0ade6377 100644 --- a/frame/base/bli_pack.h +++ b/frame/base/bli_pack.h @@ -35,15 +35,19 @@ #ifndef BLIS_PACK_H #define BLIS_PACK_H -void bli_pack_init( void ); -void bli_pack_finalize( void ); +bool bli_pack_is_init( void ); +void bli_pack_mark_init( void ); +void bli_pack_mark_uninit( void ); -BLIS_EXPORT_BLIS void bli_pack_get_pack_a( bool* pack_a ); -BLIS_EXPORT_BLIS void bli_pack_get_pack_b( bool* pack_b ); -BLIS_EXPORT_BLIS void bli_pack_set_pack_a( bool pack_a ); -BLIS_EXPORT_BLIS void bli_pack_set_pack_b( bool pack_b ); +err_t bli_pack_init( void ); +err_t bli_pack_finalize( void ); -void bli_pack_init_rntm_from_env( rntm_t* rntm ); +BLIS_EXPORT_BLIS err_t bli_pack_get_pack_a( bool* pack_a ); +BLIS_EXPORT_BLIS err_t bli_pack_get_pack_b( bool* pack_b ); +BLIS_EXPORT_BLIS err_t bli_pack_set_pack_a( bool pack_a ); +BLIS_EXPORT_BLIS err_t bli_pack_set_pack_b( bool pack_b ); + +void bli_pack_init_rntm_from_env( rntm_t* rntm ); #endif diff --git a/frame/base/bli_pba.c b/frame/base/bli_pba.c index 68dffd7285..1a06dcd9e2 100644 --- a/frame/base/bli_pba.c +++ b/frame/base/bli_pba.c @@ -39,6 +39,26 @@ // Statically initialize the mutex within the packing block allocator object. static pba_t pba = { .mutex = BLIS_PTHREAD_MUTEX_INITIALIZER }; +// A boolean that tracks whether bli_pba_init() has completed successfully. 
+static bool pba_is_init = FALSE; + +// ----------------------------------------------------------------------------- + +bool bli_pba_is_init( void ) +{ + return pba_is_init; +} + +void bli_pba_mark_init( void ) +{ + pba_is_init = TRUE; +} + +void bli_pba_mark_uninit( void ) +{ + pba_is_init = FALSE; +} + // ----------------------------------------------------------------------------- pba_t* bli_pba_query( void ) @@ -46,12 +66,15 @@ pba_t* bli_pba_query( void ) return &pba; } -void bli_pba_init +err_t bli_pba_init ( - const cntx_t* cntx + void ) { - pba_t* pba = bli_pba_query(); + // Sanity check: Return early if the API is already initialized. + if ( bli_pba_is_init() ) return BLIS_SUCCESS; + + pba_t* restrict pba = bli_pba_query(); const siz_t align_size = BLIS_POOL_ADDR_ALIGN_SIZE_GEN; malloc_ft malloc_fp = BLIS_MALLOC_POOL; @@ -67,20 +90,37 @@ void bli_pba_init // keeps bli_pba_init() simpler and removes the possibility of // something going wrong during mutex initialization. + // The mutex field of pba is initialized statically above. It's + // important to keep the mutex initialization outside of the _init() + // function so that in the rare event that BLIS initialization fails + // part way through, we don't have to worry about whether or not we + // need to destroy the mutex first (before allowing the application + // a second chance at initialization). + #ifdef BLIS_ENABLE_PBA_POOLS - bli_pba_init_pools( cntx, pba ); + err_t r_val = bli_pba_init_pools( pba ); + bli_check_return_if_failure( r_val ); #endif + + // Mark the API as initialized. + bli_pba_mark_init(); + + return BLIS_SUCCESS; } -void bli_pba_finalize +err_t bli_pba_finalize ( void ) { - pba_t* pba = bli_pba_query(); + // Sanity check: Return early if the API is uninitialized. 
+ if ( !bli_pba_is_init() ) return BLIS_SUCCESS; + + pba_t* restrict pba = bli_pba_query(); #ifdef BLIS_ENABLE_PBA_POOLS - bli_pba_finalize_pools( pba ); + err_t r_val = bli_pba_finalize_pools( pba ); + bli_check_return_if_failure( r_val ); #endif // The mutex field of pba is initialized statically above, and @@ -88,9 +128,14 @@ void bli_pba_finalize bli_pba_set_malloc_fp( NULL, pba ); bli_pba_set_free_fp( NULL, pba ); + + // Mark the API as uninitialized. + bli_pba_mark_uninit(); + + return BLIS_SUCCESS; } -void bli_pba_acquire_m +err_t bli_pba_acquire_m ( rntm_t* rntm, siz_t req_size, @@ -98,9 +143,6 @@ void bli_pba_acquire_m mem_t* mem ) { - pool_t* pool; - pblk_t* pblk; - dim_t pi; err_t r_val; // If the internal memory pools for packing block allocator are disabled, @@ -127,6 +169,7 @@ void bli_pba_acquire_m // For general-use buffer requests, dynamically allocating memory // is assumed to be sufficient. void* buf = bli_fmalloc_align( malloc_fp, req_size, align_size, &r_val ); + bli_check_return_if_failure( r_val ); // Initialize the mem_t object with: // - the address of the memory block, @@ -148,11 +191,11 @@ void bli_pba_acquire_m // Map the requested packed buffer type to a zero-based index, which // we then use to select the corresponding memory pool. - pi = bli_packbuf_index( buf_type ); - pool = bli_pba_pool( pi, pba ); + dim_t pi = bli_packbuf_index( buf_type ); + pool_t* pool = bli_pba_pool( pi, pba ); // Extract the address of the pblk_t struct within the mem_t. - pblk = bli_mem_pblk( mem ); + pblk_t* pblk = bli_mem_pblk( mem ); // Acquire the mutex associated with the pba object. bli_pba_lock( pba ); @@ -168,7 +211,7 @@ void bli_pba_acquire_m // automatically, as-needed. Note that the addresses are stored // directly into the mem_t struct since pblk is the address of // the struct's pblk_t field. 
- bli_pool_checkout_block( req_size, pblk, pool ); + r_val = bli_pool_checkout_block( req_size, pblk, pool ); } // END CRITICAL SECTION @@ -176,6 +219,10 @@ void bli_pba_acquire_m // Release the mutex associated with the pba object. bli_pba_unlock( pba ); + // Now that we're out of the critical section, we can return if + // bli_pool_checkout_block() failed. + bli_check_return_if_failure( r_val ); + // Query the block_size from the pblk_t. This will be at least // req_size, perhaps larger. siz_t block_size = bli_pblk_block_size( pblk ); @@ -192,6 +239,8 @@ void bli_pba_acquire_m bli_mem_set_pool( pool, mem ); bli_mem_set_size( block_size, mem ); } + + return BLIS_SUCCESS; } @@ -256,6 +305,8 @@ void bli_pba_release // NOTE: We do not clear the buf_type field since there is no // "uninitialized" value for packbuf_t. bli_mem_clear( mem ); + + return; // BLIS_SUCCESS; } @@ -313,12 +364,20 @@ siz_t bli_pba_pool_size // ----------------------------------------------------------------------------- -void bli_pba_init_pools +err_t bli_pba_init_pools ( - const cntx_t* cntx, - pba_t* pba + pba_t* pba ) { + const cntx_t* cntx; + err_t r_val; + + // Query a native context so we have something to pass into + // bli_pba_compute_pool_block_sizes(). + // NOTE: Instead of calling bli_gks_query_cntx(), we call + // bli_gks_query_cntx_noinit() to avoid the call to bli_init_once(). + bli_gks_query_cntx_noinit( &cntx ); + // Map each of the packbuf_t values to an index starting at zero. const dim_t index_a = bli_packbuf_index( BLIS_BUFFER_FOR_A_BLOCK ); const dim_t index_b = bli_packbuf_index( BLIS_BUFFER_FOR_B_PANEL ); @@ -365,19 +424,31 @@ void bli_pba_init_pools cntx ); // Initialize the memory pools for A, B, and C. 
- bli_pool_init( num_blocks_a, block_ptrs_len_a, block_size_a, align_size_a, - offset_size_a, malloc_fp, free_fp, pool_a ); - bli_pool_init( num_blocks_b, block_ptrs_len_b, block_size_b, align_size_b, - offset_size_b, malloc_fp, free_fp, pool_b ); - bli_pool_init( num_blocks_c, block_ptrs_len_c, block_size_c, align_size_c, - offset_size_c, malloc_fp, free_fp, pool_c ); + r_val = bli_pool_init( num_blocks_a, block_ptrs_len_a, block_size_a, align_size_a, + offset_size_a, malloc_fp, free_fp, pool_a ); + + bli_check_callthen_return_if_failure( bli_pba_finalize_pools( pba ), r_val ); + + r_val = bli_pool_init( num_blocks_b, block_ptrs_len_b, block_size_b, align_size_b, + offset_size_b, malloc_fp, free_fp, pool_b ); + + bli_check_callthen_return_if_failure( bli_pba_finalize_pools( pba ), r_val ); + + r_val = bli_pool_init( num_blocks_c, block_ptrs_len_c, block_size_c, align_size_c, + offset_size_c, malloc_fp, free_fp, pool_c ); + + bli_check_callthen_return_if_failure( bli_pba_finalize_pools( pba ), r_val ); + + return BLIS_SUCCESS; } -void bli_pba_finalize_pools +err_t bli_pba_finalize_pools ( pba_t* pba ) { + err_t r_val; + // Map each of the packbuf_t values to an index starting at zero. dim_t index_a = bli_packbuf_index( BLIS_BUFFER_FOR_A_BLOCK ); dim_t index_b = bli_packbuf_index( BLIS_BUFFER_FOR_B_PANEL ); @@ -389,9 +460,11 @@ void bli_pba_finalize_pools pool_t* pool_c = bli_pba_pool( index_c, pba ); // Finalize the memory pools for A, B, and C. 
- bli_pool_finalize( pool_a ); - bli_pool_finalize( pool_b ); - bli_pool_finalize( pool_c ); + r_val = bli_pool_finalize( pool_a ); bli_check_return_if_failure( r_val ); + r_val = bli_pool_finalize( pool_b ); bli_check_return_if_failure( r_val ); + r_val = bli_pool_finalize( pool_c ); bli_check_return_if_failure( r_val ); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- diff --git a/frame/base/bli_pba.h b/frame/base/bli_pba.h index dfda530902..377ced4da1 100644 --- a/frame/base/bli_pba.h +++ b/frame/base/bli_pba.h @@ -119,18 +119,16 @@ BLIS_INLINE void bli_pba_unlock( pba_t* pba ) // ----------------------------------------------------------------------------- -BLIS_EXPORT_BLIS pba_t* bli_pba_query( void ); +bool bli_pba_is_init( void ); +void bli_pba_mark_init( void ); +void bli_pba_mark_uninit( void ); -void bli_pba_init - ( - const cntx_t* cntx - ); -void bli_pba_finalize - ( - void - ); +pba_t* bli_pba_query( void ); + +err_t bli_pba_init( void ); +err_t bli_pba_finalize( void ); -void bli_pba_acquire_m +err_t bli_pba_acquire_m ( rntm_t* rntm, siz_t req_size, @@ -162,12 +160,11 @@ siz_t bli_pba_pool_size // ---------------------------------------------------------------------------- -void bli_pba_init_pools +err_t bli_pba_init_pools ( - const cntx_t* cntx, - pba_t* pba + pba_t* pba ); -void bli_pba_finalize_pools +err_t bli_pba_finalize_pools ( pba_t* pba ); diff --git a/frame/base/bli_pool.c b/frame/base/bli_pool.c index 684b0ef736..b04d1e307b 100644 --- a/frame/base/bli_pool.c +++ b/frame/base/bli_pool.c @@ -37,7 +37,7 @@ //#define BLIS_ENABLE_MEM_TRACING -void bli_pool_init +err_t bli_pool_init ( siz_t num_blocks, siz_t block_ptrs_len, @@ -51,6 +51,11 @@ void bli_pool_init { err_t r_val; + // Start off with a zeroed-out pool_t structure. + // NOTE: This is especially important because it zeroes out the .block_ptrs + // field, which bli_pool_finalize() uses to decide whether to return early. 
+ bli_pool_clear( pool ); + // Make sure that block_ptrs_len is at least num_blocks. block_ptrs_len = bli_max( block_ptrs_len, num_blocks ); @@ -64,31 +69,12 @@ void bli_pool_init ( int )block_ptrs_len ); #endif - // Allocate the block_ptrs array. - // FGVZ: Do we want to call malloc_fp() for internal data structures as - // well as pool blocks? If so, don't forget to s/bli_free_intl/free_fp/g. + // Allocate the block_ptrs array. We use calloc() so that all elements are + // initialized to zero, or NULL. This allows us to deallocate only those + // blocks that were allocated in the event of a failure. pblk_t* block_ptrs = - bli_malloc_intl( block_ptrs_len * sizeof( pblk_t ), &r_val ); - - // Allocate and initialize each entry in the block_ptrs array. - for ( dim_t i = 0; i < num_blocks; ++i ) - { - #ifdef BLIS_ENABLE_MEM_TRACING - printf( "bli_pool_init(): allocating block %d of size %d (align %d, offset %d).\n", - ( int )i, ( int )block_size, ( int )align_size, ( int )offset_size ); - fflush( stdout ); - #endif - - bli_pool_alloc_block - ( - block_size, - align_size, - offset_size, - malloc_fp, - &(block_ptrs[i]) - ); - } + bli_calloc_intl( block_ptrs_len * sizeof( pblk_t ), &r_val ); // NOTE: The semantics of top_index approximate a stack, where a "full" // stack (no blocks checked out) is one where top_index == 0 and an empty @@ -101,7 +87,14 @@ void bli_pool_init // number line in which blocks are checked out from lowest to highest, // and additional blocks are added at the higher end. + // If the allocation failed, return the error code immediately. + bli_check_return_if_failure( r_val ); + // Initialize the pool_t structure. + // NOTE: Given that the calloc() succeeded, we must set these fields so + // that if any of the below calls to bli_pool_alloc_block() fail, there + // will be enough information in the structure to allow bli_pool_finalize() + // to de-allocate what was allocated. 
bli_pool_set_block_ptrs( block_ptrs, pool ); bli_pool_set_block_ptrs_len( block_ptrs_len, pool ); bli_pool_set_top_index( 0, pool ); @@ -111,9 +104,33 @@ void bli_pool_init bli_pool_set_offset_size( offset_size, pool ); bli_pool_set_malloc_fp( malloc_fp, pool ); bli_pool_set_free_fp( free_fp, pool ); + + // Allocate and initialize each entry in the block_ptrs array. + for ( dim_t i = 0; i < num_blocks; ++i ) + { + #ifdef BLIS_ENABLE_MEM_TRACING + printf( "bli_pool_init(): allocating block %d of size %d (align %d, offset %d).\n", + ( int )i, ( int )block_size, ( int )align_size, ( int )offset_size ); + fflush( stdout ); + #endif + + r_val = bli_pool_alloc_block + ( + block_size, + align_size, + offset_size, + malloc_fp, + &(block_ptrs[i]) + ); + + // If the allocation failed, finalize the pool and return the error. + bli_check_callthen_return_if_failure( bli_pool_finalize( pool ), r_val ); + } + + return BLIS_SUCCESS; } -void bli_pool_finalize +err_t bli_pool_finalize ( pool_t* pool ) @@ -126,6 +143,10 @@ void bli_pool_finalize // Query the block_ptrs array. pblk_t* block_ptrs = bli_pool_block_ptrs( pool ); + // Return early if the block_ptrs array is NULL. This would typically + // indicate that the pool structure was cleared but never initialized. + if ( block_ptrs == NULL ) return BLIS_SUCCESS; + // Query the total number of blocks currently allocated. const siz_t num_blocks = bli_pool_num_blocks( pool ); @@ -135,16 +156,17 @@ void bli_pool_finalize // checked out, then we would expect top_index != 0, and therefore this // check is not universally appropriate. #if 0 + err_t r_val; + // Query the top_index of the pool. const siz_t top_index = bli_pool_top_index( pool ); - // Sanity check: The top_index should be zero. - if ( top_index != 0 ) + // Sanity check: The top_index should be zero. If it's not, then at + // least one block is still checked out to a thread. 
+ if ( bli_error_checking_is_enabled() ) { - printf( "bli_pool_finalize(): final top_index == %d (expected 0); block_size: %d.\n", - ( int )top_index, ( int )bli_pool_block_size( pool ) ); - printf( "bli_pool_finalize(): Implication: not all blocks were checked back in!\n" ); - bli_abort(); + r_val = bli_check_outstanding_mem_pool_blocks( top_index ); + bli_check_return_if_failure( r_val ); } #endif @@ -180,21 +202,18 @@ void bli_pool_finalize // Free the block_ptrs array. bli_free_intl( block_ptrs ); - // This explicit clearing of the pool_t struct is not strictly - // necessary and so it has been commented out. -#if 0 - // Clear the contents of the pool_t struct. - bli_pool_set_block_ptrs( NULL, pool ); - bli_pool_set_block_ptrs_len( 0, pool ); - bli_pool_set_num_blocks( 0, pool ); - bli_pool_set_top_index( 0, pool ); - bli_pool_set_block_size( 0, pool ); - bli_pool_set_align_size( 0, pool ); - bli_pool_set_offset_size( 0, pool ); -#endif + // Clear the pool structure. This step is important because we want to + // either leave the pool structure in a fully initialized state (with a + // non-NULL block_ptrs field) or we want it to be cleared (with a NULL + // block_ptrs field) so that we'll know it is uninitialized. This is + // needed so that the caller can tell if a pool needs to be finalized in + // the event of an error. + bli_pool_clear( pool ); + + return BLIS_SUCCESS; } -void bli_pool_reinit +err_t bli_pool_reinit ( siz_t num_blocks_new, siz_t block_ptrs_len_new, @@ -204,6 +223,8 @@ void bli_pool_reinit pool_t* pool ) { + err_t r_val; + // Preserve the pointers to malloc() and free() provided when the pool // was first initialized. malloc_ft malloc_fp = bli_pool_malloc_fp( pool ); @@ -215,11 +236,14 @@ void bli_pool_reinit // those blocks back into the pool. (This condition can be detected // since the block size is encoded into each pblk, which is copied // upon checkout.) 
- bli_pool_finalize( pool ); + r_val = bli_pool_finalize( pool ); + + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); // Reinitialize the pool with the new parameters, in particular, // the new block size. - bli_pool_init + r_val = bli_pool_init ( num_blocks_new, block_ptrs_len_new, @@ -230,15 +254,22 @@ void bli_pool_reinit free_fp, pool ); + + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); + + return BLIS_SUCCESS; } -void bli_pool_checkout_block +err_t bli_pool_checkout_block ( siz_t req_size, pblk_t* block, pool_t* pool ) { + err_t r_val; + // If the requested block size is smaller than what the pool was // initialized with, reinitialize the pool to contain blocks of the // requested size. @@ -256,7 +287,7 @@ void bli_pool_checkout_block fflush( stdout ); #endif - bli_pool_reinit + r_val = bli_pool_reinit ( num_blocks_new, block_ptrs_len_new, @@ -265,6 +296,9 @@ void bli_pool_checkout_block offset_size_new, pool ); + + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); } // If the pool is exhausted, add a block. @@ -276,7 +310,10 @@ void bli_pool_checkout_block fflush( stdout ); #endif - bli_pool_grow( 1, pool ); + r_val = bli_pool_grow( 1, pool ); + + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); } // At this point, at least one block is guaranteed to be available. @@ -305,9 +342,11 @@ void bli_pool_checkout_block // Increment the pool's top_index. 
bli_pool_set_top_index( top_index + 1, pool ); + + return BLIS_SUCCESS; } -void bli_pool_checkin_block +err_t bli_pool_checkin_block ( pblk_t* block, pool_t* pool @@ -326,7 +365,8 @@ void bli_pool_checkin_block free_ft free_fp = bli_pool_free_fp( pool ); bli_pool_free_block( offset_size, free_fp, block ); - return; + + return BLIS_SUCCESS; } // Query the block_ptrs array. @@ -349,9 +389,11 @@ void bli_pool_checkin_block // Decrement the pool's top_index. bli_pool_set_top_index( top_index - 1, pool ); + + return BLIS_SUCCESS; } -void bli_pool_grow +err_t bli_pool_grow ( siz_t num_blocks_add, pool_t* pool @@ -360,7 +402,7 @@ void bli_pool_grow err_t r_val; // If the requested increase is zero, return early. - if ( num_blocks_add == 0 ) return; + if ( num_blocks_add == 0 ) return BLIS_SUCCESS; // Query the allocated length of the block_ptrs array and also the // total number of blocks currently allocated. @@ -396,12 +438,15 @@ void bli_pool_grow // Query the current block_ptrs array. pblk_t* block_ptrs_cur = bli_pool_block_ptrs( pool ); - // Allocate a new block_ptrs array. - // FGVZ: Do we want to call malloc_fp() for internal data structures as - // well as pool blocks? If so, don't forget to s/bli_free_intl/free_fp/g. + // Allocate the block_ptrs array. We use calloc() so that all elements are + // initialized to zero, or NULL. This allows us to deallocate only those + // blocks that were allocated in the event of a failure. pblk_t* block_ptrs_new = - bli_malloc_intl( block_ptrs_len_new * sizeof( pblk_t ), &r_val ); + bli_calloc_intl( block_ptrs_len_new * sizeof( pblk_t ), &r_val ); + + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); // Query the top_index of the pool. const siz_t top_index = bli_pool_top_index( pool ); @@ -449,10 +494,12 @@ void bli_pool_grow fflush( stdout ); #endif + dim_t i; + // Allocate the requested additional blocks in the resized array. 
- for ( dim_t i = num_blocks_cur; i < num_blocks_new; ++i ) + for ( i = num_blocks_cur; i < num_blocks_new; ++i ) { - bli_pool_alloc_block + r_val = bli_pool_alloc_block ( block_size, align_size, @@ -460,22 +507,28 @@ void bli_pool_grow malloc_fp, &(block_ptrs[i]) ); + + // If the previous function failed, update the number of blocks in the + // pool to reflect the number that were added and then return the error. + bli_check_callthen_return_if_failure( bli_pool_set_num_blocks( i, pool ), r_val ); } // Update the pool_t struct with the new number of allocated blocks. // Notice that top_index remains unchanged, as do the block_size and // align_size fields. bli_pool_set_num_blocks( num_blocks_new, pool ); + + return BLIS_SUCCESS; } -void bli_pool_shrink +err_t bli_pool_shrink ( siz_t num_blocks_sub, pool_t* pool ) { // If the requested decrease is zero, return early. - if ( num_blocks_sub == 0 ) return; + if ( num_blocks_sub == 0 ) return BLIS_SUCCESS; // Query the total number of blocks currently allocated. const siz_t num_blocks = bli_pool_num_blocks( pool ); @@ -516,9 +569,11 @@ void bli_pool_shrink // Note that after shrinking the pool, num_blocks < block_ptrs_len. // This means the pool can grow again by num_blocks_sub before // a re-allocation of block_ptrs is triggered. + + return BLIS_SUCCESS; } -void bli_pool_alloc_block +err_t bli_pool_alloc_block ( siz_t block_size, siz_t align_size, @@ -540,10 +595,19 @@ void bli_pool_alloc_block // be recovered when it's time to free the block. Note that we have to // add offset_size to the number of bytes requested since we will skip // that many bytes at the beginning of the allocated memory. + // NOTE: What is the purpose of the offset_size parameter? It was first + // found to be useful by Nicholai Tukanov when optimizing microkernel + // performance on the POWER9 microarchitecture. 
The subconfiguration + // ('power9') for use on that hardware uses unconventional offset values + // for the pool of packing blocks for A and B. (See bli_pba.c for how + // those pools are created.) void* buf = bli_fmalloc_align( malloc_fp, block_size + offset_size, align_size, &r_val ); + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); + #if 0 // NOTE: This code is disabled because it is not needed, since // bli_fmalloc_align() is guaranteed to return an aligned address. @@ -573,33 +637,40 @@ void bli_pool_alloc_block // Save the results in the pblk_t structure. bli_pblk_set_buf( buf, block ); bli_pblk_set_block_size( block_size, block ); + + return BLIS_SUCCESS; } -void bli_pool_free_block +err_t bli_pool_free_block ( siz_t offset_size, free_ft free_fp, pblk_t* block ) { - #ifdef BLIS_ENABLE_MEM_TRACING - printf( "bli_pool_free_block(): calling ffree_align(): size %d.\n", - ( int )bli_pblk_block_size( block ) ); - fflush( stdout ); - #endif - // Extract the pblk_t buffer, which is the aligned address returned from // bli_fmalloc_align() when the block was allocated. void* buf = bli_pblk_buf( block ); + // Return early if the pointer inside of the pblk_t is NULL. + if ( buf == NULL ) return BLIS_SUCCESS; + // Undo the pointer advancement by offset_size bytes performed previously // by bli_pool_alloc_block(). buf = ( void* )( ( char* )buf - offset_size ); + #ifdef BLIS_ENABLE_MEM_TRACING + printf( "bli_pool_free_block(): calling ffree_align(): size %d.\n", + ( int )bli_pblk_block_size( block ) ); + fflush( stdout ); + #endif + // Free the block via the bli_ffree_align() wrapper, which recovers the // original pointer that was returned by the pool's malloc() function when // the block was allocated. 
bli_ffree_align( free_fp, buf ); + + return BLIS_SUCCESS; } void bli_pool_print @@ -642,3 +713,20 @@ void bli_pblk_print printf( " block address (aligned): %p\n", buf ); } +void bli_pool_clear + ( + pool_t* pool + ) +{ + // Clear the contents of the pool_t struct. + bli_pool_set_block_ptrs( NULL, pool ); + bli_pool_set_block_ptrs_len( 0, pool ); + bli_pool_set_top_index( 0, pool ); + bli_pool_set_num_blocks( 0, pool ); + bli_pool_set_block_size( 0, pool ); + bli_pool_set_align_size( 0, pool ); + bli_pool_set_offset_size( 0, pool ); + bli_pool_set_malloc_fp( NULL, pool ); + bli_pool_set_free_fp( NULL, pool ); +} + diff --git a/frame/base/bli_pool.h b/frame/base/bli_pool.h index 0b16ae8eea..121872df83 100644 --- a/frame/base/bli_pool.h +++ b/frame/base/bli_pool.h @@ -215,7 +215,7 @@ BLIS_INLINE void bli_pool_set_top_index( siz_t top_index, pool_t* pool ) \ // ----------------------------------------------------------------------------- -void bli_pool_init +err_t bli_pool_init ( siz_t num_blocks, siz_t block_ptrs_len, @@ -226,11 +226,11 @@ void bli_pool_init free_ft free_fp, pool_t* pool ); -void bli_pool_finalize +err_t bli_pool_finalize ( pool_t* pool ); -void bli_pool_reinit +err_t bli_pool_reinit ( siz_t num_blocks_new, siz_t block_ptrs_len_new, @@ -240,30 +240,30 @@ void bli_pool_reinit pool_t* pool ); -void bli_pool_checkout_block +err_t bli_pool_checkout_block ( siz_t req_size, pblk_t* block, pool_t* pool ); -void bli_pool_checkin_block +err_t bli_pool_checkin_block ( pblk_t* block, pool_t* pool ); -void bli_pool_grow +err_t bli_pool_grow ( siz_t num_blocks_add, pool_t* pool ); -void bli_pool_shrink +err_t bli_pool_shrink ( siz_t num_blocks_sub, pool_t* pool ); -void bli_pool_alloc_block +err_t bli_pool_alloc_block ( siz_t block_size, siz_t align_size, @@ -271,7 +271,7 @@ void bli_pool_alloc_block malloc_ft malloc_fp, pblk_t* block ); -void bli_pool_free_block +err_t bli_pool_free_block ( siz_t offset_size, free_ft free_fp, @@ -287,5 +287,9 @@ void 
bli_pblk_print const pblk_t* pblk ); +void bli_pool_clear + ( + pool_t* pool + ); #endif diff --git a/frame/base/bli_rntm.c b/frame/base/bli_rntm.c index 2c13c74a22..55af6ab386 100644 --- a/frame/base/bli_rntm.c +++ b/frame/base/bli_rntm.c @@ -57,7 +57,7 @@ void bli_rntm_init_from_global( rntm_t* rntm ) bli_pthread_mutex_unlock( &global_rntm_mutex ); } -// ----------------------------------------------------------------------------- +// ---------------------------------------------------------------------------- void bli_rntm_set_ways_for_op ( diff --git a/frame/base/bli_rntm.h b/frame/base/bli_rntm.h index 2a39f8894c..65b80ea786 100644 --- a/frame/base/bli_rntm.h +++ b/frame/base/bli_rntm.h @@ -330,7 +330,9 @@ BLIS_INLINE void bli_rntm_init( rntm_t* rntm ) bli_rntm_clear_pba( rntm ); } +// // -- rntm_t total thread calculation ------------------------------------------ +// BLIS_INLINE dim_t bli_rntm_calc_num_threads ( @@ -348,9 +350,9 @@ BLIS_INLINE dim_t bli_rntm_calc_num_threads return n_threads; } -// ----------------------------------------------------------------------------- - -// Function prototypes +// +// -- Function prototypes ------------------------------------------------------ +// BLIS_EXPORT_BLIS void bli_rntm_init_from_global( rntm_t* rntm ); @@ -391,5 +393,19 @@ dim_t bli_rntm_calc_num_threads_in const rntm_t* rntm ); +// +// -- rntm_t convenience init wrapper ------------------------------------------ +// + +BLIS_INLINE void bli_rntm_init_if_null( rntm_t** rntm, rntm_t* rntm_l ) +{ + // Initialize a local runtime. If the caller has a NULL rntm_t pointer, + // initialize from the global rntm_t. If the caller has a non-NULL rntm_t + // pointer, initialize from that rntm_t struct. In either case, the now- + // initialized local rntm_t struct is aliased via rntm. 
+ if ( *rntm == NULL ) { bli_rntm_init_from_global( rntm_l ); *rntm = rntm_l; } + else { *rntm_l = **rntm; *rntm = rntm_l; } +} + #endif diff --git a/frame/base/bli_sba.c b/frame/base/bli_sba.c index 776622bb4a..ddf12df7bc 100644 --- a/frame/base/bli_sba.c +++ b/frame/base/bli_sba.c @@ -38,6 +38,28 @@ // Note that the sba is an apool_t of array_t of pool_t. static apool_t sba = { .mutex = BLIS_PTHREAD_MUTEX_INITIALIZER }; +// A boolean that tracks whether bli_sba_init() has completed successfully. +static bool sba_is_init = FALSE; + +// ----------------------------------------------------------------------------- + +bool bli_sba_is_init( void ) +{ + return sba_is_init; +} + +void bli_sba_mark_init( void ) +{ + sba_is_init = TRUE; +} + +void bli_sba_mark_uninit( void ) +{ + sba_is_init = FALSE; +} + +// ----------------------------------------------------------------------------- + apool_t* bli_sba_query( void ) { return &sba; @@ -45,29 +67,56 @@ apool_t* bli_sba_query( void ) // ----------------------------------------------------------------------------- -void bli_sba_init( void ) +err_t bli_sba_init( void ) { - bli_apool_init( &sba ); + err_t r_val; + + // Sanity check: Return early if the API is already initialized. + if ( bli_sba_is_init() ) return BLIS_SUCCESS; + + // Initialize the small block allocator. + r_val = bli_apool_init( &sba ); + bli_check_return_if_failure( r_val ); + + // Mark the API as initialized. + bli_sba_mark_init(); + + return BLIS_SUCCESS; } -void bli_sba_finalize( void ) +err_t bli_sba_finalize( void ) { - bli_apool_finalize( &sba ); + err_t r_val; + + // Sanity check: Return early if the API is uninitialized. + if ( !bli_sba_is_init() ) return BLIS_SUCCESS; + + // Finalize the small block allocator. + r_val = bli_apool_finalize( &sba ); + bli_check_return_if_failure( r_val ); + + // Mark the API as uninitialized. 
+ bli_sba_mark_uninit(); + + return BLIS_SUCCESS; } -void* bli_sba_acquire +// ----------------------------------------------------------------------------- + +err_t bli_sba_acquire ( rntm_t* rntm, - siz_t req_size + siz_t req_size, + void** block ) { - void* block; err_t r_val; #ifdef BLIS_ENABLE_SBA_POOLS if ( rntm == NULL ) { - block = bli_malloc_intl( req_size, &r_val ); + *block = bli_malloc_intl( req_size, &r_val ); + bli_check_return_if_failure( r_val ); } else { @@ -86,7 +135,8 @@ void* bli_sba_acquire // would be timed.) if ( pool == NULL ) { - block = bli_malloc_intl( req_size, &r_val ); + *block = bli_malloc_intl( req_size, &r_val ); + bli_check_return_if_failure( r_val ); } else { @@ -104,20 +154,22 @@ void* bli_sba_acquire } // Check out a block using the block_size queried above. - bli_pool_checkout_block( block_size, &pblk, pool ); + r_val = bli_pool_checkout_block( block_size, &pblk, pool ); + bli_check_return_if_failure( r_val ); // The block address is stored within the pblk_t. - block = bli_pblk_buf( &pblk ); + *block = bli_pblk_buf( &pblk ); } } #else - block = bli_malloc_intl( req_size, &r_val ); + *block = bli_malloc_intl( req_size, &r_val ); + bli_check_return_if_failure( r_val ); #endif // Return the address obtained from the pblk_t. - return block; + return BLIS_SUCCESS; } void bli_sba_release @@ -133,8 +185,6 @@ void bli_sba_release } else { - pblk_t pblk; - // Query the small block pool from the rntm. pool_t* pool = bli_rntm_sba_pool( rntm ); @@ -144,6 +194,8 @@ void bli_sba_release } else { + pblk_t pblk; + // Query the block_size field from the pool. 
This is not super-important // for this particular application of the pool_t (that is, the "leaf" // component of the sba), but it seems like good housekeeping to maintain @@ -168,16 +220,24 @@ void bli_sba_release #endif } -array_t* bli_sba_checkout_array +// ----------------------------------------------------------------------------- + +err_t bli_sba_checkout_array ( - const siz_t n_threads + siz_t n_threads, + const array_t** array ) { + err_t r_val; + #ifndef BLIS_ENABLE_SBA_POOLS - return NULL; + *array = NULL; return BLIS_SUCCESS; #endif - return bli_apool_checkout_array( n_threads, &sba ); + r_val = bli_apool_checkout_array( n_threads, array, &sba ); + bli_check_return_if_failure( r_val ); + + return BLIS_SUCCESS; } void bli_sba_checkin_array @@ -192,7 +252,9 @@ void bli_sba_checkin_array bli_apool_checkin_array( array, &sba ); } -void bli_sba_rntm_set_pool +// ----------------------------------------------------------------------------- + +err_t bli_sba_rntm_set_pool ( siz_t index, array_t* array, @@ -204,11 +266,16 @@ void bli_sba_rntm_set_pool return; #endif + pool_t* pool; + // Query the pool_t* in the array_t corresponding to index. - pool_t* pool = bli_apool_array_elem( index, array ); + err_t r_val = bli_apool_array_elem( index, array, &pool ); + bli_check_return_if_failure( r_val ); // Embed the pool_t* into the rntm_t. 
bli_rntm_set_sba_pool( pool, rntm ); + + return BLIS_SUCCESS; } diff --git a/frame/base/bli_sba.h b/frame/base/bli_sba.h index 4fc3aaaeea..7b121e19b6 100644 --- a/frame/base/bli_sba.h +++ b/frame/base/bli_sba.h @@ -35,16 +35,41 @@ #ifndef BLIS_SBA_H #define BLIS_SBA_H +// ----------------------------------------------------------------------------- + +bool bli_sba_is_init( void ); +void bli_sba_mark_init( void ); +void bli_sba_mark_uninit( void ); + +// ----------------------------------------------------------------------------- + apool_t* bli_sba_query( void ); // ----------------------------------------------------------------------------- -void bli_sba_init( void ); -void bli_sba_finalize( void ); +err_t bli_sba_init( void ); +err_t bli_sba_finalize( void ); + +// ----------------------------------------------------------------------------- + +err_t bli_sba_acquire + ( + rntm_t* rntm, + siz_t req_size, + void** block + ); +void bli_sba_release + ( + rntm_t* rntm, + void* block + ); + +// ----------------------------------------------------------------------------- -array_t* bli_sba_checkout_array +err_t bli_sba_checkout_array ( - siz_t n_threads + siz_t n_threads, + const array_t** array ); void bli_sba_checkin_array @@ -52,24 +77,14 @@ void bli_sba_checkin_array array_t* array ); -void bli_sba_rntm_set_pool +// ----------------------------------------------------------------------------- + +err_t bli_sba_rntm_set_pool ( siz_t index, array_t* array, rntm_t* rntm ); -void* bli_sba_acquire - ( - rntm_t* rntm, - siz_t req_size - ); -void bli_sba_release - ( - rntm_t* rntm, - void* block - ); - - #endif diff --git a/frame/base/cast/bli_castnzm.c b/frame/base/cast/old/bli_castnzm.c similarity index 100% rename from frame/base/cast/bli_castnzm.c rename to frame/base/cast/old/bli_castnzm.c diff --git a/frame/base/cast/bli_castnzm.h b/frame/base/cast/old/bli_castnzm.h similarity index 100% rename from frame/base/cast/bli_castnzm.h rename to 
frame/base/cast/old/bli_castnzm.h diff --git a/frame/compat/amd/bla_gemv_amd.c b/frame/compat/amd/bla_gemv_amd.c index 398d1bf2c2..ef5ae12c72 100644 --- a/frame/compat/amd/bla_gemv_amd.c +++ b/frame/compat/amd/bla_gemv_amd.c @@ -144,7 +144,8 @@ void PASTEF77(ch,blasname) \ /* Obtain a valid context from the gks. This is needed because these implementations of ?gemv_() skip calling gemv_ex() and instead call the unblocked fused variants directly. */ \ - cntx_t* cntx = bli_gks_query_cntx(); \ + const cntx_t* cntx; \ + bli_gks_query_cntx( &cntx ); \ \ /* Invoke the variant chosen above, which loops over a level-1v or level-1f kernel to implement the current operation. */ \ diff --git a/frame/compat/extra/bla_gemm3m.c b/frame/compat/extra/bla_gemm3m.c index 258ac5bbbe..e1099a2648 100644 --- a/frame/compat/extra/bla_gemm3m.c +++ b/frame/compat/extra/bla_gemm3m.c @@ -103,7 +103,8 @@ void PASTEF77(ch,blasname) \ abbreviated version of bli_gemm_ex() so that we can bypass consideration of sup, which doesn't make sense in this context. */ \ { \ - cntx_t* cntx = ( cntx_t* )bli_gks_query_ind_cntx( BLIS_1M ); \ + const cntx_t* cntx; \ + bli_gks_query_ind_cntx( BLIS_1M, &cntx ); \ \ rntm_t rntm_l; \ rntm_t* rntm = &rntm_l; \ @@ -222,7 +223,8 @@ void PASTEF77(ch,blasname) \ abbreviated version of bli_gemm_ex() so that we can bypass consideration of sup, which doesn't make sense in this context. 
*/ \ { \ - cntx_t* cntx = ( cntx_t* )bli_gks_query_ind_cntx( BLIS_1M ); \ + const cntx_t* cntx; \ + bli_gks_query_ind_cntx( BLIS_1M, &cntx ); \ \ rntm_t rntm_l; \ rntm_t* rntm = &rntm_l; \ diff --git a/frame/include/bli_error_macro_defs.h b/frame/include/bli_error_macro_defs.h index 00d8acdcb8..5d32df9f4e 100644 --- a/frame/include/bli_error_macro_defs.h +++ b/frame/include/bli_error_macro_defs.h @@ -40,5 +40,89 @@ bli_check_error_code_helper( code, __FILE__, __LINE__ ) +// TODO: Consider renaming this macro to one of: +// - bli_error_handle() +// - bli_error_handle_code() +// Also, consider replacing instances of +// if ( bli_is_failure( r_val ) ) return r_val; +// to a macro named something like: +// - bli_check_return_failure( r_val ); +// Also, consider adding some of logic from bli_check_error_code_helper() to +// 'else' branch of bli_check_return_error_code() so that we can intercept +// and handle undefined error codes? + +#define bli_check_return_error_code( code ) \ +{ \ + if ( bli_is_failure( code ) ) \ + { \ + if ( bli_error_mode_is_return() ) \ + { \ + return code; \ + } \ + else /* if ( bli_error_mode_is_abort() ) */ \ + { \ + bli_print_msg( bli_error_string_for_code( code ), \ + __FILE__, __LINE__ ); \ + bli_abort(); \ + } \ + } \ +} + +#define bli_check_threads_return_if_failure( e_val_p, thread ) \ +{ \ + /* Broadcast the address of the master thread's copy of e_val. */ \ + err_t* e_val_t0_p = bli_thread_broadcast( thread, e_val_p ); \ +\ + /* If the local error checking resulted in failure, save it to the master + thread's e_val. Note this includes master overwriting its own e_val. */ \ + if ( bli_is_failure( *(e_val_p) ) ) *e_val_t0_p = *(e_val_p); \ +\ + /* Wait for all threads to execute the previous code. */ \ + bli_thread_barrier( thread ); \ +\ + /* If any thread reported failure, everyone returns. All threads + return their local error code. 
*/ \ + if ( bli_is_failure( *e_val_t0_p ) ) return *e_val_p; \ +} + +#define bli_check_thread0_return_if_failure( e_val_p, thread ) \ +{ \ + /* Broadcast the address of the master thread's copy of e_val. */ \ + err_t* e_val_t0_p = bli_thread_broadcast( thread, e_val_p ); \ +\ + /* If the master thread reported failure, everyone returns. All threads + return their local error code. */ \ + if ( bli_is_failure( *e_val_t0_p ) ) return *e_val_p; \ +} + +#define bli_check_return_if_failure( error_code ) \ +{ \ + if ( bli_is_failure( error_code ) ) return error_code; \ +} + +#define bli_check_callthen_return_if_failure( func, error_code ) \ +{ \ + /* Note that the 'func' token will be a function call, including its + parenthesized parameter list (even if it is empty). */ \ + if ( bli_is_failure( error_code ) ) { func; return error_code; } \ +} + +#define bli_check_return_other_if_failure( error_code, other_val ) \ +{ \ + if ( bli_is_failure( error_code ) ) return other_val; \ +} + +#define BLIS_INIT_ONCE() \ +{ \ + err_t r_val = bli_init_once(); \ + bli_check_return_if_failure( r_val ); \ +} + +#define BLIS_FINALIZE_ONCE() \ +{ \ + err_t r_val = bli_finalize_once(); \ + bli_check_return_if_failure( r_val ); \ +} + #endif diff --git a/frame/include/bli_param_macro_defs.h b/frame/include/bli_param_macro_defs.h index 1822065dab..1c8dcc95c5 100644 --- a/frame/include/bli_param_macro_defs.h +++ b/frame/include/bli_param_macro_defs.h @@ -427,6 +427,21 @@ BLIS_INLINE bool bli_is_unit_diag( diag_t diag ) } +// ind_t + +BLIS_INLINE bool bli_is_1m( ind_t im ) +{ + return ( bool ) + ( im == BLIS_1M ); +} + +BLIS_INLINE bool bli_is_nat( ind_t im ) +{ + return ( bool ) + ( im == BLIS_NAT ); +} + + // err_t-related BLIS_INLINE bool bli_is_success( err_t err ) @@ -513,6 +528,21 @@ BLIS_INLINE void bli_set_dims_incs_with_trans( trans_t trans, } +// direction-related + +BLIS_INLINE bool bli_is_fwd( dir_t direct ) +{ + return ( bool ) + ( direct == BLIS_FWD ); +} + +BLIS_INLINE bool 
bli_is_bwd( dir_t direct ) +{ + return ( bool ) + ( direct == BLIS_BWD ); +} + + // blocksize-related BLIS_INLINE dim_t bli_determine_blocksize_dim_f( dim_t i, dim_t dim, dim_t b_alg ) diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 08c7ddc4a6..4025bb5a5c 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -471,6 +471,10 @@ typedef enum // -- Data type -- +// NOTE: There are bits of code in BLIS that implicitly assume that we can +// index from BLIS_DT_LO (BLIS_FLOAT) to BLIS_DT_HI (BLIS_DCOMPLEX). Thus, +// those types need to be kept together / adjacent / contiguous. + typedef enum { BLIS_FLOAT = BLIS_BITVAL_FLOAT_TYPE, @@ -831,8 +835,9 @@ typedef enum // index 0, implement something like a BLIS_OPID_LEVEL3_RANGE_START // value that can be subtracted from the opid_t value to map it // to a zero-based range. -// This is needed because these level-3 opid_t values are used in -// bli_l3_ind.c to index into arrays. +// This is needed because some code in BLIS indexes with opid_t values, +// such as through an array, hence why starting at 0 is important +// (example: bli_l3_ind.c). // BLIS_GEMM = 0, BLIS_GEMMT, @@ -956,6 +961,15 @@ typedef enum // value (BLIS_ARCH_GENERIC) is given index num_archs-1. BLIS_NUM_ARCHS +# if 0 + // The maximum number of chars (including null terminator '\0') that we + // would ever need to store the name of a configuration as a string. This + // is used very infrequently, but there are times when we want to allocate + // enough bytes for all arch_t strings (as defined in bli_arch.c) without + // searching for the longest string at runtime. 
+ BLIS_ARCH_MAX_STR_LEN = 20 +#endif + } arch_t; @@ -1460,19 +1474,27 @@ typedef enum BLIS_FULL_ERROR_CHECKING } errlev_t; +typedef enum +{ + BLIS_ERROR_RETURN = 0, + BLIS_ERROR_ABORT +} errmode_t; + typedef enum { // Generic error codes - BLIS_SUCCESS = ( -1), - BLIS_FAILURE = ( -2), + BLIS_SUCCESS = ( 0), + BLIS_FAILURE = ( -1), BLIS_ERROR_CODE_MIN = ( -9), // General errors BLIS_INVALID_ERROR_CHECKING_LEVEL = ( -10), - BLIS_UNDEFINED_ERROR_CODE = ( -11), - BLIS_NULL_POINTER = ( -12), - BLIS_NOT_YET_IMPLEMENTED = ( -13), + BLIS_INVALID_ERROR_HANDLING_MODE = ( -11), + BLIS_UNDEFINED_ERROR_CODE = ( -12), + BLIS_NULL_POINTER = ( -13), + BLIS_NOT_YET_IMPLEMENTED = ( -14), + BLIS_REJECT_EXEC = ( -15), // Parameter-specific errors BLIS_INVALID_SIDE = ( -20), @@ -1521,47 +1543,61 @@ typedef enum // Storage-specific errors BLIS_EXPECTED_UPPER_OR_LOWER_OBJECT = ( -70), + // Induced method-specific errors + BLIS_INVALID_IND = ( -80), + // Partitioning-specific errors - BLIS_INVALID_3x1_SUBPART = ( -80), - BLIS_INVALID_1x3_SUBPART = ( -81), - BLIS_INVALID_3x3_SUBPART = ( -82), + BLIS_INVALID_DIRECTION = ( -90), + BLIS_INVALID_3x1_SUBPART = ( -91), + BLIS_INVALID_1x3_SUBPART = ( -92), + BLIS_INVALID_3x3_SUBPART = ( -93), + BLIS_ROW_OFFSET_LESS_THAN_ZERO = ( -94), + BLIS_ROW_OFFSET_EXCEEDS_NUM_ROWS = ( -95), + BLIS_COL_OFFSET_LESS_THAN_ZERO = ( -96), + BLIS_COL_OFFSET_EXCEEDS_NUM_COLS = ( -97), + BLIS_VECTOR_OFFSET_LESS_THAN_ZERO = ( -98), + BLIS_VECTOR_OFFSET_EXCEEDS_NUM_ELEM = ( -99), // Control tree-specific errors - BLIS_UNEXPECTED_NULL_CONTROL_TREE = ( -90), + BLIS_UNEXPECTED_NULL_CONTROL_TREE = (-100), // Packing-specific errors - BLIS_PACK_SCHEMA_NOT_SUPPORTED_FOR_UNPACK = (-100), + BLIS_PACK_SCHEMA_NOT_SUPPORTED_FOR_UNPACK = (-110), // Buffer-specific errors - BLIS_EXPECTED_NONNULL_OBJECT_BUFFER = (-110), + BLIS_EXPECTED_NONNULL_OBJECT_BUFFER = (-120), // Memory errors - BLIS_MALLOC_RETURNED_NULL = (-120), + BLIS_MALLOC_RETURNED_NULL = (-130), // Internal memory pool 
errors - BLIS_INVALID_PACKBUF = (-130), - BLIS_EXHAUSTED_CONTIG_MEMORY_POOL = (-131), - BLIS_INSUFFICIENT_STACK_BUF_SIZE = (-132), - BLIS_ALIGNMENT_NOT_POWER_OF_TWO = (-133), - BLIS_ALIGNMENT_NOT_MULT_OF_PTR_SIZE = (-134), + BLIS_INVALID_PACKBUF = (-140), + BLIS_EXHAUSTED_CONTIG_MEMORY_POOL = (-141), + BLIS_INSUFFICIENT_STACK_BUF_SIZE = (-142), + BLIS_ALIGNMENT_NOT_POWER_OF_TWO = (-143), + BLIS_ALIGNMENT_NOT_MULT_OF_PTR_SIZE = (-144), + BLIS_MEM_POOL_BLOCKS_OUTSTANDING = (-145), // Object-related errors - BLIS_EXPECTED_OBJECT_ALIAS = (-140), + BLIS_EXPECTED_OBJECT_ALIAS = (-150), // Architecture-related errors - BLIS_INVALID_ARCH_ID = (-150), - BLIS_UNINITIALIZED_GKS_CNTX = (-151), - BLIS_INVALID_UKR_ID = (-152), + BLIS_INVALID_ARCH_ID = (-160), + BLIS_UNINITIALIZED_GKS_CNTX = (-161), + BLIS_INVALID_UKR_ID = (-162), // Blocksize-related errors - BLIS_MC_DEF_NONMULTIPLE_OF_MR = (-160), - BLIS_MC_MAX_NONMULTIPLE_OF_MR = (-161), - BLIS_NC_DEF_NONMULTIPLE_OF_NR = (-162), - BLIS_NC_MAX_NONMULTIPLE_OF_NR = (-163), - BLIS_KC_DEF_NONMULTIPLE_OF_KR = (-164), - BLIS_KC_MAX_NONMULTIPLE_OF_KR = (-165), - - BLIS_ERROR_CODE_MAX = (-170) + BLIS_MC_DEF_NONMULTIPLE_OF_MR = (-170), + BLIS_MC_MAX_NONMULTIPLE_OF_MR = (-171), + BLIS_NC_DEF_NONMULTIPLE_OF_NR = (-172), + BLIS_NC_MAX_NONMULTIPLE_OF_NR = (-173), + BLIS_KC_DEF_NONMULTIPLE_OF_KR = (-174), + BLIS_KC_MAX_NONMULTIPLE_OF_KR = (-175), + + // Thread-related errors + BLIS_EXPECTED_DIFF_NUM_THREADS = (-180), + + BLIS_ERROR_CODE_MAX = (-190) } err_t; #endif diff --git a/frame/include/blis.h b/frame/include/blis.h index 98ebee878d..cac3d29258 100644 --- a/frame/include/blis.h +++ b/frame/include/blis.h @@ -124,16 +124,15 @@ extern "C" { #include "bli_array.h" #include "bli_apool.h" #include "bli_sba.h" -#include "bli_memsys.h" #include "bli_mem.h" #include "bli_part.h" #include "bli_prune.h" #include "bli_query.h" #include "bli_auxinfo.h" +#include "bli_error.h" #include "bli_param_map.h" #include "bli_clock.h" #include "bli_check.h" 
-#include "bli_error.h" #include "bli_f2c.h" #include "bli_machval.h" #include "bli_getopt.h" @@ -150,7 +149,7 @@ extern "C" { #include "bli_setri.h" #include "bli_castm.h" -#include "bli_castnzm.h" +//#include "bli_castnzm.h" #include "bli_castv.h" #include "bli_projm.h" #include "bli_projv.h" diff --git a/frame/thread/bli_l3_decor_openmp.c b/frame/thread/bli_l3_decor_openmp.c index 2c71c75321..f2b4b765be 100644 --- a/frame/thread/bli_l3_decor_openmp.c +++ b/frame/thread/bli_l3_decor_openmp.c @@ -72,7 +72,8 @@ void bli_l3_thread_decorator // with an internal lock to ensure only one application thread accesses // the sba at a time. bli_sba_checkout_array() will also automatically // resize the array_t, if necessary. - array_t* array = bli_sba_checkout_array( n_threads ); + array_t* array; + bli_sba_checkout_array( n_threads, ( const array_t** )&array ); // Access the pool_t* for thread 0 and embed it into the rntm. We do // this up-front only so that we have the rntm_t.sba_pool field @@ -85,7 +86,8 @@ void bli_l3_thread_decorator bli_pba_rntm_set_pba( rntm ); // Allocate a global communicator for the root thrinfo_t structures. - thrcomm_t* gl_comm = bli_thrcomm_create( rntm, n_threads ); + thrcomm_t* gl_comm; + bli_thrcomm_create( rntm, n_threads, &gl_comm ); _Pragma( "omp parallel num_threads(n_threads)" ) diff --git a/frame/thread/bli_l3_decor_pthreads.c b/frame/thread/bli_l3_decor_pthreads.c index 80247dfb1c..3d1b88d101 100644 --- a/frame/thread/bli_l3_decor_pthreads.c +++ b/frame/thread/bli_l3_decor_pthreads.c @@ -164,7 +164,8 @@ void bli_l3_thread_decorator // with an internal lock to ensure only one application thread accesses // the sba at a time. bli_sba_checkout_array() will also automatically // resize the array_t, if necessary. - array_t* array = bli_sba_checkout_array( n_threads ); + array_t* array; + bli_sba_checkout_array( n_threads, ( const array_t** )&array ); // Access the pool_t* for thread 0 and embed it into the rntm. 
We do // this up-front only so that we have the rntm_t.sba_pool field @@ -177,7 +178,8 @@ void bli_l3_thread_decorator bli_pba_rntm_set_pba( rntm ); // Allocate a global communicator for the root thrinfo_t structures. - thrcomm_t* gl_comm = bli_thrcomm_create( rntm, n_threads ); + thrcomm_t* gl_comm; + bli_thrcomm_create( rntm, n_threads, &gl_comm ); // Allocate an array of pthread objects and auxiliary data structs to pass // to the thread entry functions. diff --git a/frame/thread/bli_l3_decor_single.c b/frame/thread/bli_l3_decor_single.c index c2c43b3703..30c98304b2 100644 --- a/frame/thread/bli_l3_decor_single.c +++ b/frame/thread/bli_l3_decor_single.c @@ -75,7 +75,8 @@ void bli_l3_thread_decorator // with an internal lock to ensure only one application thread accesses // the sba at a time. bli_sba_checkout_array() will also automatically // resize the array_t, if necessary. - array_t* array = bli_sba_checkout_array( n_threads ); + array_t* array; + bli_sba_checkout_array( n_threads, ( const array_t** )&array ); // Access the pool_t* for thread 0 and embed it into the rntm. We do // this up-front only so that we can create the global comm below. @@ -85,7 +86,8 @@ void bli_l3_thread_decorator bli_pba_rntm_set_pba( rntm ); // Allcoate a global communicator for the root thrinfo_t structures. - thrcomm_t* gl_comm = bli_thrcomm_create( rntm, n_threads ); + thrcomm_t* gl_comm; + bli_thrcomm_create( rntm, n_threads, &gl_comm ); { diff --git a/frame/thread/bli_l3_sup_decor_openmp.c b/frame/thread/bli_l3_sup_decor_openmp.c index ff6bc667d3..ba6f580086 100644 --- a/frame/thread/bli_l3_sup_decor_openmp.c +++ b/frame/thread/bli_l3_sup_decor_openmp.c @@ -66,7 +66,8 @@ err_t bli_l3_sup_thread_decorator // with an internal lock to ensure only one application thread accesses // the sba at a time. bli_sba_checkout_array() will also automatically // resize the array_t, if necessary. 
- array_t* array = bli_sba_checkout_array( n_threads ); + array_t* array; + bli_sba_checkout_array( n_threads, ( const array_t** )&array ); // Access the pool_t* for thread 0 and embed it into the rntm. We do // this up-front only so that we have the rntm_t.sba_pool field diff --git a/frame/thread/bli_l3_sup_decor_pthreads.c b/frame/thread/bli_l3_sup_decor_pthreads.c index 375a85730e..b6eef42632 100644 --- a/frame/thread/bli_l3_sup_decor_pthreads.c +++ b/frame/thread/bli_l3_sup_decor_pthreads.c @@ -133,7 +133,8 @@ err_t bli_l3_sup_thread_decorator // with an internal lock to ensure only one application thread accesses // the sba at a time. bli_sba_checkout_array() will also automatically // resize the array_t, if necessary. - array_t* array = bli_sba_checkout_array( n_threads ); + array_t* array; + bli_sba_checkout_array( n_threads, ( const array_t** )&array ); // Access the pool_t* for thread 0 and embed it into the rntm. We do // this up-front only so that we have the rntm_t.sba_pool field diff --git a/frame/thread/bli_l3_sup_decor_single.c b/frame/thread/bli_l3_sup_decor_single.c index df767ad292..665000f304 100644 --- a/frame/thread/bli_l3_sup_decor_single.c +++ b/frame/thread/bli_l3_sup_decor_single.c @@ -61,7 +61,8 @@ err_t bli_l3_sup_thread_decorator // with an internal lock to ensure only one application thread accesses // the sba at a time. bli_sba_checkout_array() will also automatically // resize the array_t, if necessary. - array_t* array = bli_sba_checkout_array( n_threads ); + array_t* array; + bli_sba_checkout_array( n_threads, ( const array_t** )&array ); // Access the pool_t* for thread 0 and embed it into the rntm. bli_sba_rntm_set_pool( 0, array, rntm ); diff --git a/frame/thread/bli_thrcomm.h b/frame/thread/bli_thrcomm.h index d0ffb13461..c3565fbd57 100644 --- a/frame/thread/bli_thrcomm.h +++ b/frame/thread/bli_thrcomm.h @@ -52,15 +52,15 @@ BLIS_INLINE dim_t bli_thrcomm_num_threads( thrcomm_t* comm ) // Thread communicator prototypes. 
-thrcomm_t* bli_thrcomm_create( rntm_t* rntm, dim_t n_threads ); -void bli_thrcomm_free( rntm_t* rntm, thrcomm_t* comm ); -void bli_thrcomm_init( dim_t n_threads, thrcomm_t* comm ); -void bli_thrcomm_cleanup( thrcomm_t* comm ); +err_t bli_thrcomm_create( rntm_t* rntm, dim_t n_threads, thrcomm_t** comm ); +void bli_thrcomm_free( rntm_t* rntm, thrcomm_t* comm ); +void bli_thrcomm_init( dim_t n_threads, thrcomm_t* comm ); +void bli_thrcomm_cleanup( thrcomm_t* comm ); BLIS_EXPORT_BLIS void bli_thrcomm_barrier( dim_t thread_id, thrcomm_t* comm ); BLIS_EXPORT_BLIS void* bli_thrcomm_bcast( dim_t inside_id, void* to_send, thrcomm_t* comm ); -void bli_thrcomm_barrier_atomic( dim_t thread_id, thrcomm_t* comm ); +void bli_thrcomm_barrier_atomic( dim_t thread_id, thrcomm_t* comm ); #endif diff --git a/frame/thread/bli_thrcomm_openmp.c b/frame/thread/bli_thrcomm_openmp.c index 9bb35ea31a..0c7e81af93 100644 --- a/frame/thread/bli_thrcomm_openmp.c +++ b/frame/thread/bli_thrcomm_openmp.c @@ -37,17 +37,24 @@ #ifdef BLIS_ENABLE_OPENMP -thrcomm_t* bli_thrcomm_create( rntm_t* rntm, dim_t n_threads ) +err_t bli_thrcomm_create( rntm_t* rntm, dim_t n_threads, thrcomm_t** comm ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_thrcomm_create(): " ); #endif - thrcomm_t* comm = bli_sba_acquire( rntm, sizeof(thrcomm_t) ); + err_t r_val; + thrcomm_t* tc; - bli_thrcomm_init( n_threads, comm ); + r_val = bli_sba_acquire( rntm, sizeof( thrcomm_t ), ( void** )&tc ); + bli_check_return_if_failure( r_val ); - return comm; + bli_thrcomm_init( n_threads, tc ); + + // Set the thrcomm_t pointer. 
+ *comm = tc; + + return BLIS_SUCCESS; } void bli_thrcomm_free( rntm_t* rntm, thrcomm_t* comm ) diff --git a/frame/thread/bli_thrcomm_pthreads.c b/frame/thread/bli_thrcomm_pthreads.c index d0896f94df..d057bf3b1e 100644 --- a/frame/thread/bli_thrcomm_pthreads.c +++ b/frame/thread/bli_thrcomm_pthreads.c @@ -37,17 +37,24 @@ #ifdef BLIS_ENABLE_PTHREADS -thrcomm_t* bli_thrcomm_create( rntm_t* rntm, dim_t n_threads ) +err_t bli_thrcomm_create( rntm_t* rntm, dim_t n_threads, thrcomm_t** comm ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_thrcomm_create(): " ); #endif - thrcomm_t* comm = bli_sba_acquire( rntm, sizeof(thrcomm_t) ); + err_t r_val; + thrcomm_t* tc; - bli_thrcomm_init( n_threads, comm ); + r_val = bli_sba_acquire( rntm, sizeof( thrcomm_t ), ( void** )&tc ); + bli_check_return_if_failure( r_val ); - return comm; + bli_thrcomm_init( n_threads, tc ); + + // Set the thrcomm_t pointer. + *comm = tc; + + return BLIS_SUCCESS; } void bli_thrcomm_free( rntm_t* rntm, thrcomm_t* comm ) diff --git a/frame/thread/bli_thrcomm_single.c b/frame/thread/bli_thrcomm_single.c index cedb3c5b6e..87014a902e 100644 --- a/frame/thread/bli_thrcomm_single.c +++ b/frame/thread/bli_thrcomm_single.c @@ -37,18 +37,24 @@ #ifndef BLIS_ENABLE_MULTITHREADING -//Constructors and destructors for constructors -thrcomm_t* bli_thrcomm_create( rntm_t* rntm, dim_t n_threads ) +err_t bli_thrcomm_create( rntm_t* rntm, dim_t n_threads, thrcomm_t** comm ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_thrcomm_create(): " ); #endif - thrcomm_t* comm = bli_sba_acquire( rntm, sizeof( thrcomm_t ) ); + err_t r_val; + thrcomm_t* tc; - bli_thrcomm_init( n_threads, comm ); + r_val = bli_sba_acquire( rntm, sizeof( thrcomm_t ), ( void** )&tc ); + bli_check_return_if_failure( r_val ); - return comm; + bli_thrcomm_init( n_threads, tc ); + + // Set the thrcomm_t pointer. 
+ *comm = tc; + + return BLIS_SUCCESS; } void bli_thrcomm_free( rntm_t* rntm, thrcomm_t* comm ) diff --git a/frame/thread/bli_thread.c b/frame/thread/bli_thread.c index 7d647a314b..30db047b4d 100644 --- a/frame/thread/bli_thread.c +++ b/frame/thread/bli_thread.c @@ -46,10 +46,34 @@ extern rntm_t global_rntm; // resides in bli_rntm.c.) extern bli_pthread_mutex_t global_rntm_mutex; +// A boolean that tracks whether bli_thread_init() has completed successfully. +static bool thread_is_init = FALSE; + // ----------------------------------------------------------------------------- -void bli_thread_init( void ) +bool bli_thread_is_init( void ) +{ + return thread_is_init; +} + +void bli_thread_mark_init( void ) +{ + thread_is_init = TRUE; +} + +void bli_thread_mark_uninit( void ) { + thread_is_init = FALSE; +} + +// ----------------------------------------------------------------------------- + +err_t bli_thread_init( void ) +{ + // Sanity check: Return early if the API is already initialized. + if ( bli_thread_is_init() ) return BLIS_SUCCESS; + + // Initialize some global communicators. bli_thrcomm_init( 1, &BLIS_SINGLE_COMM ); bli_packm_thrinfo_init_single( &BLIS_PACKM_SINGLE_THREADED ); bli_l3_thrinfo_init_single( &BLIS_GEMM_SINGLE_THREADED ); @@ -57,10 +81,22 @@ void bli_thread_init( void ) // Read the environment variables and use them to initialize the // global runtime object. bli_thread_init_rntm_from_env( &global_rntm ); + + // Mark the API as initialized. + bli_thread_mark_init(); + + return BLIS_SUCCESS; } -void bli_thread_finalize( void ) +err_t bli_thread_finalize( void ) { + // Sanity check: Return early if the API is uninitialized. + if ( !bli_thread_is_init() ) return BLIS_SUCCESS; + + // Mark the API as uninitialized. 
+ bli_thread_mark_uninit(); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- @@ -1509,7 +1545,7 @@ dim_t bli_ipow( dim_t base, dim_t power ) dim_t bli_thread_get_jc_nt( void ) { // We must ensure that global_rntm has been initialized. - bli_init_once(); + BLIS_INIT_ONCE(); return bli_rntm_jc_ways( &global_rntm ); } @@ -1517,7 +1553,7 @@ dim_t bli_thread_get_jc_nt( void ) dim_t bli_thread_get_pc_nt( void ) { // We must ensure that global_rntm has been initialized. - bli_init_once(); + BLIS_INIT_ONCE(); return bli_rntm_pc_ways( &global_rntm ); } @@ -1525,7 +1561,7 @@ dim_t bli_thread_get_pc_nt( void ) dim_t bli_thread_get_ic_nt( void ) { // We must ensure that global_rntm has been initialized. - bli_init_once(); + BLIS_INIT_ONCE(); return bli_rntm_ic_ways( &global_rntm ); } @@ -1533,7 +1569,7 @@ dim_t bli_thread_get_ic_nt( void ) dim_t bli_thread_get_jr_nt( void ) { // We must ensure that global_rntm has been initialized. - bli_init_once(); + BLIS_INIT_ONCE(); return bli_rntm_jr_ways( &global_rntm ); } @@ -1541,7 +1577,7 @@ dim_t bli_thread_get_jr_nt( void ) dim_t bli_thread_get_ir_nt( void ) { // We must ensure that global_rntm has been initialized. - bli_init_once(); + BLIS_INIT_ONCE(); return bli_rntm_ir_ways( &global_rntm ); } @@ -1549,17 +1585,17 @@ dim_t bli_thread_get_ir_nt( void ) dim_t bli_thread_get_num_threads( void ) { // We must ensure that global_rntm has been initialized. - bli_init_once(); + BLIS_INIT_ONCE(); return bli_rntm_num_threads( &global_rntm ); } // ---------------------------------------------------------------------------- -void bli_thread_set_ways( dim_t jc, dim_t pc, dim_t ic, dim_t jr, dim_t ir ) +err_t bli_thread_set_ways( dim_t jc, dim_t pc, dim_t ic, dim_t jr, dim_t ir ) { // We must ensure that global_rntm has been initialized. - bli_init_once(); + BLIS_INIT_ONCE(); // Acquire the mutex protecting global_rntm. 
bli_pthread_mutex_lock( &global_rntm_mutex ); @@ -1568,12 +1604,14 @@ void bli_thread_set_ways( dim_t jc, dim_t pc, dim_t ic, dim_t jr, dim_t ir ) // Release the mutex protecting global_rntm. bli_pthread_mutex_unlock( &global_rntm_mutex ); + + return BLIS_SUCCESS; } -void bli_thread_set_num_threads( dim_t n_threads ) +err_t bli_thread_set_num_threads( dim_t n_threads ) { // We must ensure that global_rntm has been initialized. - bli_init_once(); + BLIS_INIT_ONCE(); // Acquire the mutex protecting global_rntm. bli_pthread_mutex_lock( &global_rntm_mutex ); @@ -1582,6 +1620,8 @@ void bli_thread_set_num_threads( dim_t n_threads ) // Release the mutex protecting global_rntm. bli_pthread_mutex_unlock( &global_rntm_mutex ); + + return BLIS_SUCCESS; } // ---------------------------------------------------------------------------- diff --git a/frame/thread/bli_thread.h b/frame/thread/bli_thread.h index 5e9c650b5b..68c03857e5 100644 --- a/frame/thread/bli_thread.h +++ b/frame/thread/bli_thread.h @@ -58,8 +58,12 @@ #include "bli_l3_sup_decor.h" // Initialization-related prototypes. -void bli_thread_init( void ); -void bli_thread_finalize( void ); +bool bli_thread_is_init( void ); +void bli_thread_mark_init( void ); +void bli_thread_mark_uninit( void ); + +err_t bli_thread_init( void ); +err_t bli_thread_finalize( void ); // Thread range-related prototypes. 
@@ -202,8 +206,8 @@ BLIS_EXPORT_BLIS dim_t bli_thread_get_jr_nt( void ); BLIS_EXPORT_BLIS dim_t bli_thread_get_ir_nt( void ); BLIS_EXPORT_BLIS dim_t bli_thread_get_num_threads( void ); -BLIS_EXPORT_BLIS void bli_thread_set_ways( dim_t jc, dim_t pc, dim_t ic, dim_t jr, dim_t ir ); -BLIS_EXPORT_BLIS void bli_thread_set_num_threads( dim_t value ); +BLIS_EXPORT_BLIS err_t bli_thread_set_ways( dim_t jc, dim_t pc, dim_t ic, dim_t jr, dim_t ir ); +BLIS_EXPORT_BLIS err_t bli_thread_set_num_threads( dim_t value ); void bli_thread_init_rntm_from_env( rntm_t* rntm ); diff --git a/frame/thread/bli_thrinfo.c b/frame/thread/bli_thrinfo.c index bbe7114009..e5d59362d4 100644 --- a/frame/thread/bli_thrinfo.c +++ b/frame/thread/bli_thrinfo.c @@ -35,23 +35,28 @@ #include "blis.h" -thrinfo_t* bli_thrinfo_create +err_t bli_thrinfo_create ( - rntm_t* rntm, - thrcomm_t* ocomm, - dim_t ocomm_id, - dim_t n_way, - dim_t work_id, - bool free_comm, - bszid_t bszid, - thrinfo_t* sub_node + rntm_t* rntm, + thrcomm_t* ocomm, + dim_t ocomm_id, + dim_t n_way, + dim_t work_id, + bool free_comm, + bszid_t bszid, + thrinfo_t* sub_node, + thrinfo_t** node ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_thrinfo_create(): " ); #endif - thrinfo_t* thread = bli_sba_acquire( rntm, sizeof( thrinfo_t ) ); + err_t r_val; + thrinfo_t* thread; + + r_val = bli_sba_acquire( rntm, sizeof( thrinfo_t ), ( void** )&thread ); + bli_check_return_if_failure( r_val ); bli_thrinfo_init ( @@ -63,7 +68,10 @@ thrinfo_t* bli_thrinfo_create sub_node ); - return thread; + // Set the thrinfo_t pointer. 
+ *node = thread; + + return BLIS_SUCCESS; } void bli_thrinfo_init @@ -153,13 +161,15 @@ void bli_thrinfo_free // ----------------------------------------------------------------------------- -void bli_thrinfo_grow +err_t bli_thrinfo_grow ( rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { + err_t r_val; + // First, consider the prenode branch of the thrinfo_t tree, which should be // expanded only if there exists a prenode branch in the cntl_t tree. @@ -169,26 +179,23 @@ void bli_thrinfo_grow // is non-NULL, then it has already been created and we'll use it as-is. if ( bli_thrinfo_sub_prenode( thread ) == NULL ) { - // Assertion / sanity check. - if ( bli_cntl_bszid( cntl ) != BLIS_MC ) - { - printf( "Assertion failed: Expanding prenode for non-IC loop?\n" ); - bli_abort(); - } - // Now we must create the packa, jr, and ir nodes that make up // the prenode branch of current cntl_t node. + thrinfo_t* thread_prenode; + // Create a new node (or, if needed, multiple nodes) along the // prenode branch of the tree and return the pointer to the // (highest) child. - thrinfo_t* thread_prenode = bli_thrinfo_rgrow_prenode + r_val = bli_thrinfo_rgrow_prenode ( rntm, cntl, bli_cntl_sub_prenode( cntl ), - thread + thread, + &thread_prenode ); + bli_check_return_if_failure( r_val ); // Attach the child thrinfo_t node for the secondary branch to its // parent structure. @@ -206,35 +213,42 @@ void bli_thrinfo_grow // is non-NULL, then it has already been created and we'll use it as-is. if ( bli_thrinfo_sub_node( thread ) == NULL ) { + thrinfo_t* thread_child; + // Create a new node (or, if needed, multiple nodes) along the // main sub-node branch of the tree and return the pointer to the // (highest) child. 
- thrinfo_t* thread_child = bli_thrinfo_rgrow + r_val = bli_thrinfo_rgrow ( rntm, cntl, bli_cntl_sub_node( cntl ), - thread + thread, + &thread_child ); + bli_check_return_if_failure( r_val ); // Attach the child thrinfo_t node for the primary branch to its // parent structure. bli_thrinfo_set_sub_node( thread_child, thread ); } } + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -thrinfo_t* bli_thrinfo_rgrow +err_t bli_thrinfo_rgrow ( - rntm_t* rntm, - cntl_t* cntl_par, - cntl_t* cntl_cur, - thrinfo_t* thread_par + rntm_t* rntm, + cntl_t* cntl_par, + cntl_t* cntl_cur, + thrinfo_t* thread_par, + thrinfo_t** thread_cur ) { - thrinfo_t* thread_cur; + err_t r_val; // We must handle two cases: those where the next node in the // control tree is a partitioning node, and those where it is @@ -243,25 +257,31 @@ thrinfo_t* bli_thrinfo_rgrow { // Create the child thrinfo_t node corresponding to cntl_cur, // with cntl_par being the parent. - thread_cur = bli_thrinfo_create_for_cntl + r_val = bli_thrinfo_create_for_cntl ( rntm, cntl_par, cntl_cur, - thread_par + thread_par, + thread_cur ); + bli_check_return_if_failure( r_val ); } else // if ( bli_cntl_bszid( cntl_cur ) == BLIS_NO_PART ) { + thrinfo_t* thread_seg; + // Recursively grow the thread structure and return the top-most // thrinfo_t node of that segment. - thrinfo_t* thread_seg = bli_thrinfo_rgrow + r_val = bli_thrinfo_rgrow ( rntm, cntl_par, bli_cntl_sub_node( cntl_cur ), - thread_par + thread_par, + &thread_seg ); + bli_check_return_if_failure( r_val ); // Create a thrinfo_t node corresponding to cntl_cur. Since the // corresponding cntl node, cntl_cur, is a non-partitioning node @@ -272,7 +292,7 @@ thrinfo_t* bli_thrinfo_rgrow // to FALSE since cntl_cur is a non-partitioning node. The reason: // the communicator used here will be freed when thread_seg, or one // of its descendents, is freed. 
- thread_cur = bli_thrinfo_create + r_val = bli_thrinfo_create ( rntm, // rntm bli_thrinfo_ocomm( thread_seg ), // ocomm @@ -281,28 +301,33 @@ thrinfo_t* bli_thrinfo_rgrow bli_thread_ocomm_id( thread_seg ), // work_id FALSE, // free_comm BLIS_NO_PART, // bszid - thread_seg // sub_node + thread_seg, // sub_node + thread_cur // node ); + bli_check_return_if_failure( r_val ); } - return thread_cur; + return BLIS_SUCCESS; } #define BLIS_NUM_STATIC_COMMS 80 -thrinfo_t* bli_thrinfo_create_for_cntl +err_t bli_thrinfo_create_for_cntl ( - rntm_t* rntm, - cntl_t* cntl_par, - cntl_t* cntl_chl, - thrinfo_t* thread_par + rntm_t* rntm, + cntl_t* cntl_par, + cntl_t* cntl_chl, + thrinfo_t* thread_par, + thrinfo_t** thread_chl ) { + err_t r_val; + // If we are running with a single thread, all of the code can be reduced // and simplified to this. if ( bli_rntm_calc_num_threads( rntm ) == 1 ) { - thrinfo_t* thread_chl = bli_thrinfo_create + r_val = bli_thrinfo_create ( rntm, // rntm &BLIS_SINGLE_COMM, // ocomm @@ -311,9 +336,12 @@ thrinfo_t* bli_thrinfo_create_for_cntl 0, // work_id FALSE, // free_comm BLIS_NO_PART, // bszid - NULL // sub_node + NULL, // sub_node + thread_chl // node ); - return thread_chl; + bli_check_return_if_failure( r_val ); + + return BLIS_SUCCESS; } thrcomm_t* static_comms[ BLIS_NUM_STATIC_COMMS ]; @@ -321,18 +349,20 @@ thrinfo_t* bli_thrinfo_create_for_cntl const bszid_t bszid_chl = bli_cntl_bszid( cntl_chl ); - const dim_t parent_nt_in = bli_thread_num_threads( thread_par ); + //const dim_t parent_nt_in = bli_thread_num_threads( thread_par ); const dim_t parent_n_way = bli_thread_n_way( thread_par ); const dim_t parent_comm_id = bli_thread_ocomm_id( thread_par ); const dim_t parent_work_id = bli_thread_work_id( thread_par ); +#if 0 // Sanity check: make sure the number of threads in the parent's // communicator is divisible by the number of new sub-groups. 
if ( parent_nt_in % parent_n_way != 0 ) { printf( "Assertion failed: parent_nt_in parent_n_way != 0\n" ); - bli_abort(); + bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } +#endif // Compute: // - the number of threads inside the new child comm, @@ -350,13 +380,23 @@ thrinfo_t* bli_thrinfo_create_for_cntl // pointers. if ( bli_thread_am_ochief( thread_par ) ) { - err_t r_val; - if ( parent_n_way > BLIS_NUM_STATIC_COMMS ) + { new_comms = bli_malloc_intl( parent_n_way * sizeof( thrcomm_t* ), &r_val ); + } else - new_comms = static_comms; + { + new_comms = static_comms; r_val = BLIS_SUCCESS; + } } + else + { + r_val = BLIS_SUCCESS; + } + + // If the master thread generated an error, all threads return immediately. + if ( bli_error_checking_is_enabled() ) + bli_check_thread0_return_if_failure( &r_val, thread_par ); // Broadcast the temporary array to all threads in the parent's // communicator. @@ -366,13 +406,23 @@ thrinfo_t* bli_thrinfo_create_for_cntl // object and store it in the array element corresponding to the // parent's work id. if ( child_comm_id == 0 ) - new_comms[ parent_work_id ] = bli_thrcomm_create( rntm, child_nt_in ); + { + r_val = bli_thrcomm_create( rntm, child_nt_in, &new_comms[ parent_work_id ] ); + } + else + { + r_val = BLIS_SUCCESS; + } + + // If any thread generated an error, all threads return immediately. + if ( bli_error_checking_is_enabled() ) + bli_check_threads_return_if_failure( &r_val, thread_par ); bli_thread_barrier( thread_par ); // All threads create a new thrinfo_t node using the communicator // that was created by their chief, as identified by parent_work_id. 
- thrinfo_t* thread_chl = bli_thrinfo_create + r_val = bli_thrinfo_create ( rntm, // rntm new_comms[ parent_work_id ], // ocomm @@ -381,9 +431,14 @@ thrinfo_t* bli_thrinfo_create_for_cntl child_work_id, // work_id TRUE, // free_comm bszid_chl, // bszid - NULL // sub_node + NULL, // sub_node + thread_chl // node ); + // If any thread generated an error, all threads return immediately. + if ( bli_error_checking_is_enabled() ) + bli_check_threads_return_if_failure( &r_val, thread_par ); + bli_thread_barrier( thread_par ); // The parent's chief thread frees the temporary array of thrcomm_t @@ -394,20 +449,21 @@ thrinfo_t* bli_thrinfo_create_for_cntl bli_free_intl( new_comms ); } - return thread_chl; + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -thrinfo_t* bli_thrinfo_rgrow_prenode +err_t bli_thrinfo_rgrow_prenode ( - rntm_t* rntm, - cntl_t* cntl_par, - cntl_t* cntl_cur, - thrinfo_t* thread_par + rntm_t* rntm, + cntl_t* cntl_par, + cntl_t* cntl_cur, + thrinfo_t* thread_par, + thrinfo_t** thread_cur ) { - thrinfo_t* thread_cur; + err_t r_val; // We must handle two cases: those where the next node in the // control tree is a partitioning node, and those where it is @@ -416,25 +472,31 @@ thrinfo_t* bli_thrinfo_rgrow_prenode { // Create the child thrinfo_t node corresponding to cntl_cur, // with cntl_par being the parent. - thread_cur = bli_thrinfo_create_for_cntl_prenode + r_val = bli_thrinfo_create_for_cntl_prenode ( rntm, cntl_par, cntl_cur, - thread_par + thread_par, + thread_cur ); + bli_check_return_if_failure( r_val ); } else // if ( bli_cntl_bszid( cntl_cur ) == BLIS_NO_PART ) { + thrinfo_t* thread_seg; + // Recursively grow the thread structure and return the top-most // thrinfo_t node of that segment. 
- thrinfo_t* thread_seg = bli_thrinfo_rgrow_prenode + r_val = bli_thrinfo_rgrow_prenode ( rntm, cntl_par, bli_cntl_sub_node( cntl_cur ), - thread_par + thread_par, + &thread_seg ); + bli_check_return_if_failure( r_val ); // Create a thrinfo_t node corresponding to cntl_cur. Since the // corresponding cntl node, cntl_cur, is a non-partitioning node @@ -445,7 +507,7 @@ thrinfo_t* bli_thrinfo_rgrow_prenode // to FALSE since cntl_cur is a non-partitioning node. The reason: // the communicator used here will be freed when thread_seg, or one // of its descendents, is freed. - thread_cur = bli_thrinfo_create + r_val = bli_thrinfo_create ( rntm, // rntm bli_thrinfo_ocomm( thread_seg ), // ocomm @@ -454,19 +516,22 @@ thrinfo_t* bli_thrinfo_rgrow_prenode bli_thread_ocomm_id( thread_seg ), // work_id FALSE, // free_comm BLIS_NO_PART, // bszid - thread_seg // sub_node + thread_seg, // sub_node + thread_cur // node ); + bli_check_return_if_failure( r_val ); } - return thread_cur; + return BLIS_SUCCESS; } -thrinfo_t* bli_thrinfo_create_for_cntl_prenode +err_t bli_thrinfo_create_for_cntl_prenode ( - rntm_t* rntm, - cntl_t* cntl_par, - cntl_t* cntl_chl, - thrinfo_t* thread_par + rntm_t* rntm, + cntl_t* cntl_par, + cntl_t* cntl_chl, + thrinfo_t* thread_par, + thrinfo_t** thread ) { // NOTE: This function only has to work for the ic -> (pa -> jr) @@ -474,21 +539,25 @@ thrinfo_t* bli_thrinfo_create_for_cntl_prenode // bli_thrinfo_create_for_cntl() will be called for the last jr->ir // branch extension. 
+ err_t r_val; + const bszid_t bszid_chl = bli_cntl_bszid( cntl_chl ); const dim_t parent_nt_in = bli_thread_num_threads( thread_par ); - const dim_t parent_n_way = bli_thread_n_way( thread_par ); + //const dim_t parent_n_way = bli_thread_n_way( thread_par ); const dim_t parent_comm_id = bli_thread_ocomm_id( thread_par ); //const dim_t parent_work_id = bli_thread_work_id( thread_par ); +#if 0 // Sanity check: make sure the number of threads in the parent's // communicator is divisible by the number of new sub-groups. if ( parent_nt_in % parent_n_way != 0 ) { printf( "Assertion failed: parent_nt_in (%d) parent_n_way (%d) != 0\n", ( int )parent_nt_in, ( int )parent_n_way ); - bli_abort(); + bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } +#endif //dim_t child_nt_in = bli_cntl_calc_num_threads_in( rntm, cntl_chl ); //dim_t child_n_way = bli_rntm_ways_for( bszid_chl, rntm ); @@ -503,16 +572,25 @@ thrinfo_t* bli_thrinfo_create_for_cntl_prenode // parent's chief-ness is equivalent to checking for chief-ness in the new // about-to-be-created communicator group. thrcomm_t* new_comm = NULL; + if ( bli_thread_am_ochief( thread_par ) ) - new_comm = bli_thrcomm_create( rntm, child_nt_in ); + r_val = bli_thrcomm_create( rntm, child_nt_in, &new_comm ); + else + r_val = BLIS_SUCCESS; + + // If the master thread generated an error, all threads return immediately. + if ( bli_error_checking_is_enabled() ) + bli_check_thread0_return_if_failure( &r_val, thread_par ); // Broadcast the new thrcomm_t address to the other threads in the // parent's group. new_comm = bli_thread_broadcast( thread_par, new_comm ); + thrinfo_t* thread_chl = NULL; + // All threads create a new thrinfo_t node using the communicator // that was created by their chief, as identified by parent_work_id. 
- thrinfo_t* thread_chl = bli_thrinfo_create + r_val = bli_thrinfo_create ( rntm, // rntm new_comm, // ocomm @@ -521,12 +599,20 @@ thrinfo_t* bli_thrinfo_create_for_cntl_prenode child_work_id, // work_id TRUE, // free_comm bszid_chl, // bszid - NULL // sub_node + NULL, // sub_node + &thread_chl // node ); + // If any thread generated an error, all threads return immediately. + if ( bli_error_checking_is_enabled() ) + bli_check_threads_return_if_failure( &r_val, thread_par ); + bli_thread_barrier( thread_par ); - return thread_chl; + // Set the thrinfo_t pointer. + *thread = thread_chl; + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- diff --git a/frame/thread/bli_thrinfo.h b/frame/thread/bli_thrinfo.h index 6b98096849..b793f14e20 100644 --- a/frame/thread/bli_thrinfo.h +++ b/frame/thread/bli_thrinfo.h @@ -186,16 +186,17 @@ BLIS_INLINE void bli_thread_barrier( const thrinfo_t* t ) // Prototypes for level-3 thrinfo functions not specific to any operation. 
// -thrinfo_t* bli_thrinfo_create +err_t bli_thrinfo_create ( - rntm_t* rntm, - thrcomm_t* ocomm, - dim_t ocomm_id, - dim_t n_way, - dim_t work_id, - bool free_comm, - bszid_t bszid, - thrinfo_t* sub_node + rntm_t* rntm, + thrcomm_t* ocomm, + dim_t ocomm_id, + dim_t n_way, + dim_t work_id, + bool free_comm, + bszid_t bszid, + thrinfo_t* sub_node, + thrinfo_t** node ); void bli_thrinfo_init @@ -223,43 +224,47 @@ void bli_thrinfo_free // ----------------------------------------------------------------------------- -void bli_thrinfo_grow +err_t bli_thrinfo_grow ( rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ); -thrinfo_t* bli_thrinfo_rgrow +err_t bli_thrinfo_rgrow ( - rntm_t* rntm, - cntl_t* cntl_par, - cntl_t* cntl_cur, - thrinfo_t* thread_par + rntm_t* rntm, + cntl_t* cntl_par, + cntl_t* cntl_cur, + thrinfo_t* thread_par, + thrinfo_t** thread_cur ); -thrinfo_t* bli_thrinfo_create_for_cntl +err_t bli_thrinfo_create_for_cntl ( - rntm_t* rntm, - cntl_t* cntl_par, - cntl_t* cntl_chl, - thrinfo_t* thread_par + rntm_t* rntm, + cntl_t* cntl_par, + cntl_t* cntl_chl, + thrinfo_t* thread_par, + thrinfo_t** thread_chl ); -thrinfo_t* bli_thrinfo_rgrow_prenode +err_t bli_thrinfo_rgrow_prenode ( - rntm_t* rntm, - cntl_t* cntl_par, - cntl_t* cntl_cur, - thrinfo_t* thread_par + rntm_t* rntm, + cntl_t* cntl_par, + cntl_t* cntl_cur, + thrinfo_t* thread_par, + thrinfo_t** thread_cur ); -thrinfo_t* bli_thrinfo_create_for_cntl_prenode +err_t bli_thrinfo_create_for_cntl_prenode ( - rntm_t* rntm, - cntl_t* cntl_par, - cntl_t* cntl_chl, - thrinfo_t* thread_par + rntm_t* rntm, + cntl_t* cntl_par, + cntl_t* cntl_chl, + thrinfo_t* thread_par, + thrinfo_t** thread_chl ); // ----------------------------------------------------------------------------- diff --git a/frame/thread/bli_thrinfo_sup.c b/frame/thread/bli_thrinfo_sup.c index 966247fd04..f450247b9e 100644 --- a/frame/thread/bli_thrinfo_sup.c +++ b/frame/thread/bli_thrinfo_sup.c @@ -35,15 +35,17 @@ #include "blis.h" -void 
bli_thrinfo_sup_grow +err_t bli_thrinfo_sup_grow ( rntm_t* rntm, const bszid_t* bszid_par, thrinfo_t* thread ) { + err_t r_val; + if ( thread == &BLIS_GEMM_SINGLE_THREADED || - thread == &BLIS_PACKM_SINGLE_THREADED ) return; + thread == &BLIS_PACKM_SINGLE_THREADED ) return BLIS_SUCCESS; // NOTE: If bli_thrinfo_sup_rgrow() is being called, the sub_node field will // always be non-NULL, and so there's no need to check it. @@ -53,35 +55,42 @@ void bli_thrinfo_sup_grow // is non-NULL, then it has already been created and we'll use it as-is. if ( bli_thrinfo_sub_node( thread ) == NULL ) { + thrinfo_t* thread_child; + // Create a new node (or, if needed, multiple nodes) along the // main sub-node branch of the tree and return the pointer to the // (highest) child. - thrinfo_t* thread_child = bli_thrinfo_sup_rgrow + r_val = bli_thrinfo_sup_rgrow ( rntm, bszid_par, &bszid_par[1], - thread + thread, + &thread_child ); + bli_check_return_if_failure( r_val ); // Attach the child thrinfo_t node for the primary branch to its // parent structure. bli_thrinfo_set_sub_node( thread_child, thread ); } } + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -thrinfo_t* bli_thrinfo_sup_rgrow +err_t bli_thrinfo_sup_rgrow ( - rntm_t* rntm, - const bszid_t* bszid_par, - const bszid_t* bszid_cur, - thrinfo_t* thread_par + rntm_t* rntm, + const bszid_t* bszid_par, + const bszid_t* bszid_cur, + thrinfo_t* thread_par, + thrinfo_t** thread_cur ) { - thrinfo_t* thread_cur; + err_t r_val; // We must handle two cases: those where the next node in the // control tree is a partitioning node, and those where it is @@ -90,25 +99,31 @@ thrinfo_t* bli_thrinfo_sup_rgrow { // Create the child thrinfo_t node corresponding to cntl_cur, // with cntl_par being the parent. 
- thread_cur = bli_thrinfo_sup_create_for_cntl + r_val = bli_thrinfo_sup_create_for_cntl ( rntm, bszid_par, bszid_cur, - thread_par + thread_par, + thread_cur ); + bli_check_return_if_failure( r_val ); } else // if ( *bszid_cur == BLIS_NO_PART ) { + thrinfo_t* thread_seg; + // Recursively grow the thread structure and return the top-most // thrinfo_t node of that segment. - thrinfo_t* thread_seg = bli_thrinfo_sup_rgrow + r_val = bli_thrinfo_sup_rgrow ( rntm, bszid_par, &bszid_cur[1], - thread_par + thread_par, + &thread_seg ); + bli_check_return_if_failure( r_val ); // Create a thrinfo_t node corresponding to cntl_cur. Since the // corresponding cntl node, cntl_cur, is a non-partitioning node @@ -119,7 +134,7 @@ thrinfo_t* bli_thrinfo_sup_rgrow // to FALSE since cntl_cur is a non-partitioning node. The reason: // the communicator used here will be freed when thread_seg, or one // of its descendents, is freed. - thread_cur = bli_thrinfo_create + r_val = bli_thrinfo_create ( rntm, // rntm bli_thrinfo_ocomm( thread_seg ), // ocomm @@ -128,28 +143,35 @@ thrinfo_t* bli_thrinfo_sup_rgrow bli_thread_ocomm_id( thread_seg ), // work_id FALSE, // free_comm BLIS_NO_PART, // bszid - thread_seg // sub_node + thread_seg, // sub_node + thread_cur // node ); + bli_check_return_if_failure( r_val ); } - return thread_cur; + return BLIS_SUCCESS; } #define BLIS_NUM_STATIC_COMMS 80 -thrinfo_t* bli_thrinfo_sup_create_for_cntl +err_t bli_thrinfo_sup_create_for_cntl ( - rntm_t* rntm, - const bszid_t* bszid_par, - const bszid_t* bszid_chl, - thrinfo_t* thread_par + rntm_t* rntm, + const bszid_t* bszid_par, + const bszid_t* bszid_chl, + thrinfo_t* thread_par, + thrinfo_t** thread_chl ) { + err_t r_val; + // If we are running with a single thread, all of the code can be reduced // and simplified to this. 
if ( bli_rntm_calc_num_threads( rntm ) == 1 ) { - thrinfo_t* thread_chl = bli_thrinfo_create + thrinfo_t* thread_chl; + + r_val = bli_thrinfo_create ( rntm, // rntm &BLIS_SINGLE_COMM, // ocomm @@ -158,10 +180,12 @@ thrinfo_t* bli_thrinfo_sup_create_for_cntl 0, // work_id FALSE, // free_comm BLIS_NO_PART, // bszid - NULL // sub_node + NULL, // sub_node + &thread_chl // node ); + bli_check_return_if_failure( r_val ); - return thread_chl; + return BLIS_SUCCESS; } // The remainder of this function handles the cases involving the use of @@ -189,7 +213,9 @@ thrinfo_t* bli_thrinfo_sup_create_for_cntl // All threads create a new thrinfo_t node using the communicator // that was created by their chief, as identified by parent_work_id. - thrinfo_t* thread_chl = bli_thrinfo_create + thrinfo_t* thread_chl; + + r_val = bli_thrinfo_create ( rntm, // rntm NULL, // ocomm @@ -198,93 +224,120 @@ thrinfo_t* bli_thrinfo_sup_create_for_cntl child_work_id, // work_id TRUE, // free_comm *bszid_chl, // bszid - NULL // sub_node + NULL, // sub_node + &thread_chl // node ); + bli_check_return_if_failure( r_val ); - return thread_chl; + return BLIS_SUCCESS; } - else - { - // If we are packing at least one of A or B, then we use the general - // approach that employs broadcasts and barriers. - thrcomm_t* static_comms[ BLIS_NUM_STATIC_COMMS ]; - thrcomm_t** new_comms = NULL; + // If we are packing at least one of A or B, then we use the general + // approach that employs broadcasts and barriers. - const dim_t parent_nt_in = bli_thread_num_threads( thread_par ); - const dim_t parent_n_way = bli_thread_n_way( thread_par ); - const dim_t parent_comm_id = bli_thread_ocomm_id( thread_par ); - const dim_t parent_work_id = bli_thread_work_id( thread_par ); + thrcomm_t* static_comms[ BLIS_NUM_STATIC_COMMS ]; + thrcomm_t** new_comms = NULL; - // Sanity check: make sure the number of threads in the parent's - // communicator is divisible by the number of new sub-groups. 
- if ( parent_nt_in % parent_n_way != 0 ) - { - printf( "Assertion failed: parent_nt_in parent_n_way != 0\n" ); - bli_abort(); - } + //const dim_t parent_nt_in = bli_thread_num_threads( thread_par ); + const dim_t parent_n_way = bli_thread_n_way( thread_par ); + const dim_t parent_comm_id = bli_thread_ocomm_id( thread_par ); + const dim_t parent_work_id = bli_thread_work_id( thread_par ); - // Compute: - // - the number of threads inside the new child comm, - // - the current thread's id within the new communicator, - // - the current thread's work id, given the ways of parallelism - // to be obtained within the next loop. - const dim_t child_nt_in = bli_rntm_calc_num_threads_in( bszid_chl, rntm ); - const dim_t child_n_way = bli_rntm_ways_for( *bszid_chl, rntm ); - const dim_t child_comm_id = parent_comm_id % child_nt_in; - const dim_t child_work_id = child_comm_id / ( child_nt_in / child_n_way ); + #if 0 + // Sanity check: make sure the number of threads in the parent's + // communicator is divisible by the number of new sub-groups. + if ( parent_nt_in % parent_n_way != 0 ) + { + printf( "Assertion failed: parent_nt_in parent_n_way != 0\n" ); + bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); + } + #endif + + // Compute: + // - the number of threads inside the new child comm, + // - the current thread's id within the new communicator, + // - the current thread's work id, given the ways of parallelism + // to be obtained within the next loop. 
+ const dim_t child_nt_in = bli_rntm_calc_num_threads_in( bszid_chl, rntm ); + const dim_t child_n_way = bli_rntm_ways_for( *bszid_chl, rntm ); + const dim_t child_comm_id = parent_comm_id % child_nt_in; + const dim_t child_work_id = child_comm_id / ( child_nt_in / child_n_way ); //printf( "thread %d: child_n_way = %d child_nt_in = %d parent_n_way = %d (bszid = %d->%d)\n", (int)child_comm_id, (int)child_nt_in, (int)child_n_way, (int)parent_n_way, (int)bli_cntl_bszid( cntl_par ), (int)bszid_chl ); - // The parent's chief thread creates a temporary array of thrcomm_t - // pointers. - if ( bli_thread_am_ochief( thread_par ) ) + // The parent's chief thread creates a temporary array of thrcomm_t + // pointers. + if ( bli_thread_am_ochief( thread_par ) ) + { + if ( parent_n_way > BLIS_NUM_STATIC_COMMS ) { - err_t r_val; - - if ( parent_n_way > BLIS_NUM_STATIC_COMMS ) - new_comms = bli_malloc_intl( parent_n_way * sizeof( thrcomm_t* ), &r_val ); - else - new_comms = static_comms; + new_comms = bli_malloc_intl( parent_n_way * sizeof( thrcomm_t* ), &r_val ); } + else + { + new_comms = static_comms; r_val = BLIS_SUCCESS; + } + } + else + { + r_val = BLIS_SUCCESS; + } - // Broadcast the temporary array to all threads in the parent's - // communicator. - new_comms = bli_thread_broadcast( thread_par, new_comms ); - - // Chiefs in the child communicator allocate the communicator - // object and store it in the array element corresponding to the - // parent's work id. - if ( child_comm_id == 0 ) - new_comms[ parent_work_id ] = bli_thrcomm_create( rntm, child_nt_in ); - - bli_thread_barrier( thread_par ); - - // All threads create a new thrinfo_t node using the communicator - // that was created by their chief, as identified by parent_work_id. 
- thrinfo_t* thread_chl = bli_thrinfo_create - ( - rntm, // rntm - new_comms[ parent_work_id ], // ocomm - child_comm_id, // ocomm_id - child_n_way, // n_way - child_work_id, // work_id - TRUE, // free_comm - *bszid_chl, // bszid - NULL // sub_node - ); + // If the master thread generated an error, all threads return immediately. + if ( bli_error_checking_is_enabled() ) + bli_check_thread0_return_if_failure( &r_val, thread_par ); - bli_thread_barrier( thread_par ); + // Broadcast the temporary array to all threads in the parent's + // communicator. + new_comms = bli_thread_broadcast( thread_par, new_comms ); - // The parent's chief thread frees the temporary array of thrcomm_t - // pointers. - if ( bli_thread_am_ochief( thread_par ) ) - { - if ( parent_n_way > BLIS_NUM_STATIC_COMMS ) - bli_free_intl( new_comms ); - } + // Chiefs in the child communicator allocate the communicator + // object and store it in the array element corresponding to the + // parent's work id. + if ( child_comm_id == 0 ) + { + r_val = bli_thrcomm_create( rntm, child_nt_in, &new_comms[ parent_work_id ] ); + } + else + { + r_val = BLIS_SUCCESS; + } - return thread_chl; + // If any thread generated an error, all threads return immediately. + if ( bli_error_checking_is_enabled() ) + bli_check_threads_return_if_failure( &r_val, thread_par ); + + bli_thread_barrier( thread_par ); + + // All threads create a new thrinfo_t node using the communicator + // that was created by their chief, as identified by parent_work_id. + r_val = bli_thrinfo_create + ( + rntm, // rntm + new_comms[ parent_work_id ], // ocomm + child_comm_id, // ocomm_id + child_n_way, // n_way + child_work_id, // work_id + TRUE, // free_comm + *bszid_chl, // bszid + NULL, // sub_node + thread_chl // node + ); + + // If any thread generated an error, all threads return immediately. 
+ if ( bli_error_checking_is_enabled() ) + bli_check_threads_return_if_failure( &r_val, thread_par ); + + bli_thread_barrier( thread_par ); + + // The parent's chief thread frees the temporary array of thrcomm_t + // pointers. + if ( bli_thread_am_ochief( thread_par ) ) + { + if ( parent_n_way > BLIS_NUM_STATIC_COMMS ) + bli_free_intl( new_comms ); } + + return BLIS_SUCCESS; } diff --git a/frame/thread/bli_thrinfo_sup.h b/frame/thread/bli_thrinfo_sup.h index 1afcd3337e..c2322b8342 100644 --- a/frame/thread/bli_thrinfo_sup.h +++ b/frame/thread/bli_thrinfo_sup.h @@ -40,27 +40,29 @@ // Prototypes for level-3 thrinfo sup functions. // -void bli_thrinfo_sup_grow +err_t bli_thrinfo_sup_grow ( rntm_t* rntm, const bszid_t* bszid_par, thrinfo_t* thread ); -thrinfo_t* bli_thrinfo_sup_rgrow +err_t bli_thrinfo_sup_rgrow ( - rntm_t* rntm, - const bszid_t* bszid_par, - const bszid_t* bszid_cur, - thrinfo_t* thread_par + rntm_t* rntm, + const bszid_t* bszid_par, + const bszid_t* bszid_cur, + thrinfo_t* thread_par, + thrinfo_t** thread_cur ); -thrinfo_t* bli_thrinfo_sup_create_for_cntl +err_t bli_thrinfo_sup_create_for_cntl ( - rntm_t* rntm, - const bszid_t* bszid_par, - const bszid_t* bszid_chl, - thrinfo_t* thread_par + rntm_t* rntm, + const bszid_t* bszid_par, + const bszid_t* bszid_chl, + thrinfo_t* thread_par, + thrinfo_t** thread_chl ); #endif diff --git a/frame/util/bli_util_check.c b/frame/util/bli_util_check.c index a96f6f5e98..0107e69905 100644 --- a/frame/util/bli_util_check.c +++ b/frame/util/bli_util_check.c @@ -147,7 +147,7 @@ void PASTEMAC(opname,_check) \ const bool* is_eq \ ) \ { \ - bli_l0_xxbsc_check( chi, psi, is_eq ); \ + bli_l0_xxbool_check( chi, psi, is_eq ); \ } GENFRONT( eqsc ) diff --git a/frame/util/bli_util_tapi.c b/frame/util/bli_util_tapi.c index abc9c90890..f64b8800a3 100644 --- a/frame/util/bli_util_tapi.c +++ b/frame/util/bli_util_tapi.c @@ -64,7 +64,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ } \ \ /* Obtain a valid context from the gks if necessary. 
*/ \ - /*if ( cntx == NULL ) cntx = bli_gks_query_cntx();*/ \ + /*bli_gks_query_cntx_if_null( &cntx );*/ \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ @@ -100,7 +100,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim2( m, m ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ @@ -143,7 +143,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ } \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ @@ -190,7 +190,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ } \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ @@ -231,7 +231,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim1( n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - /*if ( cntx == NULL ) cntx = bli_gks_query_cntx();*/ \ + /*bli_gks_query_cntx_if_null( &cntx );*/ \ \ ctype_r norm; \ \ @@ -290,7 +290,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - /*if ( cntx == NULL ) cntx = bli_gks_query_cntx();*/ \ + /*bli_gks_query_cntx_if_null( &cntx );*/ \ \ ctype_r norm; \ \ @@ -355,7 +355,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim1( n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. 
*/ \ - /*if ( cntx == NULL ) cntx = bli_gks_query_cntx();*/ \ + /*bli_gks_query_cntx_if_null( &cntx );*/ \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ @@ -419,7 +419,7 @@ void PASTEMAC(ch,opname) \ if ( bli_zero_dim1( n ) ) { *is_eq = TRUE; return; } \ \ /* Obtain a valid context from the gks if necessary. */ \ - /*if ( cntx == NULL ) cntx = bli_gks_query_cntx();*/ \ + /*bli_gks_query_cntx_if_null( &cntx );*/ \ \ *is_eq = PASTEMAC2(ch,opname,_unb_var1) \ ( \ @@ -456,7 +456,7 @@ void PASTEMAC(ch,opname) \ if ( bli_zero_dim2( m, n ) ) { *is_eq = TRUE; return; } \ \ /* Obtain a valid context from the gks if necessary. */ \ - /*if ( cntx == NULL ) cntx = bli_gks_query_cntx();*/ \ + /*bli_gks_query_cntx_if_null( &cntx );*/ \ \ /* Invoke the helper variant. */ \ *is_eq = PASTEMAC2(ch,opname,_unb_var1) \ diff --git a/kernels/zen/1/bli_scalv_zen_int10.c b/kernels/zen/1/bli_scalv_zen_int10.c index 7487880b80..f8cd92c4d6 100644 --- a/kernels/zen/1/bli_scalv_zen_int10.c +++ b/kernels/zen/1/bli_scalv_zen_int10.c @@ -82,7 +82,7 @@ void bli_sscalv_zen_int10 { float* zero = bli_s0; - if ( cntx == NULL ) cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); ssetv_ker_ft f = bli_cntx_get_ukr_dt( BLIS_FLOAT, BLIS_SETV_KER, cntx ); @@ -276,7 +276,7 @@ void bli_dscalv_zen_int10 { double* zero = bli_d0; - if ( cntx == NULL ) cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); dsetv_ker_ft f = bli_cntx_get_ukr_dt( BLIS_DOUBLE, BLIS_SETV_KER, cntx ); @@ -454,7 +454,7 @@ void bli_cscalv_zen_int10 dim_t n, scomplex* restrict alpha, scomplex* restrict x, inc_t incx, - cntx_t* restrict cntx + cntx_t* cntx ) { const num_t dt = BLIS_SCOMPLEX; diff --git a/kernels/zen/1f/bli_axpyf_zen_int_4.c b/kernels/zen/1f/bli_axpyf_zen_int_4.c index ddebc5ee01..10d5fa5731 100644 --- a/kernels/zen/1f/bli_axpyf_zen_int_4.c +++ 
b/kernels/zen/1f/bli_axpyf_zen_int_4.c @@ -46,7 +46,7 @@ void bli_caxpyf_zen_int_4 scomplex* restrict a, inc_t inca, inc_t lda, scomplex* restrict x, inc_t incx, scomplex* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) { inc_t fuse_fac = 4; @@ -79,7 +79,7 @@ void bli_caxpyf_zen_int_4 // operation as a loop over axpyv. if ( b_n != fuse_fac ) { - if ( cntx == NULL ) cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); caxpyv_ker_ft f = bli_cntx_get_ukr_dt( BLIS_SCOMPLEX, BLIS_AXPYV_KER, cntx ); diff --git a/kernels/zen/1f/bli_axpyf_zen_int_5.c b/kernels/zen/1f/bli_axpyf_zen_int_5.c index 9c8a370e15..e8abba240d 100644 --- a/kernels/zen/1f/bli_axpyf_zen_int_5.c +++ b/kernels/zen/1f/bli_axpyf_zen_int_5.c @@ -69,7 +69,7 @@ void bli_saxpyf_zen_int_5 float* restrict a, inc_t inca, inc_t lda, float* restrict x, inc_t incx, float* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) { const dim_t fuse_fac = 5; @@ -108,7 +108,7 @@ void bli_saxpyf_zen_int_5 // operation as a loop over axpyv. if ( b_n != fuse_fac ) { - if ( cntx == NULL ) cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); saxpyv_ker_ft f = bli_cntx_get_ukr_dt( BLIS_FLOAT, BLIS_AXPYV_KER, cntx ); @@ -321,7 +321,7 @@ void bli_daxpyf_zen_int_5 double* restrict a, inc_t inca, inc_t lda, double* restrict x, inc_t incx, double* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) { const dim_t fuse_fac = 5; @@ -360,7 +360,7 @@ void bli_daxpyf_zen_int_5 // operation as a loop over axpyv. 
if ( b_n != fuse_fac ) { - if ( cntx == NULL ) cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); daxpyv_ker_ft f = bli_cntx_get_ukr_dt( BLIS_DOUBLE, BLIS_AXPYV_KER, cntx ); @@ -572,7 +572,7 @@ void bli_daxpyf_zen_int_16x2 double* restrict a, inc_t inca, inc_t lda, double* restrict x, inc_t incx, double* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) { const dim_t fuse_fac = 2; @@ -857,7 +857,7 @@ void bli_daxpyf_zen_int_16x4 double* restrict a, inc_t inca, inc_t lda, double* restrict x, inc_t incx, double* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) { const dim_t fuse_fac = 4; @@ -899,7 +899,7 @@ void bli_daxpyf_zen_int_16x4 // operation as a loop over axpyv. if ( b_n != fuse_fac ) { - if ( cntx == NULL ) cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); daxpyv_ker_ft f = bli_cntx_get_ukr_dt( BLIS_DOUBLE, BLIS_AXPYV_KER, cntx ); diff --git a/sandbox/gemmlike/bli_gemm_ex.c b/sandbox/gemmlike/bli_gemm_ex.c index 96dae1a3a9..f0cd566667 100644 --- a/sandbox/gemmlike/bli_gemm_ex.c +++ b/sandbox/gemmlike/bli_gemm_ex.c @@ -72,11 +72,10 @@ void bli_gemm_ex // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Obtain a valid (native) context from the gks if necessary. - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); \ // Check the operands. 
if ( bli_error_checking_is_enabled() ) diff --git a/sandbox/gemmlike/bls_gemm.c b/sandbox/gemmlike/bls_gemm.c index ec5d8d5b1f..5e47b96052 100644 --- a/sandbox/gemmlike/bls_gemm.c +++ b/sandbox/gemmlike/bls_gemm.c @@ -75,13 +75,12 @@ void bls_gemm_ex // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Obtain a valid (native) context from the gks if necessary. // NOTE: This must be done before calling the _check() function, since // that function assumes the context pointer is valid. - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); \ // Check parameters. if ( bli_error_checking_is_enabled() ) diff --git a/sandbox/old/ref99/bli_gemmnat.c b/sandbox/old/ref99/bli_gemmnat.c index 399f31e216..36bba0aaf3 100644 --- a/sandbox/old/ref99/bli_gemmnat.c +++ b/sandbox/old/ref99/bli_gemmnat.c @@ -57,13 +57,12 @@ void bli_gemmnat bli_init_once(); // Obtain a valid (native) context from the gks if necessary. - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); \ // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Invoke the operation's front end. 
//blx_gemm_front( alpha, a, b, beta, c, cntx, rntm, NULL ); diff --git a/sandbox/power10/bli_gemm_ex.c b/sandbox/power10/bli_gemm_ex.c index 3334dc4a53..5af3d113c2 100644 --- a/sandbox/power10/bli_gemm_ex.c +++ b/sandbox/power10/bli_gemm_ex.c @@ -60,11 +60,10 @@ void bli_gemm_ex // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Obtain a valid (native) context from the gks if necessary. - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); \ // Check the operands. if ( bli_error_checking_is_enabled() ) diff --git a/testsuite/src/test_axpy2v.c b/testsuite/src/test_axpy2v.c index 3019d472b2..825ffcd6b9 100644 --- a/testsuite/src/test_axpy2v.c +++ b/testsuite/src/test_axpy2v.c @@ -172,11 +172,11 @@ void libblis_test_axpy2v_experiment obj_t alpha1, alpha2, x, y, z; obj_t z_save; - cntx_t* cntx; + cntx_t* cntx = NULL; // Query a context. - cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx( ( const cntx_t** )&cntx ); \ // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); diff --git a/testsuite/src/test_axpyf.c b/testsuite/src/test_axpyf.c index 42ab73018c..d644235fb6 100644 --- a/testsuite/src/test_axpyf.c +++ b/testsuite/src/test_axpyf.c @@ -170,11 +170,11 @@ void libblis_test_axpyf_experiment obj_t alpha, a, x, y; obj_t y_save; - cntx_t* cntx; + cntx_t* cntx = NULL; // Query a context. - cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx( ( const cntx_t** )&cntx ); \ // Use the datatype of the first char in the datatype combination string. 
bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); diff --git a/testsuite/src/test_dotaxpyv.c b/testsuite/src/test_dotaxpyv.c index 8e09e3ee17..77c2962cd8 100644 --- a/testsuite/src/test_dotaxpyv.c +++ b/testsuite/src/test_dotaxpyv.c @@ -175,11 +175,11 @@ void libblis_test_dotaxpyv_experiment obj_t alpha, xt, x, y, rho, z; obj_t z_save; - cntx_t* cntx; + cntx_t* cntx = NULL; // Query a context. - cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx( ( const cntx_t** )&cntx ); \ // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); diff --git a/testsuite/src/test_dotxaxpyf.c b/testsuite/src/test_dotxaxpyf.c index ec519de51e..c2652dc8be 100644 --- a/testsuite/src/test_dotxaxpyf.c +++ b/testsuite/src/test_dotxaxpyf.c @@ -180,11 +180,11 @@ void libblis_test_dotxaxpyf_experiment obj_t alpha, at, a, w, x, beta, y, z; obj_t y_save, z_save; - cntx_t* cntx; + cntx_t* cntx = NULL; // Query a context. - cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx( ( const cntx_t** )&cntx ); \ // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); diff --git a/testsuite/src/test_dotxf.c b/testsuite/src/test_dotxf.c index 83f4b44ebe..d01499d7dd 100644 --- a/testsuite/src/test_dotxf.c +++ b/testsuite/src/test_dotxf.c @@ -172,11 +172,11 @@ void libblis_test_dotxf_experiment obj_t alpha, a, x, beta, y; obj_t y_save; - cntx_t* cntx; + cntx_t* cntx = NULL; // Query a context. - cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx( ( const cntx_t** )&cntx ); \ // Use the datatype of the first char in the datatype combination string. 
bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); diff --git a/testsuite/src/test_gemm_ukr.c b/testsuite/src/test_gemm_ukr.c index 69ee4339da..961f5e4b18 100644 --- a/testsuite/src/test_gemm_ukr.c +++ b/testsuite/src/test_gemm_ukr.c @@ -177,11 +177,11 @@ void libblis_test_gemm_ukr_experiment obj_t ap, bp; obj_t c_save; - cntx_t* cntx; + cntx_t* cntx = NULL; // Query a context. - cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx( ( const cntx_t** )&cntx ); \ // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); diff --git a/testsuite/src/test_gemmtrsm_ukr.c b/testsuite/src/test_gemmtrsm_ukr.c index 44ba51587c..f95a06b90e 100644 --- a/testsuite/src/test_gemmtrsm_ukr.c +++ b/testsuite/src/test_gemmtrsm_ukr.c @@ -190,10 +190,10 @@ void libblis_test_gemmtrsm_ukr_experiment dim_t m, n, k; inc_t ldap, ldbp; - char sc_a = 'c'; - char sc_b = 'r'; + char sc_a = 'c'; + char sc_b = 'r'; - side_t side = BLIS_LEFT; + side_t side = BLIS_LEFT; uplo_t uploa; obj_t alpha; @@ -203,11 +203,11 @@ void libblis_test_gemmtrsm_ukr_experiment obj_t a1xp, a11p, bx1p, b11p; obj_t c11_save; - cntx_t* cntx; + cntx_t* cntx = NULL; // Query a context. - cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx( ( const cntx_t** )&cntx ); \ // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); diff --git a/testsuite/src/test_libblis.c b/testsuite/src/test_libblis.c index eaa0a9cefe..ad5c660eb0 100644 --- a/testsuite/src/test_libblis.c +++ b/testsuite/src/test_libblis.c @@ -742,9 +742,6 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params ) //char int_type_size_str[8]; gint_t int_type_size; ind_t im; - cntx_t* cntx; - cntx_t* cntx_c; - cntx_t* cntx_z; // If bli_info_get_int_type_size() returns 32 or 64, the size is forced. // Otherwise, the size is chosen automatically. 
We query the result of @@ -816,6 +813,15 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params ) bli_rntm_set_ways_for_op( BLIS_TRSM, BLIS_LEFT, m, n, k, &trsm_l ); bli_rntm_set_ways_for_op( BLIS_TRSM, BLIS_RIGHT, m, n, k, &trsm_r ); + // Query an arch_t id. + arch_t arch_id; + bli_arch_query_id( &arch_id ); + + // Use the arch_t id we just queried to query the corresponding architecture + // string. + const char* arch_str; + bli_arch_string( arch_id, &arch_str ); + // Output some system parameters. libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "--- BLIS library info -------------------------------------\n" ); @@ -824,7 +830,7 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params ) libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "--- BLIS configuration info ---\n" ); libblis_test_fprintf_c( os, "\n" ); - libblis_test_fprintf_c( os, "active sub-configuration %s\n", bli_arch_string( bli_arch_query_id() ) ); + libblis_test_fprintf_c( os, "active sub-configuration %s\n", arch_str ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "BLIS integer type size (bits) %d\n", ( int )int_type_size ); libblis_test_fprintf_c( os, "\n" ); @@ -907,65 +913,102 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params ) libblis_test_fprintf_c( os, " jr/ir loops %s\n", jrir_str ); libblis_test_fprintf_c( os, "\n" ); + const char* opim_str[ BLIS_NUM_FP_TYPES ] + [ BLIS_NUM_LEVEL3_OPS ]; + + // Iterate over the list of supported floating-point datatypes -- + // BLIS_FLOAT, _DOUBLE, _SCOMPLEX, _DCOMPLEX -- and for each query a + // pointer to the operation implementation string into the appropriate + // location within the opim_str array. 
+ for ( num_t dt = BLIS_DT_LO; dt <= BLIS_DT_HI; ++dt ) + { + bli_info_get_gemm_impl_string( dt, &opim_str[dt][BLIS_GEMM] ); + bli_info_get_hemm_impl_string( dt, &opim_str[dt][BLIS_HEMM] ); + bli_info_get_herk_impl_string( dt, &opim_str[dt][BLIS_HERK] ); + bli_info_get_her2k_impl_string( dt, &opim_str[dt][BLIS_HER2K] ); + bli_info_get_symm_impl_string( dt, &opim_str[dt][BLIS_SYMM] ); + bli_info_get_syrk_impl_string( dt, &opim_str[dt][BLIS_SYRK] ); + bli_info_get_syr2k_impl_string( dt, &opim_str[dt][BLIS_SYR2K] ); + bli_info_get_trmm_impl_string( dt, &opim_str[dt][BLIS_TRMM] ); + bli_info_get_trmm3_impl_string( dt, &opim_str[dt][BLIS_TRMM3] ); + bli_info_get_trsm_impl_string( dt, &opim_str[dt][BLIS_TRSM] ); + } + libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "--- BLIS default implementations ---\n" ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "level-3 implementations s d c z\n" ); libblis_test_fprintf_c( os, " gemm %7s %7s %7s %7s\n", - bli_info_get_gemm_impl_string( BLIS_FLOAT ), - bli_info_get_gemm_impl_string( BLIS_DOUBLE ), - bli_info_get_gemm_impl_string( BLIS_SCOMPLEX ), - bli_info_get_gemm_impl_string( BLIS_DCOMPLEX ) ); + opim_str[BLIS_FLOAT ][BLIS_GEMM], + opim_str[BLIS_DOUBLE ][BLIS_GEMM], + opim_str[BLIS_SCOMPLEX][BLIS_GEMM], + opim_str[BLIS_DCOMPLEX][BLIS_GEMM] ); libblis_test_fprintf_c( os, " hemm %7s %7s %7s %7s\n", - bli_info_get_hemm_impl_string( BLIS_FLOAT ), - bli_info_get_hemm_impl_string( BLIS_DOUBLE ), - bli_info_get_hemm_impl_string( BLIS_SCOMPLEX ), - bli_info_get_hemm_impl_string( BLIS_DCOMPLEX ) ); + opim_str[BLIS_FLOAT ][BLIS_HEMM], + opim_str[BLIS_DOUBLE ][BLIS_HEMM], + opim_str[BLIS_SCOMPLEX][BLIS_HEMM], + opim_str[BLIS_DCOMPLEX][BLIS_HEMM] ); libblis_test_fprintf_c( os, " herk %7s %7s %7s %7s\n", - bli_info_get_herk_impl_string( BLIS_FLOAT ), - bli_info_get_herk_impl_string( BLIS_DOUBLE ), - bli_info_get_herk_impl_string( BLIS_SCOMPLEX ), - bli_info_get_herk_impl_string( BLIS_DCOMPLEX ) ); + 
opim_str[BLIS_FLOAT ][BLIS_HERK], + opim_str[BLIS_DOUBLE ][BLIS_HERK], + opim_str[BLIS_SCOMPLEX][BLIS_HERK], + opim_str[BLIS_DCOMPLEX][BLIS_HERK] ); libblis_test_fprintf_c( os, " her2k %7s %7s %7s %7s\n", - bli_info_get_her2k_impl_string( BLIS_FLOAT ), - bli_info_get_her2k_impl_string( BLIS_DOUBLE ), - bli_info_get_her2k_impl_string( BLIS_SCOMPLEX ), - bli_info_get_her2k_impl_string( BLIS_DCOMPLEX ) ); + opim_str[BLIS_FLOAT ][BLIS_HER2K], + opim_str[BLIS_DOUBLE ][BLIS_HER2K], + opim_str[BLIS_SCOMPLEX][BLIS_HER2K], + opim_str[BLIS_DCOMPLEX][BLIS_HER2K] ); libblis_test_fprintf_c( os, " symm %7s %7s %7s %7s\n", - bli_info_get_symm_impl_string( BLIS_FLOAT ), - bli_info_get_symm_impl_string( BLIS_DOUBLE ), - bli_info_get_symm_impl_string( BLIS_SCOMPLEX ), - bli_info_get_symm_impl_string( BLIS_DCOMPLEX ) ); + opim_str[BLIS_FLOAT ][BLIS_SYMM], + opim_str[BLIS_DOUBLE ][BLIS_SYMM], + opim_str[BLIS_SCOMPLEX][BLIS_SYMM], + opim_str[BLIS_DCOMPLEX][BLIS_SYMM] ); libblis_test_fprintf_c( os, " syrk %7s %7s %7s %7s\n", - bli_info_get_syrk_impl_string( BLIS_FLOAT ), - bli_info_get_syrk_impl_string( BLIS_DOUBLE ), - bli_info_get_syrk_impl_string( BLIS_SCOMPLEX ), - bli_info_get_syrk_impl_string( BLIS_DCOMPLEX ) ); + opim_str[BLIS_FLOAT ][BLIS_SYRK], + opim_str[BLIS_DOUBLE ][BLIS_SYRK], + opim_str[BLIS_SCOMPLEX][BLIS_SYRK], + opim_str[BLIS_DCOMPLEX][BLIS_SYRK] ); libblis_test_fprintf_c( os, " syr2k %7s %7s %7s %7s\n", - bli_info_get_syr2k_impl_string( BLIS_FLOAT ), - bli_info_get_syr2k_impl_string( BLIS_DOUBLE ), - bli_info_get_syr2k_impl_string( BLIS_SCOMPLEX ), - bli_info_get_syr2k_impl_string( BLIS_DCOMPLEX ) ); + opim_str[BLIS_FLOAT ][BLIS_SYR2K], + opim_str[BLIS_DOUBLE ][BLIS_SYR2K], + opim_str[BLIS_SCOMPLEX][BLIS_SYR2K], + opim_str[BLIS_DCOMPLEX][BLIS_SYR2K] ); libblis_test_fprintf_c( os, " trmm %7s %7s %7s %7s\n", - bli_info_get_trmm_impl_string( BLIS_FLOAT ), - bli_info_get_trmm_impl_string( BLIS_DOUBLE ), - bli_info_get_trmm_impl_string( BLIS_SCOMPLEX ), - 
bli_info_get_trmm_impl_string( BLIS_DCOMPLEX ) ); + opim_str[BLIS_FLOAT ][BLIS_TRMM], + opim_str[BLIS_DOUBLE ][BLIS_TRMM], + opim_str[BLIS_SCOMPLEX][BLIS_TRMM], + opim_str[BLIS_DCOMPLEX][BLIS_TRMM] ); libblis_test_fprintf_c( os, " trmm3 %7s %7s %7s %7s\n", - bli_info_get_trmm3_impl_string( BLIS_FLOAT ), - bli_info_get_trmm3_impl_string( BLIS_DOUBLE ), - bli_info_get_trmm3_impl_string( BLIS_SCOMPLEX ), - bli_info_get_trmm3_impl_string( BLIS_DCOMPLEX ) ); + opim_str[BLIS_FLOAT ][BLIS_TRMM3], + opim_str[BLIS_DOUBLE ][BLIS_TRMM3], + opim_str[BLIS_SCOMPLEX][BLIS_TRMM3], + opim_str[BLIS_DCOMPLEX][BLIS_TRMM3] ); libblis_test_fprintf_c( os, " trsm %7s %7s %7s %7s\n", - bli_info_get_trsm_impl_string( BLIS_FLOAT ), - bli_info_get_trsm_impl_string( BLIS_DOUBLE ), - bli_info_get_trsm_impl_string( BLIS_SCOMPLEX ), - bli_info_get_trsm_impl_string( BLIS_DCOMPLEX ) ); + opim_str[BLIS_FLOAT ][BLIS_TRSM], + opim_str[BLIS_DOUBLE ][BLIS_TRSM], + opim_str[BLIS_SCOMPLEX][BLIS_TRSM], + opim_str[BLIS_DCOMPLEX][BLIS_TRSM] ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "\n" ); //bli_ind_disable_all(); + const char* cimpl_str[ BLIS_NUM_FP_TYPES ]; + + // For each of the complex datatypes, query a pointer to a string that + // describes the complex implementation (e.g. "1m" or "native"). We only + // report the string for gemm since currently all level-3 operations use + // the same implementation method. This may change in the future if, for + // example, new level-3-like operations are added to BLIS that don't have + // complex domain analogues, or if those complex analogues cannot be + // cleanly expressed via the 1m method. If/when that happens, it would be + // appropriate to list the implementation method on a per-operation basis, + // for all level-3 operations (instead of for only gemm). 
+ bli_ind_oper_get_avail_impl_string( BLIS_GEMM, BLIS_SCOMPLEX, + &cimpl_str[BLIS_SCOMPLEX] ); + bli_ind_oper_get_avail_impl_string( BLIS_GEMM, BLIS_DCOMPLEX, + &cimpl_str[BLIS_DCOMPLEX] ); + bli_ind_oper_enable_only( BLIS_GEMM, BLIS_NAT, BLIS_SCOMPLEX ); bli_ind_oper_enable_only( BLIS_GEMM, BLIS_NAT, BLIS_DCOMPLEX ); @@ -973,12 +1016,13 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params ) libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, " c z \n" ); libblis_test_fprintf_c( os, "complex implementation %7s %7s\n", - bli_ind_oper_get_avail_impl_string( BLIS_GEMM, BLIS_SCOMPLEX ), - bli_ind_oper_get_avail_impl_string( BLIS_GEMM, BLIS_DCOMPLEX ) ); + cimpl_str[BLIS_SCOMPLEX], + cimpl_str[BLIS_DCOMPLEX] ); libblis_test_fprintf_c( os, "\n" ); // Query a native context. - cntx = ( cntx_t* )bli_gks_query_nat_cntx(); + const cntx_t* cntx; + bli_gks_query_nat_cntx( &cntx ); libblis_test_fprintf_c( os, "level-3 blocksizes s d c z \n" ); libblis_test_fprintf_c( os, " mc %7d %7d %7d %7d\n", @@ -1035,32 +1079,50 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params ) ( int )bli_cntx_get_blksz_max_dt( BLIS_SCOMPLEX, BLIS_NR, cntx ), ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_NR, cntx ) ); libblis_test_fprintf_c( os, "\n" ); + + + const char* ki_str[ BLIS_NUM_FP_TYPES ] + [ BLIS_NUM_UKRS ]; + + // Iterate over the list of supported floating-point datatypes -- + // BLIS_FLOAT, _DOUBLE, _SCOMPLEX, _DCOMPLEX -- and for each query a + // pointer to the microkernel implementation string into the appropriate + // location within the ki_str array. 
+ for ( num_t dt = BLIS_DT_LO; dt <= BLIS_DT_HI; ++dt ) + { + bli_info_get_gemm_ukr_impl_string( BLIS_NAT, dt, &ki_str[dt][BLIS_GEMM_UKR] ); + bli_info_get_gemmtrsm_l_ukr_impl_string( BLIS_NAT, dt, &ki_str[dt][BLIS_GEMMTRSM_L_UKR] ); + bli_info_get_gemmtrsm_u_ukr_impl_string( BLIS_NAT, dt, &ki_str[dt][BLIS_GEMMTRSM_U_UKR] ); + bli_info_get_trsm_l_ukr_impl_string( BLIS_NAT, dt, &ki_str[dt][BLIS_TRSM_L_UKR] ); + bli_info_get_trsm_u_ukr_impl_string( BLIS_NAT, dt, &ki_str[dt][BLIS_TRSM_U_UKR] ); + } + libblis_test_fprintf_c( os, "micro-kernel types s d c z\n" ); libblis_test_fprintf_c( os, " gemm %7s %7s %7s %7s\n", - bli_info_get_gemm_ukr_impl_string( BLIS_NAT, BLIS_FLOAT ), - bli_info_get_gemm_ukr_impl_string( BLIS_NAT, BLIS_DOUBLE ), - bli_info_get_gemm_ukr_impl_string( BLIS_NAT, BLIS_SCOMPLEX ), - bli_info_get_gemm_ukr_impl_string( BLIS_NAT, BLIS_DCOMPLEX ) ); + ki_str[BLIS_FLOAT ][BLIS_GEMM_UKR], + ki_str[BLIS_DOUBLE ][BLIS_GEMM_UKR], + ki_str[BLIS_SCOMPLEX][BLIS_GEMM_UKR], + ki_str[BLIS_DCOMPLEX][BLIS_GEMM_UKR] ); libblis_test_fprintf_c( os, " gemmtrsm_l %7s %7s %7s %7s\n", - bli_info_get_gemmtrsm_l_ukr_impl_string( BLIS_NAT, BLIS_FLOAT ), - bli_info_get_gemmtrsm_l_ukr_impl_string( BLIS_NAT, BLIS_DOUBLE ), - bli_info_get_gemmtrsm_l_ukr_impl_string( BLIS_NAT, BLIS_SCOMPLEX ), - bli_info_get_gemmtrsm_l_ukr_impl_string( BLIS_NAT, BLIS_DCOMPLEX ) ); + ki_str[BLIS_FLOAT ][BLIS_GEMMTRSM_L_UKR], + ki_str[BLIS_DOUBLE ][BLIS_GEMMTRSM_L_UKR], + ki_str[BLIS_SCOMPLEX][BLIS_GEMMTRSM_L_UKR], + ki_str[BLIS_DCOMPLEX][BLIS_GEMMTRSM_L_UKR] ); libblis_test_fprintf_c( os, " gemmtrsm_u %7s %7s %7s %7s\n", - bli_info_get_gemmtrsm_u_ukr_impl_string( BLIS_NAT, BLIS_FLOAT ), - bli_info_get_gemmtrsm_u_ukr_impl_string( BLIS_NAT, BLIS_DOUBLE ), - bli_info_get_gemmtrsm_u_ukr_impl_string( BLIS_NAT, BLIS_SCOMPLEX ), - bli_info_get_gemmtrsm_u_ukr_impl_string( BLIS_NAT, BLIS_DCOMPLEX ) ); + ki_str[BLIS_FLOAT ][BLIS_GEMMTRSM_U_UKR], + ki_str[BLIS_DOUBLE ][BLIS_GEMMTRSM_U_UKR], + 
ki_str[BLIS_SCOMPLEX][BLIS_GEMMTRSM_U_UKR], + ki_str[BLIS_DCOMPLEX][BLIS_GEMMTRSM_U_UKR] ); libblis_test_fprintf_c( os, " trsm_l %7s %7s %7s %7s\n", - bli_info_get_trsm_l_ukr_impl_string( BLIS_NAT, BLIS_FLOAT ), - bli_info_get_trsm_l_ukr_impl_string( BLIS_NAT, BLIS_DOUBLE ), - bli_info_get_trsm_l_ukr_impl_string( BLIS_NAT, BLIS_SCOMPLEX ), - bli_info_get_trsm_l_ukr_impl_string( BLIS_NAT, BLIS_DCOMPLEX ) ); + ki_str[BLIS_FLOAT ][BLIS_TRSM_L_UKR], + ki_str[BLIS_DOUBLE ][BLIS_TRSM_L_UKR], + ki_str[BLIS_SCOMPLEX][BLIS_TRSM_L_UKR], + ki_str[BLIS_DCOMPLEX][BLIS_TRSM_L_UKR] ); libblis_test_fprintf_c( os, " trsm_u %7s %7s %7s %7s\n", - bli_info_get_trsm_u_ukr_impl_string( BLIS_NAT, BLIS_FLOAT ), - bli_info_get_trsm_u_ukr_impl_string( BLIS_NAT, BLIS_DOUBLE ), - bli_info_get_trsm_u_ukr_impl_string( BLIS_NAT, BLIS_SCOMPLEX ), - bli_info_get_trsm_u_ukr_impl_string( BLIS_NAT, BLIS_DCOMPLEX ) ); + ki_str[BLIS_FLOAT ][BLIS_TRSM_U_UKR], + ki_str[BLIS_DOUBLE ][BLIS_TRSM_U_UKR], + ki_str[BLIS_SCOMPLEX][BLIS_TRSM_U_UKR], + ki_str[BLIS_DCOMPLEX][BLIS_TRSM_U_UKR] ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "micro-kernel prefers rows? 
s d c z\n" ); @@ -1102,70 +1164,89 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params ) bli_ind_oper_enable_only( BLIS_GEMM, im, BLIS_SCOMPLEX ); bli_ind_oper_enable_only( BLIS_GEMM, im, BLIS_DCOMPLEX ); + bli_ind_oper_get_avail_impl_string( BLIS_GEMM, BLIS_SCOMPLEX, + &cimpl_str[BLIS_SCOMPLEX] ); + bli_ind_oper_get_avail_impl_string( BLIS_GEMM, BLIS_DCOMPLEX, + &cimpl_str[BLIS_DCOMPLEX] ); + //libblis_test_fprintf_c( os, " c z \n" ); libblis_test_fprintf_c( os, " c z \n" ); libblis_test_fprintf_c( os, "complex implementation %7s %7s\n", - bli_ind_oper_get_avail_impl_string( BLIS_GEMM, BLIS_SCOMPLEX ), - bli_ind_oper_get_avail_impl_string( BLIS_GEMM, BLIS_DCOMPLEX ) ); + cimpl_str[BLIS_SCOMPLEX], + cimpl_str[BLIS_DCOMPLEX] ); libblis_test_fprintf_c( os, "\n" ); // Query a native context. NOTE: Now that we've removed the dt argument from // bli_gks_query_ind_cntx(), we can consolidate cntx_c and cntx_z; there is // no need to query two contexts since they are the same. 
- cntx_c = ( cntx_t* )bli_gks_query_ind_cntx( im ); - cntx_z = ( cntx_t* )bli_gks_query_ind_cntx( im ); + const cntx_t* cntx_c; + bli_gks_query_ind_cntx( im, &cntx_c ); libblis_test_fprintf_c( os, "level-3 blocksizes c z \n" ); libblis_test_fprintf_c( os, " mc %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_MC, cntx_c ), - ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_MC, cntx_z ) ); + ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_MC, cntx_c ) ); libblis_test_fprintf_c( os, " kc %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_KC, cntx_c ), - ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_KC, cntx_z ) ); + ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_KC, cntx_c ) ); libblis_test_fprintf_c( os, " nc %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_NC, cntx_c ), - ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_NC, cntx_z ) ); + ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_NC, cntx_c ) ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, " mc maximum %7d %7d\n", ( int )bli_cntx_get_blksz_max_dt( BLIS_SCOMPLEX, BLIS_MC, cntx_c ), - ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_MC, cntx_z ) ); + ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_MC, cntx_c ) ); libblis_test_fprintf_c( os, " kc maximum %7d %7d\n", ( int )bli_cntx_get_blksz_max_dt( BLIS_SCOMPLEX, BLIS_KC, cntx_c ), - ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_KC, cntx_z ) ); + ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_KC, cntx_c ) ); libblis_test_fprintf_c( os, " nc maximum %7d %7d\n", ( int )bli_cntx_get_blksz_max_dt( BLIS_SCOMPLEX, BLIS_NC, cntx_c ), - ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_NC, cntx_z ) ); + ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_NC, cntx_c ) ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, " mr %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_MR, cntx_c ), - ( int 
)bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_MR, cntx_z ) ); + ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_MR, cntx_c ) ); libblis_test_fprintf_c( os, " nr %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_NR, cntx_c ), - ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_NR, cntx_z ) ); + ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_NR, cntx_c ) ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, " mr packdim %7d %7d\n", ( int )bli_cntx_get_blksz_max_dt( BLIS_SCOMPLEX, BLIS_MR, cntx_c ), - ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_MR, cntx_z ) ); + ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_MR, cntx_c ) ); libblis_test_fprintf_c( os, " nr packdim %7d %7d\n", ( int )bli_cntx_get_blksz_max_dt( BLIS_SCOMPLEX, BLIS_NR, cntx_c ), - ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_NR, cntx_z ) ); + ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_NR, cntx_c ) ); libblis_test_fprintf_c( os, "\n" ); + + // Iterate over the list of supported floating-point datatypes -- + // BLIS_FLOAT, _DOUBLE, _SCOMPLEX, _DCOMPLEX -- and for each query a + // pointer to the microkernel implementation string into the appropriate + // location within the ki_str array. 
+ for ( num_t dt = BLIS_DT_LO; dt <= BLIS_DT_HI; ++dt ) + { + bli_info_get_gemm_ukr_impl_string( im, dt, &ki_str[dt][BLIS_GEMM_UKR] ); + bli_info_get_gemmtrsm_l_ukr_impl_string( im, dt, &ki_str[dt][BLIS_GEMMTRSM_L_UKR] ); + bli_info_get_gemmtrsm_u_ukr_impl_string( im, dt, &ki_str[dt][BLIS_GEMMTRSM_U_UKR] ); + bli_info_get_trsm_l_ukr_impl_string( im, dt, &ki_str[dt][BLIS_TRSM_L_UKR] ); + bli_info_get_trsm_u_ukr_impl_string( im, dt, &ki_str[dt][BLIS_TRSM_U_UKR] ); + } + libblis_test_fprintf_c( os, "micro-kernel types c z\n" ); libblis_test_fprintf_c( os, " gemm %7s %7s\n", - bli_info_get_gemm_ukr_impl_string( im, BLIS_SCOMPLEX ), - bli_info_get_gemm_ukr_impl_string( im, BLIS_DCOMPLEX ) ); + ki_str[BLIS_SCOMPLEX][BLIS_GEMM_UKR], + ki_str[BLIS_DCOMPLEX][BLIS_GEMM_UKR] ); libblis_test_fprintf_c( os, " gemmtrsm_l %7s %7s\n", - bli_info_get_gemmtrsm_l_ukr_impl_string( im, BLIS_SCOMPLEX ), - bli_info_get_gemmtrsm_l_ukr_impl_string( im, BLIS_DCOMPLEX ) ); + ki_str[BLIS_SCOMPLEX][BLIS_GEMMTRSM_L_UKR], + ki_str[BLIS_DCOMPLEX][BLIS_GEMMTRSM_L_UKR] ); libblis_test_fprintf_c( os, " gemmtrsm_u %7s %7s\n", - bli_info_get_gemmtrsm_u_ukr_impl_string( im, BLIS_SCOMPLEX ), - bli_info_get_gemmtrsm_u_ukr_impl_string( im, BLIS_DCOMPLEX ) ); + ki_str[BLIS_SCOMPLEX][BLIS_GEMMTRSM_U_UKR], + ki_str[BLIS_DCOMPLEX][BLIS_GEMMTRSM_U_UKR] ); libblis_test_fprintf_c( os, " trsm_l %7s %7s\n", - bli_info_get_trsm_l_ukr_impl_string( im, BLIS_SCOMPLEX ), - bli_info_get_trsm_l_ukr_impl_string( im, BLIS_DCOMPLEX ) ); + ki_str[BLIS_SCOMPLEX][BLIS_TRSM_L_UKR], + ki_str[BLIS_DCOMPLEX][BLIS_TRSM_L_UKR] ); libblis_test_fprintf_c( os, " trsm_u %7s %7s\n", - bli_info_get_trsm_u_ukr_impl_string( im, BLIS_SCOMPLEX ), - bli_info_get_trsm_u_ukr_impl_string( im, BLIS_DCOMPLEX ) ); + ki_str[BLIS_SCOMPLEX][BLIS_TRSM_U_UKR], + ki_str[BLIS_DCOMPLEX][BLIS_TRSM_U_UKR] ); libblis_test_fprintf_c( os, "\n" ); } @@ -1605,7 +1686,6 @@ void libblis_test_op_driver double perf, resid; char* pass_str; - char* ind_str; char 
blank_str[32]; char funcname_str[64]; char dims_str[64]; @@ -2209,7 +2289,9 @@ void libblis_test_op_driver // Query the implementation string associated with the // current operation and datatype. If the operation is // not level-3, we will always get back the native string. - ind_str = ( char* )bli_ind_oper_get_avail_impl_string( op->opid, datatype ); + char* ind_str; + bli_ind_oper_get_avail_impl_string( op->opid, datatype, + ( const char** )&ind_str ); // Loop over the requested parameter combinations. for ( pci = 0; pci < n_param_combos; ++pci ) diff --git a/testsuite/src/test_trsm_ukr.c b/testsuite/src/test_trsm_ukr.c index 5f4988e1c7..1e59a06fd0 100644 --- a/testsuite/src/test_trsm_ukr.c +++ b/testsuite/src/test_trsm_ukr.c @@ -172,21 +172,21 @@ void libblis_test_trsm_ukr_experiment dim_t m, n; - char sc_a = 'c'; - char sc_b = 'r'; + char sc_a = 'c'; + char sc_b = 'r'; - side_t side = BLIS_LEFT; + side_t side = BLIS_LEFT; uplo_t uploa; obj_t a, b, c; obj_t ap, bp; obj_t c_save; - cntx_t* cntx; + cntx_t* cntx = NULL; // Query a context. - cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx( ( const cntx_t** )&cntx ); \ // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); From 0d8d1991e109e1a6b493fcd2b7d0ed37afc5a64d Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Sun, 10 Jul 2022 11:40:57 -0500 Subject: [PATCH 3/5] Fixed compile+logical errors in sup infrastructure. Details: - Fixed a couple of compilation errors plus one logical error due to variable shadowing. 
--- frame/thread/bli_l3_sup_decor_openmp.c | 3 ++- frame/thread/bli_l3_sup_decor_pthreads.c | 3 ++- frame/thread/bli_thrinfo_sup.c | 4 +--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/frame/thread/bli_l3_sup_decor_openmp.c b/frame/thread/bli_l3_sup_decor_openmp.c index ba6f580086..7ab8f56ae5 100644 --- a/frame/thread/bli_l3_sup_decor_openmp.c +++ b/frame/thread/bli_l3_sup_decor_openmp.c @@ -80,7 +80,8 @@ err_t bli_l3_sup_thread_decorator bli_pba_rntm_set_pba( rntm ); // Allcoate a global communicator for the root thrinfo_t structures. - thrcomm_t* gl_comm = bli_thrcomm_create( rntm, n_threads ); + thrcomm_t* gl_comm; + bli_thrcomm_create( rntm, n_threads, &gl_comm ); _Pragma( "omp parallel num_threads(n_threads)" ) diff --git a/frame/thread/bli_l3_sup_decor_pthreads.c b/frame/thread/bli_l3_sup_decor_pthreads.c index b6eef42632..e5688995e3 100644 --- a/frame/thread/bli_l3_sup_decor_pthreads.c +++ b/frame/thread/bli_l3_sup_decor_pthreads.c @@ -147,7 +147,8 @@ err_t bli_l3_sup_thread_decorator bli_pba_rntm_set_pba( rntm ); // Allocate a global communicator for the root thrinfo_t structures. - thrcomm_t* gl_comm = bli_thrcomm_create( rntm, n_threads ); + thrcomm_t* gl_comm; + bli_thrcomm_create( rntm, n_threads, &gl_comm ); // Allocate an array of pthread objects and auxiliary data structs to pass // to the thread entry functions. diff --git a/frame/thread/bli_thrinfo_sup.c b/frame/thread/bli_thrinfo_sup.c index f450247b9e..2dc0ed0771 100644 --- a/frame/thread/bli_thrinfo_sup.c +++ b/frame/thread/bli_thrinfo_sup.c @@ -169,8 +169,6 @@ err_t bli_thrinfo_sup_create_for_cntl // and simplified to this. 
if ( bli_rntm_calc_num_threads( rntm ) == 1 ) { - thrinfo_t* thread_chl; - r_val = bli_thrinfo_create ( rntm, // rntm @@ -181,7 +179,7 @@ err_t bli_thrinfo_sup_create_for_cntl FALSE, // free_comm BLIS_NO_PART, // bszid NULL, // sub_node - &thread_chl // node + thread_chl // node ); bli_check_return_if_failure( r_val ); From 4d0fe92966ca4ada59f4415787fdad06d2ee15b7 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Sun, 10 Jul 2022 12:04:51 -0500 Subject: [PATCH 4/5] More compile+logical fixes in sup infra. --- frame/thread/bli_l3_sup_decor_openmp.c | 2 +- frame/thread/bli_l3_sup_decor_single.c | 5 +++-- frame/thread/bli_thrinfo_sup.c | 6 ++---- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/frame/thread/bli_l3_sup_decor_openmp.c b/frame/thread/bli_l3_sup_decor_openmp.c index 7ab8f56ae5..a07b508b58 100644 --- a/frame/thread/bli_l3_sup_decor_openmp.c +++ b/frame/thread/bli_l3_sup_decor_openmp.c @@ -79,7 +79,7 @@ err_t bli_l3_sup_thread_decorator // the rntm below. bli_pba_rntm_set_pba( rntm ); - // Allcoate a global communicator for the root thrinfo_t structures. + // Allocate a global communicator for the root thrinfo_t structures. thrcomm_t* gl_comm; bli_thrcomm_create( rntm, n_threads, &gl_comm ); diff --git a/frame/thread/bli_l3_sup_decor_single.c b/frame/thread/bli_l3_sup_decor_single.c index 665000f304..9c93008db9 100644 --- a/frame/thread/bli_l3_sup_decor_single.c +++ b/frame/thread/bli_l3_sup_decor_single.c @@ -71,8 +71,9 @@ err_t bli_l3_sup_thread_decorator bli_pba_rntm_set_pba( rntm ); #ifndef SKIP_THRINFO_TREE - // Allcoate a global communicator for the root thrinfo_t structures. - thrcomm_t* gl_comm = bli_thrcomm_create( rntm, n_threads ); + // Allocate a global communicator for the root thrinfo_t structures. 
+ thrcomm_t* gl_comm; + bli_thrcomm_create( rntm, n_threads, &gl_comm ); #endif diff --git a/frame/thread/bli_thrinfo_sup.c b/frame/thread/bli_thrinfo_sup.c index 2dc0ed0771..d0af37e7c3 100644 --- a/frame/thread/bli_thrinfo_sup.c +++ b/frame/thread/bli_thrinfo_sup.c @@ -179,7 +179,7 @@ err_t bli_thrinfo_sup_create_for_cntl FALSE, // free_comm BLIS_NO_PART, // bszid NULL, // sub_node - thread_chl // node + thread_chl // node ); bli_check_return_if_failure( r_val ); @@ -211,8 +211,6 @@ err_t bli_thrinfo_sup_create_for_cntl // All threads create a new thrinfo_t node using the communicator // that was created by their chief, as identified by parent_work_id. - thrinfo_t* thread_chl; - r_val = bli_thrinfo_create ( rntm, // rntm @@ -223,7 +221,7 @@ err_t bli_thrinfo_sup_create_for_cntl TRUE, // free_comm *bszid_chl, // bszid NULL, // sub_node - &thread_chl // node + thread_chl // node ); bli_check_return_if_failure( r_val ); From 4367a5dfc5d5e970e21ef7f9595bdfc6a4a210d2 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Sun, 10 Jul 2022 12:41:03 -0500 Subject: [PATCH 5/5] Export bli_pba_rntm_set_pba() as non-inline func. Details: - Promote bli_pba_rntm_set_pba() to a full function, rather than a static inline function, so that it can be exported for shared libraries. This seems better than the alternative of exporting the function bli_pba_query(), which, as far as I can see, end users should not need access to. 
--- frame/base/bli_pba.c | 12 ++++++++++++ frame/base/bli_pba.h | 13 +++---------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/frame/base/bli_pba.c b/frame/base/bli_pba.c index 1a06dcd9e2..9b00d44b29 100644 --- a/frame/base/bli_pba.c +++ b/frame/base/bli_pba.c @@ -66,6 +66,18 @@ pba_t* bli_pba_query( void ) return &pba; } +void bli_pba_rntm_set_pba + ( + rntm_t* rntm + ) +{ + pba_t* pba = bli_pba_query(); + + bli_rntm_set_pba( pba, rntm ); +} + +// ----------------------------------------------------------------------------- + err_t bli_pba_init ( void diff --git a/frame/base/bli_pba.h b/frame/base/bli_pba.h index 377ced4da1..89d20838dc 100644 --- a/frame/base/bli_pba.h +++ b/frame/base/bli_pba.h @@ -124,6 +124,9 @@ void bli_pba_mark_init( void ); void bli_pba_mark_uninit( void ); pba_t* bli_pba_query( void ); +BLIS_EXPORT_BLIS void bli_pba_rntm_set_pba( rntm_t* rntm ); + +// ----------------------------------------------------------------------------- err_t bli_pba_init( void ); err_t bli_pba_finalize( void ); @@ -142,16 +145,6 @@ void bli_pba_release mem_t* mem ); -BLIS_INLINE void bli_pba_rntm_set_pba - ( - rntm_t* rntm - ) -{ - pba_t* pba = bli_pba_query(); - - bli_rntm_set_pba( pba, rntm ); -} - siz_t bli_pba_pool_size ( const pba_t* pba,