From a6fc17ffbe1fe4385b4033301e5868fedad73fc0 Mon Sep 17 00:00:00 2001 From: satish kumar nuggu Date: Thu, 12 Nov 2020 21:30:29 +0530 Subject: [PATCH 1/2] Added Blas interface for ?imatcopy, ?omatcopy, ?omatadd, ?omatcopy2 AMD-Internal: [CPUPL-1116] Original review was in this commit http://gerrit-git.amd.com/c/cpulibraries/er/blis/+/428165. Added new commit for transpose API's Change-Id: I322389cc0be0aaccf82d1d0bb4476beea8694cd8 --- frame/compat/bla_imatcopy.c | 644 ++++++++++++++++++++++++ frame/compat/bla_imatcopy.h | 49 ++ frame/compat/bla_omatadd.c | 510 +++++++++++++++++++ frame/compat/bla_omatadd.h | 49 ++ frame/compat/bla_omatcopy.c | 937 ++++++++++++++++++++++++++++++++++ frame/compat/bla_omatcopy.h | 49 ++ frame/compat/bla_omatcopy2.c | 942 +++++++++++++++++++++++++++++++++++ frame/compat/bla_omatcopy2.h | 50 ++ frame/compat/bli_blas.h | 6 + test/test_imatcopy.c | 165 ++++++ test/test_omatadd.c | 183 +++++++ test/test_omatcopy.c | 161 ++++++ test/test_omatcopy2.c | 170 +++++++ 13 files changed, 3915 insertions(+) create mode 100644 frame/compat/bla_imatcopy.c create mode 100644 frame/compat/bla_imatcopy.h create mode 100644 frame/compat/bla_omatadd.c create mode 100644 frame/compat/bla_omatadd.h create mode 100644 frame/compat/bla_omatcopy.c create mode 100644 frame/compat/bla_omatcopy.h create mode 100644 frame/compat/bla_omatcopy2.c create mode 100644 frame/compat/bla_omatcopy2.h create mode 100644 test/test_imatcopy.c create mode 100644 test/test_omatadd.c create mode 100644 test/test_omatcopy.c create mode 100644 test/test_omatcopy2.c diff --git a/frame/compat/bla_imatcopy.c b/frame/compat/bla_imatcopy.c new file mode 100644 index 0000000000..dd07bb5314 --- /dev/null +++ b/frame/compat/bla_imatcopy.c @@ -0,0 +1,644 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "blis.h" + +#ifdef BLIS_ENABLE_BLAS + +static dim_t bli_siMatCopy_cn(dim_t rows,dim_t cols,const float alpha,float* a,dim_t lda, dim_t ldb); + +static dim_t bli_diMatCopy_cn(dim_t rows,dim_t cols,const double alpha,double* a,dim_t lda, dim_t ldb); + +static dim_t bli_ciMatCopy_cn(dim_t rows,dim_t cols,const scomplex alpha,scomplex* a,dim_t lda, dim_t ldb); + +static dim_t bli_ciMatCopy_cr(dim_t rows,dim_t cols,const scomplex alpha,scomplex* a,dim_t lda, dim_t ldb); + +static dim_t bli_ziMatCopy_cn(dim_t rows,dim_t cols,const dcomplex alpha,dcomplex* a,dim_t lda, dim_t ldb); + +static dim_t bli_ziMatCopy_cr(dim_t rows,dim_t cols,const dcomplex alpha,dcomplex* a,dim_t lda, dim_t ldb); + +static void bli_stranspose(float* A,float* B,dim_t cols, dim_t rows); + +static void bli_dtranspose(double* A,double* B,dim_t cols, dim_t rows); + +static void bli_ctranspose(scomplex* A,scomplex* B,dim_t cols, dim_t rows); + +static void bli_ztranspose(dcomplex* A,dcomplex* B,dim_t cols, dim_t rows); + +static void bli_stranspose(float* A,float* B,dim_t cols, dim_t rows) +{ + for (dim_t i = 0; i < cols; i++) + for (dim_t j = 0; j < rows; j++) + B[j*cols + i] = A[i*rows +j]; +} + +static void bli_dtranspose(double* A,double* B,dim_t cols, dim_t rows) +{ + for (dim_t i = 0; i < cols; i++) + for (dim_t j = 0; j < rows; j++) + B[j*cols + i] = A[i*rows +j]; +} + +static void bli_ctranspose(scomplex* A,scomplex* B,dim_t cols, dim_t rows) +{ + for (dim_t i = 0; i < cols; i++) + for (dim_t j = 0; j < rows; j++) + B[j*cols + i] = A[i*rows +j]; +} + +static void bli_ztranspose(dcomplex* A,dcomplex* B,dim_t cols, dim_t rows) +{ + for (dim_t i = 0; i < cols; i++) + for (dim_t j = 0; j < rows; j++) + B[j*cols + i] = A[i*rows +j]; +} + +void simatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha,float* aptr, f77_int* lda, f77_int* ldb) +{ + //printf("I am from simatcopy_\n"); + AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); + //bli_init_once(); + if ( 
!(*trans == 'n' || *trans == 'N' || + *trans == 't' || *trans == 'T' || + *trans == 'c' || *trans == 'C' || + *trans == 'r' || *trans == 'R')) + { + bli_print_msg( " Invalid trans setting simatcopy_() .", __FILE__, __LINE__ ); + AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid value for trans parameter"); + return ; + } + + if ( *rows <= 0 || *cols <= 0 || alpha == NULL || aptr == NULL || *lda < 1 || *ldb < 1) + { + bli_print_msg( " Invalid function parameters simatcopy_() .", __FILE__, __LINE__ ); + AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid function parameters"); + return ; + } + + if ( *trans == 'n' || *trans == 'N') + { + bli_siMatCopy_cn(*rows,*cols,*alpha,aptr,*lda,*ldb); + } + else if ( *trans == 't' || *trans == 'T') + { + //pre transpose + float* temp = (float* ) bli_malloc_user((*rows)*(*lda)*sizeof(float)); + bli_stranspose(aptr,temp,*lda,*rows); + + for (dim_t i = 0; i < *cols; i++) + memcpy(&aptr[i*(*lda)],&temp[i*(*lda)],(*rows)*sizeof(float)); + + bli_siMatCopy_cn(*cols,*rows,*alpha,aptr,*lda,*ldb); + + //post transpose + //bli_stranspose(temp,aptr,*lda,*cols); + bli_free_user(temp); + } + else if ( *trans == 'c' || *trans == 'C') + { + //pre transpose + float* temp = (float* ) bli_malloc_user((*rows)*(*lda)*sizeof(float)); + bli_stranspose(aptr,temp,*lda,*rows); + + for (dim_t i = 0; i < *cols; i++) + memcpy(&aptr[i*(*lda)],&temp[i*(*lda)],(*rows)*sizeof(float)); + + //bli_siMatCopy_cn(*cols,*rows,*alpha,temp,*lda,*ldb); + + bli_siMatCopy_cn(*cols,*rows,*alpha,aptr,*lda,*ldb); + //post transpose + //bli_stranspose(temp,aptr,*lda,*cols); + bli_free_user(temp); + } + else if ( *trans == 'r' || *trans == 'R') + { + bli_siMatCopy_cn(*rows,*cols,*alpha,aptr,*lda,*ldb); + } + else + { + // do nothing + } + AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); + return ; +} + +void dimatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha,double* aptr, f77_int* lda, f77_int* ldb) +{ + 
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); + //bli_init_once(); + if ( !(*trans == 'n' || *trans == 'N' || + *trans == 't' || *trans == 'T' || + *trans == 'c' || *trans == 'C' || + *trans == 'r' || *trans == 'R')) + { + bli_print_msg( " Invalid trans setting dimatcopy_() .", __FILE__, __LINE__ ); + AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid value for trans parameter"); + return ; + } + + if ( *rows <= 0 || *cols <= 0 || alpha == NULL || aptr == NULL || *lda < 1 || *ldb < 1) + { + bli_print_msg( " Invalid function parameters dimatcopy_() .", __FILE__, __LINE__ ); + AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid function parameters"); + return ; + } + + if ( *trans == 'n' || *trans == 'N') + { + bli_diMatCopy_cn(*rows,*cols,*alpha,aptr,*lda,*ldb); + } + else if ( *trans == 't' || *trans == 'T') + { + //pre transpose + double* temp = (double* ) bli_malloc_user((*rows)*(*lda)*sizeof(double)); + bli_dtranspose(aptr,temp,*lda,*rows); + + for (dim_t i = 0; i < *cols; i++) + memcpy(&aptr[i*(*lda)],&temp[i*(*lda)],(*rows)*sizeof(double)); + + bli_diMatCopy_cn(*cols,*rows,*alpha,aptr,*lda,*ldb); + + //post transpose + //bli_dtranspose(temp,aptr,*rows,*lda); + //bli_dtranspose(temp,aptr,*lda,*cols); + bli_free_user(temp); + } + else if ( *trans == 'c' || *trans == 'C') + { + //pre transpose + double* temp = (double* ) bli_malloc_user((*rows)*(*lda)*sizeof(double)); + bli_dtranspose(aptr,temp,*lda,*rows); + + for (dim_t i = 0; i < *cols; i++) + memcpy(&aptr[i*(*lda)],&temp[i*(*lda)],(*rows)*sizeof(double)); + + bli_diMatCopy_cn(*cols,*rows,*alpha,aptr,*lda,*ldb); + + //post transpose + //bli_dtranspose(temp,aptr,*lda,*cols); + bli_free_user(temp); + } + else if ( *trans == 'r' || *trans == 'R') + { + bli_diMatCopy_cn(*rows,*cols,*alpha,aptr,*lda,*ldb); + } + else + { + // do nothing + } + AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); + return ; +} + +void cimatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha,scomplex* aptr, 
f77_int* lda, f77_int* ldb) +{ + AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); + //bli_init_once(); + if ( !(*trans == 'n' || *trans == 'N' || + *trans == 't' || *trans == 'T' || + *trans == 'c' || *trans == 'C' || + *trans == 'r' || *trans == 'R')) + { + bli_print_msg( " Invalid trans setting cimatcopy_() .", __FILE__, __LINE__ ); + AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid value for trans parameter"); + return ; + } + + if ( *rows <= 0 || *cols <= 0 || alpha == NULL || aptr == NULL || *lda < 1 || *ldb < 1) + { + bli_print_msg( " Invalid function parameters cimatcopy_() .", __FILE__, __LINE__ ); + AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid function parameters"); + return ; + } + + if ( *trans == 'n' || *trans == 'N') + { + bli_ciMatCopy_cn(*rows,*cols,*alpha,aptr,*lda,*ldb); + } + else if ( *trans == 't' || *trans == 'T') + { + //pre transpose + scomplex* temp = (scomplex* ) bli_malloc_user((*rows)*(*lda)*sizeof(scomplex)); + bli_ctranspose(aptr,temp,*lda,*rows); + + //bli_ciMatCopy_cn(*cols,*rows,*alpha,temp,*lda,*ldb); + for (dim_t i = 0; i < *cols; i++) + memcpy(&aptr[i*(*lda)],&temp[i*(*lda)],(*rows)*sizeof(scomplex)); + bli_ciMatCopy_cn(*cols,*rows,*alpha,aptr,*lda,*ldb); + + //post transpose + //bli_ctranspose(temp,aptr,*lda,*cols); + bli_free_user(temp); + } + else if ( *trans == 'c' || *trans == 'C') + { + + //pre transpose + scomplex* temp = (scomplex* ) bli_malloc_user((*rows)*(*lda)*sizeof(scomplex)); + bli_ctranspose(aptr,temp,*lda,*rows); + + //bli_ciMatCopy_cr(*cols,*rows,*alpha,temp,*lda,*ldb); + for (dim_t i = 0; i < *cols; i++) + memcpy(&aptr[i*(*lda)],&temp[i*(*lda)],(*rows)*sizeof(scomplex)); + bli_ciMatCopy_cr(*cols,*rows,*alpha,aptr,*lda,*ldb); + + //post transpose + //bli_ctranspose(temp,aptr,*lda,*cols); + bli_free_user(temp); + } + else if ( *trans == 'r' || *trans == 'R') + { + bli_ciMatCopy_cr(*rows,*cols,*alpha,aptr,*lda,*ldb); + } + else + { + // do nothing + } + AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); 
+ return ; +} + +void zimatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha,dcomplex* aptr, f77_int* lda, f77_int* ldb) +{ + AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); + //bli_init_once(); + if ( !(*trans == 'n' || *trans == 'N' || + *trans == 't' || *trans == 'T' || + *trans == 'c' || *trans == 'C' || + *trans == 'r' || *trans == 'R')) + { + bli_print_msg( " Invalid trans setting zimatcopy_() .", __FILE__, __LINE__ ); + AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid value for trans parameter"); + return ; + } + + if ( *rows <= 0 || *cols <= 0 || alpha == NULL || aptr == NULL || *lda < 1 || *ldb < 1) + { + bli_print_msg( " Invalid function parameters zimatcopy_() .", __FILE__, __LINE__ ); + AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid function parameters"); + return ; + } + + if ( *trans == 'n' || *trans == 'N') + { + bli_ziMatCopy_cn(*rows,*cols,*alpha,aptr,*lda,*ldb); + } + else if ( *trans == 't' || *trans == 'T') + { + + //pre transpose + dcomplex* temp = (dcomplex *) bli_malloc_user((*rows)*(*lda)*sizeof(dcomplex)); + bli_ztranspose(aptr,temp,*lda,*rows); + + //bli_ziMatCopy_cn(*cols,*rows,*alpha,temp,*lda,*ldb); + for (dim_t i = 0; i < *cols; i++) + memcpy(&aptr[i*(*lda)],&temp[i*(*lda)],(*rows)*sizeof(dcomplex)); + bli_ziMatCopy_cn(*cols,*rows,*alpha,aptr,*lda,*ldb); + + //post transpose + //bli_ztranspose(temp,aptr,*lda,*cols); + bli_free_user(temp); + } + else if ( *trans == 'c' || *trans == 'C') + { + //pre transpose + dcomplex* temp = (dcomplex *) bli_malloc_user((*rows)*(*lda)*sizeof(dcomplex)); + bli_ztranspose(aptr,temp,*lda,*rows); + + //bli_ziMatCopy_cr(*cols,*rows,*alpha,temp,*lda,*ldb); + for (dim_t i = 0; i < *cols; i++) + memcpy(&aptr[i*(*lda)],&temp[i*(*lda)],(*rows)*sizeof(dcomplex)); /* elements are dcomplex, same size as the 't' branch */ + bli_ziMatCopy_cr(*cols,*rows,*alpha,aptr,*lda,*ldb); + + //post transpose + //bli_ztranspose(temp,aptr,*lda,*cols); + bli_free_user(temp); + } + else if ( *trans == 'r' || *trans == 'R') + { + 
bli_ziMatCopy_cr(*rows,*cols,*alpha,aptr,*lda,*ldb); + } + else + { + // do nothing + } + AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); + return ; +} + +// suffix cn means - column major & non-trans +static dim_t bli_siMatCopy_cn(dim_t rows,dim_t cols,const float alpha,float* a,dim_t lda, dim_t ldb) +{ + AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2); + dim_t i,j; + + float* s_aptr; + float* d_aptr; + + if ( rows <= 0 || cols <= 0 || a == NULL || lda < cols || ldb < cols) + { + bli_print_msg( " Invalid function parameters bli_siMatCopy_cn() .", __FILE__, __LINE__ ); + AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_2, "Invalid function parameters"); + return (0); + } + + if ( lda == ldb && alpha == 1.0) + return (0); + + s_aptr = a; + d_aptr = a; + if ( alpha == 0.0 ) + { + for ( i=0; i +#else +#include +#endif +#include "blis.h" + +int main( int argc, char** argv ) +{ + //printf("imatcopy_ test.....start\n"); + + obj_t data; + obj_t alpha; + dim_t p; + dim_t p_begin, p_end, p_inc; + p_begin = 200; + p_end = 2000; + p_inc = 200; + + trans_t trans; + f77_char f77_trans; + + num_t dt; + int rows; + int cols; + //f77_int lda; + //f77_int ldb; + + inc_t cs_data; + + double dtime; + double dtime_save; + double gflops; + + /*rows = 4; + cols = 4; + lda = 4; + ldb = 4; +*/ + //cs_data = rows; + + dt = BLIS_DOUBLE; + trans = BLIS_NO_TRANSPOSE; + bli_param_map_blis_to_netlib_trans( trans, &f77_trans); + + dtime_save = DBL_MAX; + for ( p = p_begin; p <= p_end; p += p_inc ) + { + + rows = p; + cols = p; + //lda = p; + //ldb = p; + + cs_data = rows; + + bli_obj_create( dt, 1, 1, 0, 0, &alpha); + bli_obj_create( dt, rows, cols, 1, cs_data, &data ); + + bli_randm( &data ); + bli_setsc( (2.0/1.0), 0.0, &alpha ); + + dtime = bli_clock(); + + if ( bli_is_float( dt ) ) + { + f77_int rows_p = bli_obj_length( &data); + f77_int cols_p = bli_obj_width( &data); + + f77_int lda = bli_obj_col_stride( &data); + f77_int ldb = bli_obj_col_stride( &data); + + float* alpha_p = bli_obj_buffer( &alpha 
); + float* a_p = bli_obj_buffer( &data); + + simatcopy_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, &ldb); + } + else if ( bli_is_double( dt ) ) + { + f77_int rows_p = bli_obj_length( &data); + f77_int cols_p = bli_obj_width( &data); + + f77_int lda = bli_obj_col_stride( &data); + f77_int ldb = bli_obj_col_stride( &data); + + double* alpha_p = bli_obj_buffer( &alpha ); + double* a_p = bli_obj_buffer( &data); + dimatcopy_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, &ldb); + } + else if ( bli_is_scomplex( dt ) ) + { + f77_int rows_p = bli_obj_length( &data); + f77_int cols_p = bli_obj_width( &data); + + f77_int lda = bli_obj_col_stride( &data); + f77_int ldb = bli_obj_col_stride( &data); + + scomplex* alpha_p = bli_obj_buffer( &alpha ); + scomplex* a_p = bli_obj_buffer( &data); + + cimatcopy_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, &ldb); + } + else if ( bli_is_dcomplex( dt ) ) + { + f77_int rows_p = bli_obj_length( &data); + f77_int cols_p = bli_obj_width( &data); + + f77_int lda = bli_obj_col_stride( &data); + f77_int ldb = bli_obj_col_stride( &data); + + dcomplex* alpha_p = bli_obj_buffer( &alpha ); + dcomplex* a_p = bli_obj_buffer( &data); + + zimatcopy_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, &ldb); + } + + dtime_save = bli_clock_min_diff( dtime_save, dtime ); + gflops = ( 2.0 * rows * cols ) / ( dtime_save * 1.0e9 ); + + if ( bli_is_complex( dt ) ) gflops *= 4.0; + + printf( "( %2lu, 1:4 ) = [ %4lu %4lu %7.2f ];\n", + ( unsigned long )(p - p_begin)/p_inc + 1, + ( unsigned long )rows, + ( unsigned long )cols, gflops ); + bli_obj_free( &alpha ); + bli_obj_free( &data ); + } + //printf("imatcopy_ test.....end\n"); + return (0); +} diff --git a/test/test_omatadd.c b/test/test_omatadd.c new file mode 100644 index 0000000000..6404b344e7 --- /dev/null +++ b/test/test_omatadd.c @@ -0,0 +1,183 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#ifdef WIN32 +#include <io.h> +#else +#include <unistd.h> +#endif +#include "blis.h" + +int main( int argc, char** argv ) +{ + //printf("omatadd_ test.....start\n"); + + obj_t data_aptr; + obj_t data_bptr; + obj_t data_cptr; + obj_t alpha; + obj_t beta; + dim_t p; + dim_t p_begin, p_end, p_inc; + p_begin = 200; + p_end = 2000; + p_inc = 200; + + trans_t transa,transb; + f77_char f77_transa,f77_transb; + + num_t dt; + int rows; + int cols; + + inc_t cs_data; + + double dtime; + double dtime_save; + double gflops; + + dt = BLIS_DOUBLE; + transa = BLIS_NO_TRANSPOSE; + transb = BLIS_NO_TRANSPOSE; + bli_param_map_blis_to_netlib_trans( transa, &f77_transa); + bli_param_map_blis_to_netlib_trans( transb, &f77_transb); + + for ( p = p_begin; p <= p_end; p += p_inc ) + { + dtime_save = DBL_MAX; /* reset per problem size so a smaller size's time is not reused */ + + rows = p; + cols = p; + cs_data = rows; + + bli_obj_create( dt, 1, 1, 0, 0, &alpha); + bli_obj_create( dt, 1, 1, 0, 0, &beta); + bli_obj_create( dt, rows, cols, 1, cs_data, &data_aptr ); + bli_obj_create( dt, rows, cols, 1, cs_data, &data_bptr ); + bli_obj_create( dt, rows, cols, 1, cs_data, &data_cptr ); + + bli_randm( &data_aptr ); + bli_randm( &data_bptr ); + bli_setsc( (2.0/1.0), 0.0, &alpha ); + bli_setsc( (2.0/1.0), 0.0, &beta ); + + dtime = bli_clock(); + + if ( bli_is_float( dt ) ) + { + f77_int rows_p = bli_obj_length( &data_aptr); + f77_int cols_p = bli_obj_width( &data_aptr); + + f77_int lda = bli_obj_col_stride( &data_aptr); + f77_int ldb = bli_obj_col_stride( &data_bptr); + f77_int ldc = bli_obj_col_stride( &data_cptr); + + float* alpha_p = bli_obj_buffer( &alpha ); + float* beta_p = bli_obj_buffer( &beta ); + float* a_p = bli_obj_buffer( &data_aptr); + float* b_p = bli_obj_buffer( &data_bptr); + float* c_p = bli_obj_buffer( &data_cptr); + + somatadd_ (&f77_transa,&f77_transb, &rows_p, &cols_p, alpha_p, a_p, &lda, beta_p, b_p, &ldb, c_p, &ldc); + } + else if ( bli_is_double( dt ) ) + { + f77_int rows_p = bli_obj_length( &data_aptr); + f77_int cols_p = bli_obj_width( 
&data_aptr); + + f77_int lda = bli_obj_col_stride( &data_aptr); + f77_int ldb = bli_obj_col_stride( &data_bptr); + f77_int ldc = bli_obj_col_stride( &data_cptr); + + double* alpha_p = bli_obj_buffer( &alpha ); + double* beta_p = bli_obj_buffer( &beta ); + double* a_p = bli_obj_buffer( &data_aptr); + double* b_p = bli_obj_buffer( &data_bptr); + double* c_p = bli_obj_buffer( &data_cptr); + + domatadd_ (&f77_transa,&f77_transb, &rows_p, &cols_p, alpha_p, a_p, &lda, beta_p, b_p, &ldb, c_p, &ldc); + } + else if ( bli_is_scomplex( dt ) ) + { + f77_int rows_p = bli_obj_length( &data_aptr); + f77_int cols_p = bli_obj_width( &data_aptr); + + f77_int lda = bli_obj_col_stride( &data_aptr); + f77_int ldb = bli_obj_col_stride( &data_bptr); + f77_int ldc = bli_obj_col_stride( &data_cptr); + + scomplex* alpha_p = bli_obj_buffer( &alpha ); + scomplex* beta_p = bli_obj_buffer( &beta ); + scomplex* a_p = bli_obj_buffer( &data_aptr); + scomplex* b_p = bli_obj_buffer( &data_bptr); + scomplex* c_p = bli_obj_buffer( &data_cptr); + + comatadd_ (&f77_transa,&f77_transb, &rows_p, &cols_p, alpha_p, a_p, &lda, beta_p, b_p, &ldb, c_p, &ldc); + } + else if ( bli_is_dcomplex( dt ) ) + { + f77_int rows_p = bli_obj_length( &data_aptr); + f77_int cols_p = bli_obj_width( &data_aptr); + + f77_int lda = bli_obj_col_stride( &data_aptr); + f77_int ldb = bli_obj_col_stride( &data_bptr); + f77_int ldc = bli_obj_col_stride( &data_cptr); + + dcomplex* alpha_p = bli_obj_buffer( &alpha ); + dcomplex* beta_p = bli_obj_buffer( &beta ); + dcomplex* a_p = bli_obj_buffer( &data_aptr); + dcomplex* b_p = bli_obj_buffer( &data_bptr); + dcomplex* c_p = bli_obj_buffer( &data_cptr); + + zomatadd_ (&f77_transa,&f77_transb, &rows_p, &cols_p, alpha_p, a_p, &lda, beta_p, b_p, &ldb, c_p, &ldc); + } + + dtime_save = bli_clock_min_diff( dtime_save, dtime ); + gflops = ( 2.0 * rows * cols ) / ( dtime_save * 1.0e9 ); + + if ( bli_is_complex( dt ) ) gflops *= 4.0; + + printf( "( %2lu, 1:4 ) = [ %4lu %4lu %7.2f ];\n", + ( 
unsigned long )(p - p_begin)/p_inc + 1, + ( unsigned long )rows, + ( unsigned long )cols, gflops ); + bli_obj_free( &alpha ); + bli_obj_free( &beta ); + bli_obj_free( &data_aptr ); + bli_obj_free( &data_bptr ); + bli_obj_free( &data_cptr ); + } + //printf("omatadd_ test.....end\n"); + return (0); +} diff --git a/test/test_omatcopy.c b/test/test_omatcopy.c new file mode 100644 index 0000000000..7619512f5e --- /dev/null +++ b/test/test_omatcopy.c @@ -0,0 +1,161 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifdef WIN32 +#include <io.h> +#else +#include <unistd.h> +#endif +#include "blis.h" + +int main( int argc, char** argv ) +{ + //printf("omatcopy_ test.....start\n"); + + obj_t data_aptr; + obj_t data_bptr; + obj_t alpha; + dim_t p; + dim_t p_begin, p_end, p_inc; + p_begin = 200; + p_end = 2000; + p_inc = 200; + + trans_t trans; + f77_char f77_trans; + + num_t dt; + int rows; + int cols; + + inc_t cs_data; + + double dtime; + double dtime_save; + double gflops; + + dt = BLIS_DOUBLE; + trans = BLIS_NO_TRANSPOSE; + bli_param_map_blis_to_netlib_trans( trans, &f77_trans); + + for ( p = p_begin; p <= p_end; p += p_inc ) + { + dtime_save = DBL_MAX; /* reset per problem size so a smaller size's time is not reused */ + + rows = p; + cols = p; + cs_data = rows; + + bli_obj_create( dt, 1, 1, 0, 0, &alpha); + bli_obj_create( dt, rows, cols, 1, cs_data, &data_aptr ); + bli_obj_create( dt, rows, cols, 1, cs_data, &data_bptr ); + + bli_randm( &data_aptr ); + bli_setsc( (2.0/1.0), 0.0, &alpha ); + + dtime = bli_clock(); + + if ( bli_is_float( dt ) ) + { + f77_int rows_p = bli_obj_length( &data_aptr); + f77_int cols_p = bli_obj_width( &data_aptr); + + f77_int lda = bli_obj_col_stride( &data_aptr); + f77_int ldb = bli_obj_col_stride( &data_bptr); + + float* alpha_p = bli_obj_buffer( &alpha ); + float* a_p = bli_obj_buffer( &data_aptr); + float* b_p = bli_obj_buffer( &data_bptr); + + somatcopy_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, b_p, &ldb); + } + else if ( bli_is_double( dt ) ) + { + f77_int rows_p = bli_obj_length( 
&data_aptr); + f77_int cols_p = bli_obj_width( &data_aptr); + + f77_int lda = bli_obj_col_stride( &data_aptr); + f77_int ldb = bli_obj_col_stride( &data_bptr); + + double* alpha_p = bli_obj_buffer( &alpha ); + double* a_p = bli_obj_buffer( &data_aptr); + double* b_p = bli_obj_buffer( &data_bptr); + + domatcopy_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, b_p, &ldb); + } + else if ( bli_is_scomplex( dt ) ) + { + f77_int rows_p = bli_obj_length( &data_aptr); + f77_int cols_p = bli_obj_width( &data_aptr); + + f77_int lda = bli_obj_col_stride( &data_aptr); + f77_int ldb = bli_obj_col_stride( &data_bptr); + + scomplex* alpha_p = bli_obj_buffer( &alpha ); + scomplex* a_p = bli_obj_buffer( &data_aptr); + scomplex* b_p = bli_obj_buffer( &data_bptr); + + comatcopy_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, b_p, &ldb); + } + else if ( bli_is_dcomplex( dt ) ) + { + f77_int rows_p = bli_obj_length( &data_aptr); + f77_int cols_p = bli_obj_width( &data_aptr); + + f77_int lda = bli_obj_col_stride( &data_aptr); + f77_int ldb = bli_obj_col_stride( &data_bptr); + + dcomplex* alpha_p = bli_obj_buffer( &alpha ); + dcomplex* a_p = bli_obj_buffer( &data_aptr); + dcomplex* b_p = bli_obj_buffer( &data_bptr); + + zomatcopy_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, b_p, &ldb); + } + + dtime_save = bli_clock_min_diff( dtime_save, dtime ); + gflops = ( 2.0 * rows * cols ) / ( dtime_save * 1.0e9 ); + + if ( bli_is_complex( dt ) ) gflops *= 4.0; + + printf( "( %2lu, 1:4 ) = [ %4lu %4lu %7.2f ];\n", + ( unsigned long )(p - p_begin)/p_inc + 1, + ( unsigned long )rows, + ( unsigned long )cols, gflops ); + bli_obj_free( &alpha ); + bli_obj_free( &data_aptr ); + bli_obj_free( &data_bptr ); + } + //printf("omatcopy_ test.....end\n"); + return (0); +} diff --git a/test/test_omatcopy2.c b/test/test_omatcopy2.c new file mode 100644 index 0000000000..b81ab4f5fa --- /dev/null +++ b/test/test_omatcopy2.c @@ -0,0 +1,170 @@ +/* + + BLIS + An object-based framework for developing 
high-performance BLAS-like + libraries. + + Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#ifdef WIN32 +#include <io.h> +#else +#include <unistd.h> +#endif +#include "blis.h" + +int main( int argc, char** argv ) +{ + //printf("omatcopy2_ test.....start\n"); + + obj_t data_aptr; + obj_t data_bptr; + obj_t alpha; + dim_t p; + dim_t p_begin, p_end, p_inc; + p_begin = 200; + p_end = 2000; + p_inc = 200; + + trans_t trans; + f77_char f77_trans; + f77_int stridea; + f77_int strideb; + + num_t dt; + int rows; + int cols; + + inc_t cs_data_bptr; + inc_t cs_data_aptr; + + double dtime; + double dtime_save; + double gflops; + + dt = BLIS_DOUBLE; + trans = BLIS_NO_TRANSPOSE; + bli_param_map_blis_to_netlib_trans( trans, &f77_trans); + + stridea = 1; + strideb = 1; + for ( p = p_begin; p <= p_end; p += p_inc ) + { + dtime_save = DBL_MAX; /* reset per problem size so a smaller size's time is not reused */ + rows = p; + cols = p; + cs_data_aptr = rows + (rows - 1)*stridea; + cs_data_bptr = rows + (rows - 1)*strideb; + + bli_obj_create( dt, 1, 1, 0, 0, &alpha); + bli_obj_create( dt, cs_data_aptr, cols, 1, cs_data_aptr, &data_aptr ); + bli_obj_create( dt, cs_data_bptr, cols, 1, cs_data_bptr, &data_bptr ); + + bli_randm( &data_aptr ); + bli_setsc( (2.0/1.0), 0.0, &alpha ); + + dtime = bli_clock(); + + if ( bli_is_float( dt ) ) + { + f77_int rows_p = bli_obj_length( &data_aptr); + f77_int cols_p = bli_obj_width( &data_aptr); + rows_p = rows_p - (cols_p - 1)*stridea; + + f77_int lda = bli_obj_col_stride( &data_aptr); + f77_int ldb = bli_obj_col_stride( &data_bptr); + + float* alpha_p = bli_obj_buffer( &alpha ); + float* a_p = bli_obj_buffer( &data_aptr); + float* b_p = bli_obj_buffer( &data_bptr); + + somatcopy2_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, &stridea, b_p, &ldb, &strideb); + } + else if ( bli_is_double( dt ) ) + { + f77_int rows_p = bli_obj_length( &data_aptr); + f77_int cols_p = bli_obj_width( &data_aptr); + rows_p = rows_p - (cols_p - 1)*stridea; + + f77_int lda = bli_obj_col_stride( &data_aptr); + f77_int ldb = bli_obj_col_stride( &data_bptr); + + double* alpha_p = bli_obj_buffer( &alpha ); + double* a_p = bli_obj_buffer( 
&data_aptr); + double* b_p = bli_obj_buffer( &data_bptr); + + domatcopy2_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, &stridea, b_p, &ldb, &strideb); + } + else if ( bli_is_scomplex( dt ) ) + { + f77_int rows_p = bli_obj_length( &data_aptr); + f77_int cols_p = bli_obj_width( &data_aptr); + rows_p = rows_p - (cols_p - 1)*stridea; + + f77_int lda = bli_obj_col_stride( &data_aptr); + f77_int ldb = bli_obj_col_stride( &data_bptr); + + scomplex* alpha_p = bli_obj_buffer( &alpha ); + scomplex* a_p = bli_obj_buffer( &data_aptr); + scomplex* b_p = bli_obj_buffer( &data_bptr); + + comatcopy2_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, &stridea, b_p, &ldb, &strideb); + } + else if ( bli_is_dcomplex( dt ) ) + { + f77_int rows_p = bli_obj_length( &data_aptr); + f77_int cols_p = bli_obj_width( &data_aptr); + rows_p = rows_p - (cols_p - 1)*stridea; + + f77_int lda = bli_obj_col_stride( &data_aptr); + f77_int ldb = bli_obj_col_stride( &data_bptr); + + dcomplex* alpha_p = bli_obj_buffer( &alpha ); + dcomplex* a_p = bli_obj_buffer( &data_aptr); + dcomplex* b_p = bli_obj_buffer( &data_bptr); + + zomatcopy2_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, &stridea, b_p, &ldb, &strideb); + } + + dtime_save = bli_clock_min_diff( dtime_save, dtime ); + gflops = ( 2.0 * rows * cols ) / ( dtime_save * 1.0e9 ); + + if ( bli_is_complex( dt ) ) gflops *= 4.0; + + printf( "( %2lu, 1:4 ) = [ %4lu %4lu %7.2f ];\n", + ( unsigned long )(p - p_begin)/p_inc + 1, + ( unsigned long )rows, + ( unsigned long )cols, gflops ); + bli_obj_free( &alpha ); + bli_obj_free( &data_aptr ); + bli_obj_free( &data_bptr ); + } + //printf("omatcopy2_ test.....end\n"); + return (0); +} From 136b9e34252cbc2b707e9268aaddfe3b4eab820c Mon Sep 17 00:00:00 2001 From: satish kumar nuggu Date: Thu, 12 Nov 2020 21:30:29 +0530 Subject: [PATCH 2/2] Added Blas interface for ?imatcopy, ?omatcopy, ?omatadd, ?omatcopy2 AMD-Internal: [CPUPL-1116] Original review was in this commit 
http://gerrit-git.amd.com/c/cpulibraries/er/blis/+/428165. Added new commit for transpose API's Change-Id: I322389cc0be0aaccf82d1d0bb4476beea8694cd8 --- frame/compat/bla_imatcopy.c | 54 ++++++------------------------------ frame/compat/bla_omatadd.c | 50 ++++++--------------------------- frame/compat/bla_omatcopy.c | 54 +----------------------------------- frame/compat/bla_omatcopy2.c | 54 +----------------------------------- test/Makefile | 3 +- test/test_imatcopy.c | 2 -- test/test_omatadd.c | 2 -- test/test_omatcopy.c | 2 -- test/test_omatcopy2.c | 2 -- 9 files changed, 22 insertions(+), 201 deletions(-) diff --git a/frame/compat/bla_imatcopy.c b/frame/compat/bla_imatcopy.c index dd07bb5314..5d8a8af101 100644 --- a/frame/compat/bla_imatcopy.c +++ b/frame/compat/bla_imatcopy.c @@ -86,8 +86,6 @@ static void bli_ztranspose(dcomplex* A,dcomplex* B,dim_t cols, dim_t rows) void simatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha,float* aptr, f77_int* lda, f77_int* ldb) { - //printf("I am from simatcopy_\n"); - AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); //bli_init_once(); if ( !(*trans == 'n' || *trans == 'N' || *trans == 't' || *trans == 'T' || @@ -95,14 +93,12 @@ void simatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const float* alp *trans == 'r' || *trans == 'R')) { bli_print_msg( " Invalid trans setting simatcopy_() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid value for trans parameter"); return ; } if ( *rows <= 0 || *cols <= 0 || alpha == NULL || aptr == NULL || *lda < 1 || *ldb < 1) { bli_print_msg( " Invalid function parameters simatcopy_() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid function parameters"); return ; } @@ -113,7 +109,7 @@ void simatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const float* alp else if ( *trans == 't' || *trans == 'T') { //pre transpose - float* temp = (float* ) 
bli_malloc_user((*rows)*(*lda)*sizeof(float)); + float* temp = (float* ) bli_malloc_user((*rows)*(*lda)*sizeof(float),NULL); bli_stranspose(aptr,temp,*lda,*rows); for (dim_t i = 0; i < *cols; i++) @@ -128,7 +124,7 @@ void simatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const float* alp else if ( *trans == 'c' || *trans == 'C') { //pre transpose - float* temp = (float* ) bli_malloc_user((*rows)*(*lda)*sizeof(float)); + float* temp = (float* ) bli_malloc_user((*rows)*(*lda)*sizeof(float),NULL); bli_stranspose(aptr,temp,*lda,*rows); for (dim_t i = 0; i < *cols; i++) @@ -149,13 +145,11 @@ void simatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const float* alp { // do nothing } - AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); return ; } void dimatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha,double* aptr, f77_int* lda, f77_int* ldb) { - AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); //bli_init_once(); if ( !(*trans == 'n' || *trans == 'N' || *trans == 't' || *trans == 'T' || @@ -163,14 +157,12 @@ void dimatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const double* al *trans == 'r' || *trans == 'R')) { bli_print_msg( " Invalid trans setting dimatcopy_() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid value for trans parameter"); return ; } if ( *rows <= 0 || *cols <= 0 || alpha == NULL || aptr == NULL || *lda < 1 || *ldb < 1) { bli_print_msg( " Invalid function parameters dimatcopy_() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid function parameters"); return ; } @@ -181,7 +173,7 @@ void dimatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const double* al else if ( *trans == 't' || *trans == 'T') { //pre transpose - double* temp = (double* ) bli_malloc_user((*rows)*(*lda)*sizeof(double)); + double* temp = (double* ) bli_malloc_user((*rows)*(*lda)*sizeof(double),NULL); bli_dtranspose(aptr,temp,*lda,*rows); for (dim_t i = 0; i < *cols; 
i++) @@ -197,7 +189,7 @@ void dimatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const double* al else if ( *trans == 'c' || *trans == 'C') { //pre transpose - double* temp = (double* ) bli_malloc_user((*rows)*(*lda)*sizeof(double)); + double* temp = (double* ) bli_malloc_user((*rows)*(*lda)*sizeof(double),NULL); bli_dtranspose(aptr,temp,*lda,*rows); for (dim_t i = 0; i < *cols; i++) @@ -217,13 +209,11 @@ void dimatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const double* al { // do nothing } - AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); return ; } void cimatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha,scomplex* aptr, f77_int* lda, f77_int* ldb) { - AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); //bli_init_once(); if ( !(*trans == 'n' || *trans == 'N' || *trans == 't' || *trans == 'T' || @@ -231,14 +221,12 @@ void cimatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* *trans == 'r' || *trans == 'R')) { bli_print_msg( " Invalid trans setting cimatcopy_() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid value for trans parameter"); return ; } if ( *rows <= 0 || *cols <= 0 || alpha == NULL || aptr == NULL || *lda < 1 || *ldb < 1) { bli_print_msg( " Invalid function parameters cimatcopy_() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid function parameters"); return ; } @@ -249,7 +237,7 @@ void cimatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* else if ( *trans == 't' || *trans == 'T') { //pre transpose - scomplex* temp = (scomplex* ) bli_malloc_user((*rows)*(*lda)*sizeof(scomplex)); + scomplex* temp = (scomplex* ) bli_malloc_user((*rows)*(*lda)*sizeof(scomplex),NULL); bli_ctranspose(aptr,temp,*lda,*rows); //bli_ciMatCopy_cn(*cols,*rows,*alpha,temp,*lda,*ldb); @@ -265,7 +253,7 @@ void cimatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* { //pre transpose - scomplex* temp = (scomplex* 
) bli_malloc_user((*rows)*(*lda)*sizeof(scomplex)); + scomplex* temp = (scomplex* ) bli_malloc_user((*rows)*(*lda)*sizeof(scomplex),NULL); bli_ctranspose(aptr,temp,*lda,*rows); //bli_ciMatCopy_cr(*cols,*rows,*alpha,temp,*lda,*ldb); @@ -285,13 +273,11 @@ void cimatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* { // do nothing } - AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); return ; } void zimatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha,dcomplex* aptr, f77_int* lda, f77_int* ldb) { - AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); //bli_init_once(); if ( !(*trans == 'n' || *trans == 'N' || *trans == 't' || *trans == 'T' || @@ -299,14 +285,12 @@ void zimatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* *trans == 'r' || *trans == 'R')) { bli_print_msg( " Invalid trans setting zimatcopy_() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid value for trans parameter"); return ; } if ( *rows <= 0 || *cols <= 0 || alpha == NULL || aptr == NULL || *lda < 1 || *ldb < 1) { bli_print_msg( " Invalid function parameters dimatcopy_() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid function parameters"); return ; } @@ -318,7 +302,7 @@ void zimatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* { //pre transpose - dcomplex* temp = (dcomplex *) bli_malloc_user((*rows)*(*lda)*sizeof(dcomplex)); + dcomplex* temp = (dcomplex *) bli_malloc_user((*rows)*(*lda)*sizeof(dcomplex),NULL); bli_ztranspose(aptr,temp,*lda,*rows); //bli_ziMatCopy_cn(*cols,*rows,*alpha,temp,*lda,*ldb); @@ -333,7 +317,7 @@ void zimatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* else if ( *trans == 'c' || *trans == 'C') { //pre transpose - dcomplex* temp = (dcomplex *) bli_malloc_user((*rows)*(*lda)*sizeof(dcomplex)); + dcomplex* temp = (dcomplex *) bli_malloc_user((*rows)*(*lda)*sizeof(dcomplex),NULL); 
bli_ztranspose(aptr,temp,*lda,*rows); //bli_ziMatCopy_cr(*cols,*rows,*alpha,temp,*lda,*ldb); @@ -353,14 +337,12 @@ void zimatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* { // do nothing } - AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); return ; } // suffix cn means - column major & non-trans static dim_t bli_siMatCopy_cn(dim_t rows,dim_t cols,const float alpha,float* a,dim_t lda, dim_t ldb) { - AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2); dim_t i,j; float* s_aptr; @@ -369,7 +351,6 @@ static dim_t bli_siMatCopy_cn(dim_t rows,dim_t cols,const float alpha,float* a,d if ( rows <= 0 || cols <= 0 || a == NULL || lda < cols || ldb < cols) { bli_print_msg( " Invalid function parameters bli_siMatCopy_cn() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_2, "Invalid function parameters"); return (0); } @@ -400,23 +381,19 @@ static dim_t bli_siMatCopy_cn(dim_t rows,dim_t cols,const float alpha,float* a,d s_aptr += lda; d_aptr += ldb; } - AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2); return(0); } // suffix cn means - column major & non-trans static dim_t bli_diMatCopy_cn(dim_t rows,dim_t cols,const double alpha,double* a,dim_t lda, dim_t ldb) { - AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2); dim_t i,j; double* s_aptr; double* d_aptr; if ( rows <= 0 || cols <= 0 || a == NULL || lda < cols || ldb < cols) { - printf( " Invalid trans setting bli_diMatcopy_cn() %ld %ld %ld %ld \n", rows, cols, lda,ldb); bli_print_msg( " Invalid function parameters bli_diMatCopy_cn() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_2, "Invalid function parameters"); return (0); } @@ -448,14 +425,12 @@ static dim_t bli_diMatCopy_cn(dim_t rows,dim_t cols,const double alpha,double* a s_aptr += lda; d_aptr += ldb; } - AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2); return(0); } // suffix cn means - column major & non-trans static dim_t bli_ciMatCopy_cn(dim_t rows,dim_t cols,const scomplex alpha,scomplex* a,dim_t lda, dim_t ldb) { - 
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2); dim_t i,j; scomplex* s_aptr; scomplex* d_aptr; @@ -463,7 +438,6 @@ static dim_t bli_ciMatCopy_cn(dim_t rows,dim_t cols,const scomplex alpha,scomple if ( rows <= 0 || cols <= 0 || a == NULL || lda < cols || ldb < cols) { bli_print_msg( " Invalid function parameters bli_ciMatCopy_cn() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_2, "Invalid function parameters"); return (0); } s_aptr = a; @@ -496,14 +470,12 @@ static dim_t bli_ciMatCopy_cn(dim_t rows,dim_t cols,const scomplex alpha,scomple d_aptr += ldb; } - AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2); return(0); } // suffix cn means - column major & non-trans static dim_t bli_ziMatCopy_cn(dim_t rows,dim_t cols,const dcomplex alpha,dcomplex* a,dim_t lda, dim_t ldb) { - AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2); dim_t i,j; dcomplex* s_aptr; dcomplex* d_aptr; @@ -511,7 +483,6 @@ static dim_t bli_ziMatCopy_cn(dim_t rows,dim_t cols,const dcomplex alpha,dcomple if ( rows <= 0 || cols <= 0 || a == NULL || lda < cols || ldb < cols) { bli_print_msg( " Invalid function parameters bli_ziMatCopy_cn() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_2, "Invalid function parameters"); return (0); } s_aptr = a; @@ -543,14 +514,12 @@ static dim_t bli_ziMatCopy_cn(dim_t rows,dim_t cols,const dcomplex alpha,dcomple s_aptr += lda; d_aptr += ldb; } - AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2); return(0); } // suffix cr means - column major & conjugate static dim_t bli_ciMatCopy_cr(dim_t rows,dim_t cols,const scomplex alpha,scomplex* a,dim_t lda, dim_t ldb) { - AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2); dim_t i,j; scomplex* s_aptr; scomplex* d_aptr; @@ -558,7 +527,6 @@ static dim_t bli_ciMatCopy_cr(dim_t rows,dim_t cols,const scomplex alpha,scomple if ( rows <= 0 || cols <= 0 || a == NULL || lda < cols || ldb < cols) { bli_print_msg( " Invalid function parameters bli_ciMatCopy_cr() .", __FILE__, __LINE__ ); - 
AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_2, "Invalid function parameters"); return (0); } s_aptr = a; @@ -590,14 +558,12 @@ static dim_t bli_ciMatCopy_cr(dim_t rows,dim_t cols,const scomplex alpha,scomple s_aptr += lda; d_aptr += ldb; } - AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2); return(0); } // suffix cr means - column major & conjugate static dim_t bli_ziMatCopy_cr(dim_t rows,dim_t cols,const dcomplex alpha,dcomplex* a,dim_t lda, dim_t ldb) { - AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2); dim_t i,j; dcomplex* s_aptr; dcomplex* d_aptr; @@ -605,7 +571,6 @@ static dim_t bli_ziMatCopy_cr(dim_t rows,dim_t cols,const dcomplex alpha,dcomple if ( rows <= 0 || cols <= 0 || a == NULL || lda < cols || ldb < cols) { bli_print_msg( " Invalid function parameters bli_ziMatCopy_cr() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_2, "Invalid function parameters"); return (0); } s_aptr = a; @@ -637,8 +602,7 @@ static dim_t bli_ziMatCopy_cr(dim_t rows,dim_t cols,const dcomplex alpha,dcomple s_aptr += lda; d_aptr += ldb; } - AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2); return(0); } -#endif \ No newline at end of file +#endif diff --git a/frame/compat/bla_omatadd.c b/frame/compat/bla_omatadd.c index 2ae7ee5ef7..7dd9ee333a 100644 --- a/frame/compat/bla_omatadd.c +++ b/frame/compat/bla_omatadd.c @@ -98,13 +98,11 @@ static void bli_zconjugate(dcomplex* A,dim_t cols,dim_t rows) void somatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const float* alpha, const float* A, f77_int* lda, const float* beta, const float* B, f77_int* ldb, float* C, f77_int* ldc) { - AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); //bli_init_once(); if( alpha == NULL || A == NULL || beta == NULL || B == NULL || C == NULL || *lda < 1 || *ldb < 1 || *ldc < 1 || *m < 1 || *n < 1) { bli_print_msg( " Invalid function parameters somatadd_() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid function parameters"); return ; } if ( 
!(*transa == 'n' || *transa == 'N' || @@ -113,7 +111,6 @@ void somatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const *transa == 'r' || *transa == 'R')) { bli_print_msg( " Invalid value of transa somatadd_() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid value for trans parameter"); return ; } if ( !(*transb == 'n' || *transb == 'N' || @@ -122,7 +119,6 @@ void somatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const *transb == 'r' || *transb == 'R')) { bli_print_msg( " Invalid value of transb somatadd_() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid value for trans parameter"); return ; } float* aptr; @@ -132,7 +128,7 @@ void somatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const if(*transa == 't' || *transa == 'T' || *transa == 'c' || *transa == 'C') { - aptr = (float *) bli_malloc_user((*m)*(*lda)*sizeof(float)); + aptr = (float *) bli_malloc_user((*m)*(*lda)*sizeof(float),NULL); bli_stranspose(A,aptr,*m,*lda); } else @@ -143,7 +139,7 @@ void somatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const if(*transb == 't' || *transb == 'T' || *transb == 'c' || *transb == 'C') { - bptr = (float *) bli_malloc_user((*m)*(*ldb)*sizeof(float)); + bptr = (float *) bli_malloc_user((*m)*(*ldb)*sizeof(float),NULL); bli_stranspose(B,bptr,*m,*ldb); } else @@ -164,18 +160,15 @@ void somatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const { bli_free_user(bptr); } - AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); return ; } void domatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const double* alpha, const double* A, f77_int* lda, const double* beta, const double* B, f77_int* ldb, double* C, f77_int* ldc) { - AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); //bli_init_once(); if( alpha == NULL || A == NULL || beta == NULL || B == NULL || C == NULL || *lda < 1 || *ldb < 1 || *ldc < 1 || *m < 1 || 
*n < 1) { bli_print_msg( " Invalid function parameters domatadd_() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid function parameters"); return ; } if ( !(*transa == 'n' || *transa == 'N' || @@ -184,7 +177,6 @@ void domatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const *transa == 'r' || *transa == 'R')) { bli_print_msg( " Invalid value of transa domatadd_() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid value for trans parameter"); return ; } if ( !(*transb == 'n' || *transb == 'N' || @@ -193,7 +185,6 @@ void domatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const *transb == 'r' || *transb == 'R')) { bli_print_msg( " Invalid value of transb domatadd_() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid value for trans parameter"); return ; } double* aptr; @@ -203,7 +194,7 @@ void domatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const if(*transa == 't' || *transa == 'T' || *transa == 'c' || *transa == 'C') { - aptr = (double *) bli_malloc_user((*m)*(*lda)*sizeof(double)); + aptr = (double *) bli_malloc_user((*m)*(*lda)*sizeof(double),NULL); bli_dtranspose(A,aptr,*m,*lda); } else @@ -214,7 +205,7 @@ void domatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const if(*transb == 't' || *transb == 'T' || *transb == 'c' || *transb == 'C') { - bptr = (double *) bli_malloc_user((*m)*(*ldb)*sizeof(double)); + bptr = (double *) bli_malloc_user((*m)*(*ldb)*sizeof(double),NULL); bli_dtranspose(B,bptr,*m,*ldb); } else @@ -235,18 +226,15 @@ void domatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const { bli_free_user(bptr); } - AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); return ; } void comatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const scomplex* alpha, const scomplex* A, f77_int* lda,const scomplex* beta, scomplex* B, f77_int* ldb, scomplex* C, 
f77_int* ldc) { - AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); //bli_init_once(); if( alpha == NULL || A == NULL || beta == NULL || B == NULL || C == NULL || *lda < 1 || *ldb < 1 || *ldc < 1 || *m < 1 || *n < 1) { bli_print_msg( " Invalid function parameters comatadd_() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid function parameters"); return ; } if ( !(*transa == 'n' || *transa == 'N' || @@ -255,7 +243,6 @@ void comatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const *transa == 'r' || *transa == 'R')) { bli_print_msg( " Invalid value for transa comatadd_() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid value for trans parameter"); return ; } if ( !(*transb == 'n' || *transb == 'N' || @@ -264,7 +251,6 @@ void comatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const *transb == 'r' || *transb == 'R')) { bli_print_msg( " Invalid value of transb domatadd_() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid value for trans parameter"); return ; } scomplex* aptr; @@ -274,7 +260,7 @@ void comatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const if(*transa == 't' || *transa == 'T' || *transa == 'c' || *transa == 'C') { - aptr = (scomplex *) bli_malloc_user((*m)*(*lda)*sizeof(scomplex)); + aptr = (scomplex *) bli_malloc_user((*m)*(*lda)*sizeof(scomplex),NULL); bli_ctranspose(A,aptr,*m,*lda); } else @@ -285,7 +271,7 @@ void comatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const if(*transb == 't' || *transb == 'T' || *transb == 'c' || *transb == 'C') { - bptr = (scomplex *) bli_malloc_user((*m)*(*ldb)*sizeof(scomplex)); + bptr = (scomplex *) bli_malloc_user((*m)*(*ldb)*sizeof(scomplex),NULL); bli_ctranspose(B,bptr,*m,*ldb); } else @@ -319,18 +305,15 @@ void comatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const { bli_free_user(bptr); } - 
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); return ; } void zomatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const dcomplex* alpha, const dcomplex* A, f77_int* lda,const dcomplex* beta, dcomplex* B, f77_int* ldb, dcomplex* C, f77_int* ldc) { - AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); //bli_init_once(); if( alpha == NULL || A == NULL || beta == NULL || B == NULL || C == NULL || *lda < 1 || *ldb < 1 || *ldc < 1 || *m < 1 || *n < 1) { bli_print_msg( " Invalid function parameters zomatadd_() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid function parameters"); return ; } @@ -340,7 +323,6 @@ void zomatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const *transa == 'r' || *transa == 'R')) { bli_print_msg( " Invalid value for transa zomatadd_() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid value for trans parameter"); return ; } if ( !(*transb == 'n' || *transb == 'N' || @@ -349,7 +331,6 @@ void zomatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const *transb == 'r' || *transb == 'R')) { bli_print_msg( " Invalid value for transb zomatadd_() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "Invalid value for trans parameter"); return ; } @@ -360,7 +341,7 @@ void zomatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const if(*transa == 't' || *transa == 'T' || *transa == 'c' || *transa == 'C') { - aptr = (dcomplex *) bli_malloc_user((*m)*(*lda)*sizeof(dcomplex)); + aptr = (dcomplex *) bli_malloc_user((*m)*(*lda)*sizeof(dcomplex),NULL); bli_ztranspose(A,aptr,*m,*lda); } else @@ -371,7 +352,7 @@ void zomatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const if(*transb == 't' || *transb == 'T' || *transb == 'c' || *transb == 'C') { - bptr = (dcomplex *) bli_malloc_user((*m)*(*ldb)*sizeof(dcomplex)); + bptr = (dcomplex *) bli_malloc_user((*m)*(*ldb)*sizeof(dcomplex),NULL); 
bli_ztranspose(B,bptr,*m,*ldb); } else @@ -405,18 +386,15 @@ void zomatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const { bli_free_user(bptr); } - AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); return ; } static dim_t bli_soMatAdd_cn(dim_t rows,dim_t cols,const float alpha,float* aptr,dim_t lda,const float beta,float* bptr,dim_t ldb,float* C,dim_t ldc) { - AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2); dim_t i,j; if ( rows <= 0 || cols <= 0 || aptr == NULL || lda < rows || bptr == NULL || ldb < rows || C == NULL || ldc < rows ) { bli_print_msg( " Invalid function parameters bli_soMatAdd_cn() .", __FILE__, __LINE__ ); - AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_2, "Invalid function parameters"); return (0); } for ( i=0; i