diff --git a/frame/compat/bla_imatcopy.c b/frame/compat/bla_imatcopy.c new file mode 100644 index 0000000000..5d8a8af101 --- /dev/null +++ b/frame/compat/bla_imatcopy.c @@ -0,0 +1,608 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+*/
+
+#include "blis.h"
+
+#ifdef BLIS_ENABLE_BLAS
+
+static dim_t bli_siMatCopy_cn(dim_t rows,dim_t cols,const float alpha,float* a,dim_t lda, dim_t ldb);
+
+static dim_t bli_diMatCopy_cn(dim_t rows,dim_t cols,const double alpha,double* a,dim_t lda, dim_t ldb);
+
+static dim_t bli_ciMatCopy_cn(dim_t rows,dim_t cols,const scomplex alpha,scomplex* a,dim_t lda, dim_t ldb);
+
+static dim_t bli_ciMatCopy_cr(dim_t rows,dim_t cols,const scomplex alpha,scomplex* a,dim_t lda, dim_t ldb);
+
+static dim_t bli_ziMatCopy_cn(dim_t rows,dim_t cols,const dcomplex alpha,dcomplex* a,dim_t lda, dim_t ldb);
+
+static dim_t bli_ziMatCopy_cr(dim_t rows,dim_t cols,const dcomplex alpha,dcomplex* a,dim_t lda, dim_t ldb);
+
+static void bli_stranspose(float* A,float* B,dim_t cols, dim_t rows);
+
+static void bli_dtranspose(double* A,double* B,dim_t cols, dim_t rows);
+
+static void bli_ctranspose(scomplex* A,scomplex* B,dim_t cols, dim_t rows);
+
+static void bli_ztranspose(dcomplex* A,dcomplex* B,dim_t cols, dim_t rows);
+
+/*
+ * bli_?transpose() - out-of-place index swap, one instance per data type.
+ *
+ * A is read as `cols` consecutive runs of `rows` elements and written to B
+ * so that B[j*cols + i] = A[i*rows + j].  Both buffers must hold at least
+ * cols*rows elements; A and B are expected to be distinct buffers.
+ */
+static void bli_stranspose(float* A,float* B,dim_t cols, dim_t rows)
+{
+    for (dim_t i = 0; i < cols; i++)
+        for (dim_t j = 0; j < rows; j++)
+            B[j*cols + i] = A[i*rows +j];
+}
+
+static void bli_dtranspose(double* A,double* B,dim_t cols, dim_t rows)
+{
+    for (dim_t i = 0; i < cols; i++)
+        for (dim_t j = 0; j < rows; j++)
+            B[j*cols + i] = A[i*rows +j];
+}
+
+static void bli_ctranspose(scomplex* A,scomplex* B,dim_t cols, dim_t rows)
+{
+    for (dim_t i = 0; i < cols; i++)
+        for (dim_t j = 0; j < rows; j++)
+            B[j*cols + i] = A[i*rows +j];
+}
+
+static void bli_ztranspose(dcomplex* A,dcomplex* B,dim_t cols, dim_t rows)
+{
+    for (dim_t i = 0; i < cols; i++)
+        for (dim_t j = 0; j < rows; j++)
+            B[j*cols + i] = A[i*rows +j];
+}
+
+/*
+ * simatcopy_() - Fortran-callable in-place matrix copy, single precision.
+ *
+ * trans       'n'/'N' and 'r'/'R' hand the matrix straight to
+ *             bli_siMatCopy_cn(); 't'/'T' and 'c'/'C' first rebuild the
+ *             matrix through a scratch transpose and then call the same
+ *             kernel with rows and cols swapped.
+ * rows, cols  matrix dimensions, must be > 0.
+ * alpha       scaling factor passed to the kernel, must not be NULL.
+ * aptr        matrix storage, updated in place, must not be NULL.
+ * lda, ldb    leading dimensions, must be >= 1.
+ *
+ * Invalid arguments and a failed scratch allocation are reported through
+ * bli_print_msg() and the call returns without touching the matrix.
+ *
+ * NOTE(review): the scratch transpose reads (*rows)*(*lda) elements from
+ * aptr; confirm callers always provide that much storage (it exceeds
+ * (*lda)*(*cols) whenever *rows > *cols).
+ */
+void simatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha,float* aptr, f77_int* lda, f77_int* ldb)
+{
+    const f77_char op = *trans;
+    const int plain      = ( op == 'n' || op == 'N' || op == 'r' || op == 'R' );
+    const int transposed = ( op == 't' || op == 'T' || op == 'c' || op == 'C' );
+
+    if ( !plain && !transposed )
+    {
+        bli_print_msg( " Invalid trans setting simatcopy_() .", __FILE__, __LINE__ );
+        return ;
+    }
+
+    if ( *rows <= 0 || *cols <= 0 || alpha == NULL || aptr == NULL || *lda < 1 || *ldb < 1)
+    {
+        bli_print_msg( " Invalid function parameters simatcopy_() .", __FILE__, __LINE__ );
+        return ;
+    }
+
+    if ( plain )
+    {
+        bli_siMatCopy_cn(*rows,*cols,*alpha,aptr,*lda,*ldb);
+        return ;
+    }
+
+    // Pre-transpose into scratch memory; widen before multiplying so the
+    // byte count cannot overflow f77_int.
+    float* temp = (float* ) bli_malloc_user((size_t)(*rows)*(size_t)(*lda)*sizeof(float),NULL);
+    if ( temp == NULL )
+    {
+        bli_print_msg( " Memory allocation failed simatcopy_() .", __FILE__, __LINE__ );
+        return ;
+    }
+    bli_stranspose(aptr,temp,*lda,*rows);
+
+    // Copy the transposed image back over the input, column by column.
+    for (dim_t i = 0; i < *cols; i++)
+        memcpy(&aptr[i*(*lda)],&temp[i*(*lda)],(*rows)*sizeof(float));
+
+    bli_siMatCopy_cn(*cols,*rows,*alpha,aptr,*lda,*ldb);
+    bli_free_user(temp);
+}
+
+/*
+ * dimatcopy_() - double-precision counterpart of simatcopy_(); same
+ * argument contract, dispatching to bli_diMatCopy_cn().
+ */
+void dimatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha,double* aptr, f77_int* lda, f77_int* ldb)
+{
+    const f77_char op = *trans;
+    const int plain      = ( op == 'n' || op == 'N' || op == 'r' || op == 'R' );
+    const int transposed = ( op == 't' || op == 'T' || op == 'c' || op == 'C' );
+
+    if ( !plain && !transposed )
+    {
+        bli_print_msg( " Invalid trans setting dimatcopy_() .", __FILE__, __LINE__ );
+        return ;
+    }
+
+    if ( *rows <= 0 || *cols <= 0 || alpha == NULL || aptr == NULL || *lda < 1 || *ldb < 1)
+    {
+        bli_print_msg( " Invalid function parameters dimatcopy_() .", __FILE__, __LINE__ );
+        return ;
+    }
+
+    if ( plain )
+    {
+        bli_diMatCopy_cn(*rows,*cols,*alpha,aptr,*lda,*ldb);
+        return ;
+    }
+
+    // Pre-transpose into scratch memory (size computed in size_t).
+    double* temp = (double* ) bli_malloc_user((size_t)(*rows)*(size_t)(*lda)*sizeof(double),NULL);
+    if ( temp == NULL )
+    {
+        bli_print_msg( " Memory allocation failed dimatcopy_() .", __FILE__, __LINE__ );
+        return ;
+    }
+    bli_dtranspose(aptr,temp,*lda,*rows);
+
+    for (dim_t i = 0; i < *cols; i++)
+        memcpy(&aptr[i*(*lda)],&temp[i*(*lda)],(*rows)*sizeof(double));
+
+    bli_diMatCopy_cn(*cols,*rows,*alpha,aptr,*lda,*ldb);
+    bli_free_user(temp);
+}
+
+/*
+ * cimatcopy_() - single-precision complex in-place matrix copy.
+ *
+ * Same argument contract as simatcopy_().  'n'/'N' calls
+ * bli_ciMatCopy_cn() and 'r'/'R' calls bli_ciMatCopy_cr() on the matrix as
+ * given; 't'/'T' and 'c'/'C' run the scratch transpose first and then call
+ * the _cn or _cr kernel respectively with rows and cols swapped.
+ */
+void cimatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha,scomplex* aptr, f77_int* lda, f77_int* ldb)
+{
+    const f77_char op = *trans;
+    const int is_n = ( op == 'n' || op == 'N' );
+    const int is_t = ( op == 't' || op == 'T' );
+    const int is_c = ( op == 'c' || op == 'C' );
+    const int is_r = ( op == 'r' || op == 'R' );
+
+    if ( !(is_n || is_t || is_c || is_r) )
+    {
+        bli_print_msg( " Invalid trans setting cimatcopy_() .", __FILE__, __LINE__ );
+        return ;
+    }
+
+    if ( *rows <= 0 || *cols <= 0 || alpha == NULL || aptr == NULL || *lda < 1 || *ldb < 1)
+    {
+        bli_print_msg( " Invalid function parameters cimatcopy_() .", __FILE__, __LINE__ );
+        return ;
+    }
+
+    if ( is_n )
+    {
+        bli_ciMatCopy_cn(*rows,*cols,*alpha,aptr,*lda,*ldb);
+        return ;
+    }
+    if ( is_r )
+    {
+        bli_ciMatCopy_cr(*rows,*cols,*alpha,aptr,*lda,*ldb);
+        return ;
+    }
+
+    // Pre-transpose into scratch memory (size computed in size_t).
+    scomplex* temp = (scomplex* ) bli_malloc_user((size_t)(*rows)*(size_t)(*lda)*sizeof(scomplex),NULL);
+    if ( temp == NULL )
+    {
+        bli_print_msg( " Memory allocation failed cimatcopy_() .", __FILE__, __LINE__ );
+        return ;
+    }
+    bli_ctranspose(aptr,temp,*lda,*rows);
+
+    for (dim_t i = 0; i < *cols; i++)
+        memcpy(&aptr[i*(*lda)],&temp[i*(*lda)],(*rows)*sizeof(scomplex));
+
+    if ( is_t )
+        bli_ciMatCopy_cn(*cols,*rows,*alpha,aptr,*lda,*ldb);
+    else
+        bli_ciMatCopy_cr(*cols,*rows,*alpha,aptr,*lda,*ldb);
+    bli_free_user(temp);
+}
+
+/*
+ * zimatcopy_() - double-precision complex counterpart of cimatcopy_(),
+ * dispatching to bli_ziMatCopy_cn() / bli_ziMatCopy_cr().
+ */
+void zimatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha,dcomplex* aptr, f77_int* lda, f77_int* ldb)
+{
+    const f77_char op = *trans;
+    const int is_n = ( op == 'n' || op == 'N' );
+    const int is_t = ( op == 't' || op == 'T' );
+    const int is_c = ( op == 'c' || op == 'C' );
+    const int is_r = ( op == 'r' || op == 'R' );
+
+    if ( !(is_n || is_t || is_c || is_r) )
+    {
+        bli_print_msg( " Invalid trans setting zimatcopy_() .", __FILE__, __LINE__ );
+        return ;
+    }
+
+    if ( *rows <= 0 || *cols <= 0 || alpha == NULL || aptr == NULL || *lda < 1 || *ldb < 1)
+    {
+        // The message used to name dimatcopy_(), which sent users to the wrong routine.
+        bli_print_msg( " Invalid function parameters zimatcopy_() .", __FILE__, __LINE__ );
+        return ;
+    }
+
+    if ( is_n )
+    {
+        bli_ziMatCopy_cn(*rows,*cols,*alpha,aptr,*lda,*ldb);
+        return ;
+    }
+    if ( is_r )
+    {
+        bli_ziMatCopy_cr(*rows,*cols,*alpha,aptr,*lda,*ldb);
+        return ;
+    }
+
+    // Pre-transpose into scratch memory (size computed in size_t).
+    dcomplex* temp = (dcomplex *) bli_malloc_user((size_t)(*rows)*(size_t)(*lda)*sizeof(dcomplex),NULL);
+    if ( temp == NULL )
+    {
+        bli_print_msg( " Memory allocation failed zimatcopy_() .", __FILE__, __LINE__ );
+        return ;
+    }
+    bli_ztranspose(aptr,temp,*lda,*rows);
+
+    // Every copy-back uses sizeof(dcomplex); the 'c' path previously used
+    // sizeof(scomplex) and restored only half of each column.
+    for (dim_t i = 0; i < *cols; i++)
+        memcpy(&aptr[i*(*lda)],&temp[i*(*lda)],(*rows)*sizeof(dcomplex));
+
+    if ( is_t )
+        bli_ziMatCopy_cn(*cols,*rows,*alpha,aptr,*lda,*ldb);
+    else
+        bli_ziMatCopy_cr(*cols,*rows,*alpha,aptr,*lda,*ldb);
+    bli_free_user(temp);
+}
+
+// suffix cn means - column major & non-trans
+static dim_t bli_siMatCopy_cn(dim_t rows,dim_t cols,const float alpha,float* a,dim_t lda, dim_t ldb)
+{
+    dim_t i,j;
+
+    float* s_aptr;
+    float* d_aptr;
+
+    if ( rows <= 0 || cols <= 0 || a == NULL || lda < cols || ldb < cols)
+    {
+        bli_print_msg( " Invalid function parameters bli_siMatCopy_cn() .", __FILE__, __LINE__ );
+        return (0);
+    }
+
+    if ( lda == ldb && alpha == 1.0)
+        return (0);
+
+    s_aptr = a;
+    d_aptr = a;
+    if ( alpha == 0.0 )
+    {
+        for ( i=0; i
+/* NOTE(review): the text between the loop header above and the include block below was lost when this patch was extracted (remainder of bla_imatcopy.c, the next file's diff header and its license); restore it from the original patch. */
+#ifdef WIN32
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+#include "blis.h"
+
+/*
+ * Benchmark driver for ?imatcopy_: runs one in-place call per square
+ * problem size from p_begin to p_end and prints one result row per size.
+ */
+int main( int argc, char** argv )
+{
+    obj_t data;
+    obj_t alpha;
+    dim_t p;
+    dim_t p_begin, p_end, p_inc;
+    p_begin = 200;
+    p_end = 2000;
+    p_inc = 200;
+
+    trans_t trans;
+    f77_char f77_trans;
+
+    num_t dt;
+    int rows;
+    int cols;
+
+    inc_t cs_data;
+
+    double dtime;
+    double dtime_save;
+    double gflops;
+
+    dt = BLIS_DOUBLE;
+    trans = BLIS_NO_TRANSPOSE;
+    bli_param_map_blis_to_netlib_trans( trans, &f77_trans);
+
+    for ( p = p_begin; p <= p_end; p += p_inc )
+    {
+        // Reset per problem size; otherwise the best time of the smallest
+        // problem is reused for every larger one and inflates its GFLOPS.
+        dtime_save = DBL_MAX;
+
+        rows = p;
+        cols = p;
+
+        cs_data = rows;
+
+        bli_obj_create( dt, 1, 1, 0, 0, &alpha);
+        bli_obj_create( dt, rows, cols, 1, cs_data, &data );
+
+        bli_randm( &data );
+        bli_setsc( (2.0/1.0), 0.0, &alpha );
+
+        dtime = bli_clock();
+
+        // Dispatch on dt to the matching Fortran-style entry point.
+        if ( bli_is_float( dt ) )
+        {
+            f77_int rows_p = bli_obj_length( &data);
+            f77_int cols_p = bli_obj_width( &data);
+
+            f77_int lda = bli_obj_col_stride( &data);
+            f77_int ldb = bli_obj_col_stride( &data);
+
+            float* alpha_p = bli_obj_buffer( &alpha );
+            float* a_p = bli_obj_buffer( &data);
+
+            simatcopy_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, &ldb);
+        }
+        else if ( bli_is_double( dt ) )
+        {
+            f77_int rows_p = bli_obj_length( &data);
+            f77_int cols_p = bli_obj_width( &data);
+
+            f77_int lda = bli_obj_col_stride( &data);
+            f77_int ldb = bli_obj_col_stride( &data);
+
+            double* alpha_p = bli_obj_buffer( &alpha );
+            double* a_p = bli_obj_buffer( &data);
+            dimatcopy_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, &ldb);
+        }
+        else if ( bli_is_scomplex( dt ) )
+        {
+            f77_int rows_p = bli_obj_length( &data);
+            f77_int cols_p = bli_obj_width( &data);
+
+            f77_int lda = bli_obj_col_stride( &data);
+            f77_int ldb = bli_obj_col_stride( &data);
+
+            scomplex* alpha_p = bli_obj_buffer( &alpha );
+            scomplex* a_p = bli_obj_buffer( &data);
+
+            cimatcopy_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, &ldb);
+        }
+        else if ( bli_is_dcomplex( dt ) )
+        {
+            f77_int rows_p = bli_obj_length( &data);
+            f77_int cols_p = bli_obj_width( &data);
+
+            f77_int lda = bli_obj_col_stride( &data);
+            f77_int ldb = bli_obj_col_stride( &data);
+
+            dcomplex* alpha_p = bli_obj_buffer( &alpha );
+            dcomplex* a_p = bli_obj_buffer( &data);
+
+            zimatcopy_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, &ldb);
+        }
+
+        dtime_save = bli_clock_min_diff( dtime_save, dtime );
+        gflops = ( 2.0 * rows * cols ) / ( dtime_save * 1.0e9 );
+
+        if ( bli_is_complex( dt ) ) gflops *= 4.0;
+
+        printf( "( %2lu, 1:4 ) = [ %4lu %4lu %7.2f ];\n",
+                ( unsigned long )(p - p_begin)/p_inc + 1,
+                ( unsigned long )rows,
+                ( unsigned long )cols, gflops );
+        bli_obj_free( &alpha );
+        bli_obj_free( &data );
+    }
+    return (0);
+}
diff --git a/test/test_omatadd.c b/test/test_omatadd.c
new file mode 100644
index 0000000000..e741b505f9
--- /dev/null
+++ b/test/test_omatadd.c
@@ -0,0 +1,181 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+
+   Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    - Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    - Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    - Neither the name(s) of the copyright holder(s) nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifdef WIN32
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+#include "blis.h"
+// Benchmark driver for ?omatadd_: one call per square problem size, one printed result row per size.
+int main( int argc, char** argv )
+{
+
+    obj_t data_aptr;
+    obj_t data_bptr;
+    obj_t data_cptr;
+    obj_t alpha;
+    obj_t beta;
+    dim_t p;
+    dim_t p_begin, p_end, p_inc;
+    p_begin = 200;
+    p_end = 2000;
+    p_inc = 200;
+
+    trans_t transa,transb;
+    f77_char f77_transa,f77_transb;
+
+    num_t dt;
+    int rows;
+    int cols;
+
+    inc_t cs_data;
+
+    double dtime;
+    double dtime_save;
+    double gflops;
+
+    dt = BLIS_DOUBLE;
+    transa = BLIS_NO_TRANSPOSE;
+    transb = BLIS_NO_TRANSPOSE;
+    bli_param_map_blis_to_netlib_trans( transa, &f77_transa);
+    bli_param_map_blis_to_netlib_trans( transb, &f77_transb);
+
+    for ( p = p_begin; p <= p_end; p += p_inc )
+    {
+        dtime_save = DBL_MAX; // reset per size so a smaller problem's best time is not reused
+
+        rows = p;
+        cols = p;
+        cs_data = rows;
+
+        bli_obj_create( dt, 1, 1, 0, 0, &alpha);
+        bli_obj_create( dt, 1, 1, 0, 0, &beta);
+        bli_obj_create( dt, rows, cols, 1, cs_data, &data_aptr );
+        bli_obj_create( dt, rows, cols, 1, cs_data, &data_bptr );
+        bli_obj_create( dt, rows, cols, 1, cs_data, &data_cptr );
+
+        bli_randm( &data_aptr );
+        bli_randm( &data_bptr );
+        bli_setsc( (2.0/1.0), 0.0, &alpha );
+        bli_setsc( (2.0/1.0), 0.0, &beta );
+
+        dtime = bli_clock();
+        // Dispatch on dt to the matching Fortran-style entry point.
+        if ( bli_is_float( dt ) )
+        {
+            f77_int rows_p = bli_obj_length( &data_aptr);
+            f77_int cols_p = bli_obj_width( &data_aptr);
+
+            f77_int lda = bli_obj_col_stride( &data_aptr);
+            f77_int ldb = bli_obj_col_stride( &data_bptr);
+            f77_int ldc = bli_obj_col_stride( &data_cptr);
+
+            float* alpha_p = bli_obj_buffer( &alpha );
+            float* beta_p = bli_obj_buffer( &beta );
+            float* a_p = bli_obj_buffer( &data_aptr);
+            float* b_p = bli_obj_buffer( &data_bptr);
+            float* c_p = bli_obj_buffer( &data_cptr);
+
+            somatadd_ (&f77_transa,&f77_transb, &rows_p, &cols_p, alpha_p, a_p, &lda, beta_p, b_p, &ldb, c_p, &ldc);
+        }
+        else if ( bli_is_double( dt ) )
+        {
+            f77_int rows_p = bli_obj_length( &data_aptr);
+            f77_int cols_p = bli_obj_width( &data_aptr);
+
+            f77_int lda = bli_obj_col_stride( &data_aptr);
+            f77_int ldb = bli_obj_col_stride( &data_bptr);
+            f77_int ldc = bli_obj_col_stride( &data_cptr);
+
+            double* alpha_p = bli_obj_buffer( &alpha );
+            double* beta_p = bli_obj_buffer( &beta );
+            double* a_p = bli_obj_buffer( &data_aptr);
+            double* b_p = bli_obj_buffer( &data_bptr);
+            double* c_p = bli_obj_buffer( &data_cptr);
+
+            domatadd_ (&f77_transa,&f77_transb, &rows_p, &cols_p, alpha_p, a_p, &lda, beta_p, b_p, &ldb, c_p, &ldc);
+        }
+        else if ( bli_is_scomplex( dt ) )
+        {
+            f77_int rows_p = bli_obj_length( &data_aptr);
+            f77_int cols_p = bli_obj_width( &data_aptr);
+
+            f77_int lda = bli_obj_col_stride( &data_aptr);
+            f77_int ldb = bli_obj_col_stride( &data_bptr);
+            f77_int ldc = bli_obj_col_stride( &data_cptr);
+
+            scomplex* alpha_p = bli_obj_buffer( &alpha );
+            scomplex* beta_p = bli_obj_buffer( &beta );
+            scomplex* a_p = bli_obj_buffer( &data_aptr);
+            scomplex* b_p = bli_obj_buffer( &data_bptr);
+            scomplex* c_p = bli_obj_buffer( &data_cptr);
+
+            comatadd_ (&f77_transa,&f77_transb, &rows_p, &cols_p, alpha_p, a_p, &lda, beta_p, b_p, &ldb, c_p, &ldc);
+        }
+        else if ( bli_is_dcomplex( dt ) )
+        {
+            f77_int rows_p = bli_obj_length( &data_aptr);
+            f77_int cols_p = bli_obj_width( &data_aptr);
+
+            f77_int lda = bli_obj_col_stride( &data_aptr);
+            f77_int ldb = bli_obj_col_stride( &data_bptr);
+            f77_int ldc = bli_obj_col_stride( &data_cptr);
+
+            dcomplex* alpha_p = bli_obj_buffer( &alpha );
+            dcomplex* beta_p = bli_obj_buffer( &beta );
+            dcomplex* a_p = bli_obj_buffer( &data_aptr);
+            dcomplex* b_p = bli_obj_buffer( &data_bptr);
+            dcomplex* c_p = bli_obj_buffer( &data_cptr);
+
+            zomatadd_ (&f77_transa,&f77_transb, &rows_p, &cols_p, alpha_p, a_p, &lda, beta_p, b_p, &ldb, c_p, &ldc);
+        }
+
+        dtime_save = bli_clock_min_diff( dtime_save, dtime );
+        gflops = ( 2.0 * rows * cols ) / ( dtime_save * 1.0e9 );
+
+        if ( bli_is_complex( dt ) ) gflops *= 4.0;
+
+        printf( "( %2lu, 1:4 ) = [ %4lu %4lu %7.2f ];\n",
+                ( unsigned long )(p - p_begin)/p_inc + 1,
+                ( unsigned long )rows,
+                ( unsigned long )cols, gflops );
+        bli_obj_free( &alpha );
+        bli_obj_free( &beta );
+        bli_obj_free( &data_aptr );
+        bli_obj_free( &data_bptr );
+        bli_obj_free( &data_cptr );
+    }
+    return (0);
+}
diff --git a/test/test_omatcopy.c b/test/test_omatcopy.c
new file mode 100644
index 0000000000..87f1177eda
--- /dev/null
+++ b/test/test_omatcopy.c
@@ -0,0 +1,159 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+
+   Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    - Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    - Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    - Neither the name(s) of the copyright holder(s) nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifdef WIN32
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+#include "blis.h"
+// Benchmark driver for ?omatcopy_: one call per square problem size, one printed result row per size.
+int main( int argc, char** argv )
+{
+
+    obj_t data_aptr;
+    obj_t data_bptr;
+    obj_t alpha;
+    dim_t p;
+    dim_t p_begin, p_end, p_inc;
+    p_begin = 200;
+    p_end = 2000;
+    p_inc = 200;
+
+    trans_t trans;
+    f77_char f77_trans;
+
+    num_t dt;
+    int rows;
+    int cols;
+
+    inc_t cs_data;
+
+    double dtime;
+    double dtime_save;
+    double gflops;
+
+    dt = BLIS_DOUBLE;
+    trans = BLIS_NO_TRANSPOSE;
+    bli_param_map_blis_to_netlib_trans( trans, &f77_trans);
+
+    for ( p = p_begin; p <= p_end; p += p_inc )
+    {
+        dtime_save = DBL_MAX; // reset per size so a smaller problem's best time is not reused
+
+        rows = p;
+        cols = p;
+        cs_data = rows;
+
+        bli_obj_create( dt, 1, 1, 0, 0, &alpha);
+        bli_obj_create( dt, rows, cols, 1, cs_data, &data_aptr );
+        bli_obj_create( dt, rows, cols, 1, cs_data, &data_bptr );
+
+        bli_randm( &data_aptr );
+        bli_setsc( (2.0/1.0), 0.0, &alpha );
+
+        dtime = bli_clock();
+        // Dispatch on dt to the matching Fortran-style entry point.
+        if ( bli_is_float( dt ) )
+        {
+            f77_int rows_p = bli_obj_length( &data_aptr);
+            f77_int cols_p = bli_obj_width( &data_aptr);
+
+            f77_int lda = bli_obj_col_stride( &data_aptr);
+            f77_int ldb = bli_obj_col_stride( &data_bptr);
+
+            float* alpha_p = bli_obj_buffer( &alpha );
+            float* a_p = bli_obj_buffer( &data_aptr);
+            float* b_p = bli_obj_buffer( &data_bptr);
+
+            somatcopy_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, b_p, &ldb);
+        }
+        else if ( bli_is_double( dt ) )
+        {
+            f77_int rows_p = bli_obj_length( &data_aptr);
+            f77_int cols_p = bli_obj_width( &data_aptr);
+
+            f77_int lda = bli_obj_col_stride( &data_aptr);
+            f77_int ldb = bli_obj_col_stride( &data_bptr);
+
+            double* alpha_p = bli_obj_buffer( &alpha );
+            double* a_p = bli_obj_buffer( &data_aptr);
+            double* b_p = bli_obj_buffer( &data_bptr);
+
+            domatcopy_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, b_p, &ldb);
+        }
+        else if ( bli_is_scomplex( dt ) )
+        {
+            f77_int rows_p = bli_obj_length( &data_aptr);
+            f77_int cols_p = bli_obj_width( &data_aptr);
+
+            f77_int lda = bli_obj_col_stride( &data_aptr);
+            f77_int ldb = bli_obj_col_stride( &data_bptr);
+
+            scomplex* alpha_p = bli_obj_buffer( &alpha );
+            scomplex* a_p = bli_obj_buffer( &data_aptr);
+            scomplex* b_p = bli_obj_buffer( &data_bptr);
+
+            comatcopy_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, b_p, &ldb);
+        }
+        else if ( bli_is_dcomplex( dt ) )
+        {
+            f77_int rows_p = bli_obj_length( &data_aptr);
+            f77_int cols_p = bli_obj_width( &data_aptr);
+
+            f77_int lda = bli_obj_col_stride( &data_aptr);
+            f77_int ldb = bli_obj_col_stride( &data_bptr);
+
+            dcomplex* alpha_p = bli_obj_buffer( &alpha );
+            dcomplex* a_p = bli_obj_buffer( &data_aptr);
+            dcomplex* b_p = bli_obj_buffer( &data_bptr);
+
+            zomatcopy_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, b_p, &ldb);
+        }
+
+        dtime_save = bli_clock_min_diff( dtime_save, dtime );
+        gflops = ( 2.0 * rows * cols ) / ( dtime_save * 1.0e9 );
+
+        if ( bli_is_complex( dt ) ) gflops *= 4.0;
+
+        printf( "( %2lu, 1:4 ) = [ %4lu %4lu %7.2f ];\n",
+                ( unsigned long )(p - p_begin)/p_inc + 1,
+                ( unsigned long )rows,
+                ( unsigned long )cols, gflops );
+        bli_obj_free( &alpha );
+        bli_obj_free( &data_aptr );
+        bli_obj_free( &data_bptr );
+    }
+    return (0);
+}
diff --git a/test/test_omatcopy2.c
b/test/test_omatcopy2.c new file mode 100644 index 0000000000..b90b4ae883 --- /dev/null +++ b/test/test_omatcopy2.c @@ -0,0 +1,168 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+*/
+
+#ifdef WIN32
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+#include "blis.h"
+// Benchmark driver for ?omatcopy2_: one call per square problem size, one printed result row per size.
+int main( int argc, char** argv )
+{
+
+    obj_t data_aptr;
+    obj_t data_bptr;
+    obj_t alpha;
+    dim_t p;
+    dim_t p_begin, p_end, p_inc;
+    p_begin = 200;
+    p_end = 2000;
+    p_inc = 200;
+
+    trans_t trans;
+    f77_char f77_trans;
+    f77_int stridea;
+    f77_int strideb;
+
+    num_t dt;
+    int rows;
+    int cols;
+
+    inc_t cs_data_bptr;
+    inc_t cs_data_aptr;
+
+    double dtime;
+    double dtime_save;
+    double gflops;
+
+    dt = BLIS_DOUBLE;
+    trans = BLIS_NO_TRANSPOSE;
+    bli_param_map_blis_to_netlib_trans( trans, &f77_trans);
+
+    stridea = 1;
+    strideb = 1;
+    for ( p = p_begin; p <= p_end; p += p_inc )
+    {
+        dtime_save = DBL_MAX; // reset per size so a smaller problem's best time is not reused
+        rows = p;
+        cols = p;
+        cs_data_aptr = rows + (rows - 1)*stridea;
+        cs_data_bptr = rows + (rows - 1)*strideb;
+
+        bli_obj_create( dt, 1, 1, 0, 0, &alpha);
+        bli_obj_create( dt, cs_data_aptr, cols, 1, cs_data_aptr, &data_aptr );
+        bli_obj_create( dt, cs_data_bptr, cols, 1, cs_data_bptr, &data_bptr );
+
+        bli_randm( &data_aptr );
+        bli_setsc( (2.0/1.0), 0.0, &alpha );
+
+        dtime = bli_clock();
+        // Dispatch on dt; rows_p undoes the (rows - 1)*stridea padding added to the object length above.
+        if ( bli_is_float( dt ) )
+        {
+            f77_int rows_p = bli_obj_length( &data_aptr);
+            f77_int cols_p = bli_obj_width( &data_aptr);
+            rows_p = rows_p - (rows - 1)*stridea;
+
+            f77_int lda = bli_obj_col_stride( &data_aptr);
+            f77_int ldb = bli_obj_col_stride( &data_bptr);
+
+            float* alpha_p = bli_obj_buffer( &alpha );
+            float* a_p = bli_obj_buffer( &data_aptr);
+            float* b_p = bli_obj_buffer( &data_bptr);
+
+            somatcopy2_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, &stridea, b_p, &ldb, &strideb);
+        }
+        else if ( bli_is_double( dt ) )
+        {
+            f77_int rows_p = bli_obj_length( &data_aptr);
+            f77_int cols_p = bli_obj_width( &data_aptr);
+            rows_p = rows_p - (rows - 1)*stridea;
+
+            f77_int lda = bli_obj_col_stride( &data_aptr);
+            f77_int ldb = bli_obj_col_stride( &data_bptr);
+
+            double* alpha_p = bli_obj_buffer( &alpha );
+            double* a_p = bli_obj_buffer( &data_aptr);
+            double* b_p = bli_obj_buffer( &data_bptr);
+
+            domatcopy2_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, &stridea, b_p, &ldb, &strideb);
+        }
+        else if ( bli_is_scomplex( dt ) )
+        {
+            f77_int rows_p = bli_obj_length( &data_aptr);
+            f77_int cols_p = bli_obj_width( &data_aptr);
+            rows_p = rows_p - (rows - 1)*stridea;
+
+            f77_int lda = bli_obj_col_stride( &data_aptr);
+            f77_int ldb = bli_obj_col_stride( &data_bptr);
+
+            scomplex* alpha_p = bli_obj_buffer( &alpha );
+            scomplex* a_p = bli_obj_buffer( &data_aptr);
+            scomplex* b_p = bli_obj_buffer( &data_bptr);
+
+            comatcopy2_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, &stridea, b_p, &ldb, &strideb);
+        }
+        else if ( bli_is_dcomplex( dt ) )
+        {
+            f77_int rows_p = bli_obj_length( &data_aptr);
+            f77_int cols_p = bli_obj_width( &data_aptr);
+            rows_p = rows_p - (rows - 1)*stridea;
+
+            f77_int lda = bli_obj_col_stride( &data_aptr);
+            f77_int ldb = bli_obj_col_stride( &data_bptr);
+
+            dcomplex* alpha_p = bli_obj_buffer( &alpha );
+            dcomplex* a_p = bli_obj_buffer( &data_aptr);
+            dcomplex* b_p = bli_obj_buffer( &data_bptr);
+
+            zomatcopy2_ (&f77_trans, &rows_p, &cols_p, alpha_p, a_p, &lda, &stridea, b_p, &ldb, &strideb);
+        }
+
+        dtime_save = bli_clock_min_diff( dtime_save, dtime );
+        gflops = ( 2.0 * rows * cols ) / ( dtime_save * 1.0e9 );
+
+        if ( bli_is_complex( dt ) ) gflops *= 4.0;
+
+        printf( "( %2lu, 1:4 ) = [ %4lu %4lu %7.2f ];\n",
+                ( unsigned long )(p - p_begin)/p_inc + 1,
+                ( unsigned long )rows,
+                ( unsigned long )cols, gflops );
+        bli_obj_free( &alpha );
+        bli_obj_free( &data_aptr );
+        bli_obj_free( &data_bptr );
+    }
+    return (0);
+}