From 00d746323a553290ed73de981ca8f48817f8c106 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Wed, 11 May 2022 15:29:32 -0500 Subject: [PATCH 1/2] Preliminary support for c_next in auxinfo_t. Details: - Added .c_next field to auxinfo_t struct definition. - Defined accessor macros for the auxinfo_t.c_next field. - Compute reasonable values for c_next within the gemm macrokernel (bli_gemm_ker_var2.c) and embed them within the local auxinfo_t that is passed along into the gemm microkernel. Thanks to Devin Matthews and AMD for their contributions toward this feature. --- frame/3/bli_l3_thrinfo.h | 2 ++ frame/3/gemm/bli_gemm_ker_var2.c | 7 +++++++ frame/base/bli_auxinfo.h | 8 ++++++++ frame/include/bli_type_defs.h | 5 +++-- 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/frame/3/bli_l3_thrinfo.h b/frame/3/bli_l3_thrinfo.h index 37a3909fd6..cab83a39a6 100644 --- a/frame/3/bli_l3_thrinfo.h +++ b/frame/3/bli_l3_thrinfo.h @@ -43,6 +43,8 @@ // change depending on BLIS_ENABLE_JRIR_SLAB / BLIS_ENABLE_JRIR_RR. #define bli_gemm_get_next_a_upanel( a1, step, inc ) ( a1 + step * inc ) #define bli_gemm_get_next_b_upanel( b1, step, inc ) ( b1 + step * inc ) +#define bli_gemm_get_next_c_utilem( c1, step, inc ) ( c1 + step * inc ) +#define bli_gemm_get_next_c_utilen( c1, step, inc ) ( c1 + step * inc ) // gemmt diff --git a/frame/3/gemm/bli_gemm_ker_var2.c b/frame/3/gemm/bli_gemm_ker_var2.c index 199e72cb65..c9e8957ec7 100644 --- a/frame/3/gemm/bli_gemm_ker_var2.c +++ b/frame/3/gemm/bli_gemm_ker_var2.c @@ -293,18 +293,25 @@ void bli_gemm_ker_var2 // Compute the addresses of the next panels of A and B. const char* a2 = bli_gemm_get_next_a_upanel( a1, rstep_a, ir_inc ); + const char* c2 = bli_gemm_get_next_c_utilem( c11, rstep_c, ir_inc ); if ( bli_is_last_iter( i, ir_end, ir_tid, ir_nt ) ) { a2 = a_cast; b2 = bli_gemm_get_next_b_upanel( b1, cstep_b, jr_inc ); + c2 = bli_gemm_get_next_c_utilen( c1, cstep_c, jr_inc ); if ( bli_is_last_iter( j, jr_end, jr_tid, jr_nt ) ) + { b2 = b_cast; + c2 = bli_gemm_get_next_c_utilem( c_cast, rs_c, m ); + c2 = bli_gemm_get_next_c_utilem( c2, rstep_c, ir_inc ); + } } // Save addresses of next panels of A and B to the auxinfo_t // object. bli_auxinfo_set_next_a( a2, &aux ); bli_auxinfo_set_next_b( b2, &aux ); + bli_auxinfo_set_next_b( c2, &aux ); // Edge case handling now occurs within the microkernel itself, but // we must still explicitly accumulate to a temporary microtile in diff --git a/frame/base/bli_auxinfo.h b/frame/base/bli_auxinfo.h index 166480b30a..73b54a7d91 100644 --- a/frame/base/bli_auxinfo.h +++ b/frame/base/bli_auxinfo.h @@ -55,6 +55,10 @@ BLIS_INLINE const void* bli_auxinfo_next_b( const auxinfo_t* ai ) { return ai->b_next; } +BLIS_INLINE const void* bli_auxinfo_next_c( const auxinfo_t* ai ) +{ + return ai->c_next; +} BLIS_INLINE inc_t bli_auxinfo_is_a( const auxinfo_t* ai ) { @@ -103,6 +107,10 @@ BLIS_INLINE void bli_auxinfo_set_next_b( const void* p, auxinfo_t* ai ) { ai->b_next = p; } +BLIS_INLINE void bli_auxinfo_set_next_c( const void* p, auxinfo_t* ai ) +{ + ai->c_next = p; +} BLIS_INLINE void bli_auxinfo_set_next_ab( const void* ap, const void* bp, auxinfo_t* ai ) { ai->a_next = ap; diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index e957fc6b23..2ce1a932ea 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -1109,10 +1109,11 @@ typedef struct pack_t schema_a; pack_t schema_b; - // Pointers to the micro-panels of A and B which will be used by the - // next call to the micro-kernel. + // Pointers to the micro-panels of A and B, and micro-tile of C, which + // will be used by the next call to the micro-kernel. const void* a_next; const void* b_next; + const void* c_next; // The imaginary strides of A and B. inc_t is_a; From 64b94d64a430bcc6cc6e7da0ed47fa1b18e6d5a4 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Wed, 15 Jun 2022 16:42:35 -0500 Subject: [PATCH 2/2] Fixed typo when setting c_next in gemm macrokernel. --- CREDITS | 1 + frame/3/gemm/bli_gemm_ker_var2.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index b701598cff..43c7b3ed53 100644 --- a/CREDITS +++ b/CREDITS @@ -84,6 +84,7 @@ but many others have contributed code and feedback, including Michael Rader @mrader1248 Pradeep Rao @pradeeptrgit (AMD) Aleksei Rechinskii + Leick Robinson @LeickR (Oracle) Karl Rupp @karlrupp Martin Schatz (The University of Texas at Austin) Nico Schlömer @nschloe diff --git a/frame/3/gemm/bli_gemm_ker_var2.c b/frame/3/gemm/bli_gemm_ker_var2.c index c9e8957ec7..5ccc20825c 100644 --- a/frame/3/gemm/bli_gemm_ker_var2.c +++ b/frame/3/gemm/bli_gemm_ker_var2.c @@ -311,7 +311,7 @@ void bli_gemm_ker_var2 // object. bli_auxinfo_set_next_a( a2, &aux ); bli_auxinfo_set_next_b( b2, &aux ); - bli_auxinfo_set_next_b( c2, &aux ); + bli_auxinfo_set_next_c( c2, &aux ); // Edge case handling now occurs within the microkernel itself, but // we must still explicitly accumulate to a temporary microtile in