From 8bd916015a98f190edaae3aa705243317beb9ade Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 3 May 2022 08:51:50 +0300 Subject: [PATCH 1/8] add method to query HW size Signed-off-by: Jeff Hammond --- frame/include/blis.h | 1 + frame/thread/bli_affinity.c | 105 +++++++++++++++++++++++++++++ frame/thread/bli_affinity.h | 44 ++++++++++++ frame/thread/bli_l3_decor_openmp.c | 26 +++++++ 4 files changed, 176 insertions(+) create mode 100644 frame/thread/bli_affinity.c create mode 100644 frame/thread/bli_affinity.h diff --git a/frame/include/blis.h b/frame/include/blis.h index 98ebee878d..f912e93ed4 100644 --- a/frame/include/blis.h +++ b/frame/include/blis.h @@ -84,6 +84,7 @@ extern "C" { #include "bli_thread.h" #include "bli_pthread.h" +#include "bli_affinity.h" // -- Constant definitions -- diff --git a/frame/thread/bli_affinity.c b/frame/thread/bli_affinity.c new file mode 100644 index 0000000000..2744f8b6f1 --- /dev/null +++ b/frame/thread/bli_affinity.c @@ -0,0 +1,105 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022 NVIDIA + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +// we need a way to detect oversubscription of the kind where +// hierarchical parallelism is used and the affinity mask within +// which BLIS runs does not have enough hardware threads to support +// the requested software threads. +// +// this is motivated by, or related to: +// https://github.com/flame/blis/issues/588 +// https://github.com/flame/blis/pull/607 +// https://github.com/flame/blis/issues/604 +// https://github.com/flame/blis/issues/603 + +#include "bli_affinity.h" + +#ifndef BLIS_ENABLE_AFFINITY + +// define the symbol for platforms like Windows and MacOS that do not support the Linux affinity API + +dim_t bli_affinity_get_hw_size(bli_affinity_scope_t scope) +{ + // this is the largest possible value returned by this function + // and it means that the affinity mask does not constrain the current scope. 
+ return (dim_t)1024; +} + +#else + +// this macro has to come before any other headers +#define _GNU_SOURCE + +#include +#include + +// scope is either the calling process or the calling thread: +// 0 = calling process +// 1 = calling thread + +dim_t bli_affinity_get_hw_size(bli_affinity_scope_t scope) +{ + int rc; + int active_cpus; + pid_t pid; + cpu_set_t mask; + + if (scope == 0) { + pid = getpid(); + } else { + // this means the current thread + pid = 0; + } + + CPU_ZERO(&mask); + + // if the CPU mask is larger than 1024 bits, this needs to change. + // see https://man7.org/linux/man-pages/man2/sched_getaffinity.2.html for details. + rc = sched_getaffinity(pid, sizeof(cpu_set_t), &mask); + if (rc) { + bli_print_msg( "sched_getaffinity failed", + __FILE__, __LINE__ ); + bli_abort(); + } + + active_cpus = 0; + for (int i=0; i Date: Tue, 3 May 2022 09:41:18 +0300 Subject: [PATCH 2/8] use Linux macro Signed-off-by: Jeff Hammond --- frame/thread/bli_affinity.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/frame/thread/bli_affinity.c b/frame/thread/bli_affinity.c index 2744f8b6f1..5718d4713a 100644 --- a/frame/thread/bli_affinity.c +++ b/frame/thread/bli_affinity.c @@ -45,7 +45,7 @@ #include "bli_affinity.h" -#ifndef BLIS_ENABLE_AFFINITY +#ifndef BLIS_OS_LINUX // define the symbol for platforms like Windows and MacOS that do not support the Linux affinity API @@ -56,7 +56,7 @@ dim_t bli_affinity_get_hw_size(bli_affinity_scope_t scope) return (dim_t)1024; } -#else +#else // BLIS_OS_LINUX // this macro has to come before any other headers #define _GNU_SOURCE @@ -102,4 +102,4 @@ dim_t bli_affinity_get_hw_size(bli_affinity_scope_t scope) return active_cpus; } -#endif // BLIS_ENABLE_AFFINITY +#endif // BLIS_OS_LINUX From b3844fd0d0bb88e770dadd4eb1ce3ea0c11c9e05 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 3 May 2022 09:42:59 +0300 Subject: [PATCH 3/8] move headers around this is gross but i need #define GNU_SOURCE before any other 
headers, or the CPU_ISSET macro isn't defined. Signed-off-by: Jeff Hammond --- frame/thread/bli_affinity.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/frame/thread/bli_affinity.c b/frame/thread/bli_affinity.c index 5718d4713a..4e358bf634 100644 --- a/frame/thread/bli_affinity.c +++ b/frame/thread/bli_affinity.c @@ -43,10 +43,10 @@ // https://github.com/flame/blis/issues/604 // https://github.com/flame/blis/issues/603 -#include "bli_affinity.h" - #ifndef BLIS_OS_LINUX +#include "bli_affinity.h" + // define the symbol for platforms like Windows and MacOS that do not support the Linux affinity API dim_t bli_affinity_get_hw_size(bli_affinity_scope_t scope) @@ -64,6 +64,8 @@ dim_t bli_affinity_get_hw_size(bli_affinity_scope_t scope) #include #include +#include "bli_affinity.h" + // scope is either the calling process or the calling thread: // 0 = calling process // 1 = calling thread From 3193c90fdcd720050d2b7d1d480918916a7dc86d Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 3 May 2022 11:17:45 +0300 Subject: [PATCH 4/8] add copyright because of changes to bli_l3_thread_decorator_thread_check Signed-off-by: Jeff Hammond --- frame/thread/bli_l3_decor_openmp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/frame/thread/bli_l3_decor_openmp.c b/frame/thread/bli_l3_decor_openmp.c index f5f727cd8e..bf55be41a7 100644 --- a/frame/thread/bli_l3_decor_openmp.c +++ b/frame/thread/bli_l3_decor_openmp.c @@ -6,6 +6,7 @@ Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018, Advanced Micro Devices, Inc. 
+ Copyright (C) 2022, NVIDIA Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are From 629ef194440386fe4f5806da7d7fc9811c2d400b Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 3 May 2022 11:18:37 +0300 Subject: [PATCH 5/8] make affinity problem into a fatal error for now Signed-off-by: Jeff Hammond --- frame/thread/bli_l3_decor_openmp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/frame/thread/bli_l3_decor_openmp.c b/frame/thread/bli_l3_decor_openmp.c index bf55be41a7..6f63a0acd2 100644 --- a/frame/thread/bli_l3_decor_openmp.c +++ b/frame/thread/bli_l3_decor_openmp.c @@ -260,6 +260,7 @@ void bli_l3_thread_decorator_thread_check bli_print_msg( "The affinity mask on this process does not have " "enough HW threads for your requested SW threads.", __FILE__, __LINE__ ); + bli_abort(); bli_thrcomm_init( n_threads_hwmask, gl_comm ); bli_rntm_set_num_threads_only( n_threads_hwmask, rntm ); From 3697489f4426aab989bd26ad1bcc9475ec5cfde7 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 3 May 2022 11:54:13 +0300 Subject: [PATCH 6/8] hack to fix the affinity macro business Signed-off-by: Jeff Hammond --- frame/thread/bli_affinity.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/frame/thread/bli_affinity.c b/frame/thread/bli_affinity.c index 4e358bf634..79e4e44072 100644 --- a/frame/thread/bli_affinity.c +++ b/frame/thread/bli_affinity.c @@ -32,6 +32,12 @@ */ +// this macro has to come before any other headers. +// the C library only exposes cpu_set_t and the CPU_* macros (e.g. CPU_ISSET) when _GNU_SOURCE is defined before the first system header is included. 
+#define _GNU_SOURCE + +#include "bli_affinity.h" + // we need a way to detect oversubscription of the kind where // hierarchical parallelism is used and the affinity mask within // which BLIS runs does not have enough hardware threads to support @@ -45,8 +51,6 @@ #ifndef BLIS_OS_LINUX -#include "bli_affinity.h" - // define the symbol for platforms like Windows and MacOS that do not support the Linux affinity API dim_t bli_affinity_get_hw_size(bli_affinity_scope_t scope) @@ -58,14 +62,9 @@ dim_t bli_affinity_get_hw_size(bli_affinity_scope_t scope) #else // BLIS_OS_LINUX -// this macro has to come before any other headers -#define _GNU_SOURCE - #include #include -#include "bli_affinity.h" - // scope is either the calling process or the calling thread: // 0 = calling process // 1 = calling thread @@ -77,7 +76,7 @@ dim_t bli_affinity_get_hw_size(bli_affinity_scope_t scope) pid_t pid; cpu_set_t mask; - if (scope == 0) { + if (scope == process) { pid = getpid(); } else { // this means the current thread From 5906e3f0babed42f4d1043f385d6a03bceae00cf Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 3 May 2022 11:54:49 +0300 Subject: [PATCH 7/8] add a test of the affinity check --- test/other/test_affinity.c | 76 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 test/other/test_affinity.c diff --git a/test/other/test_affinity.c b/test/other/test_affinity.c new file mode 100644 index 0000000000..110e06eb01 --- /dev/null +++ b/test/other/test_affinity.c @@ -0,0 +1,76 @@ +#define _GNU_SOURCE + +#include +#include + +#include +#include + +#include + +int main(void) +{ + int m=10, n=10, k=10; + double A[100], B[100], C[100]; + + for (int i=0; i<100; i++) { + A[i] = B[i] = C[i] = 1.0; + } + + cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, + 10, 10, 10, 1.0, A, 10, B, 10, 1.0, C, 10); + + { + int rc; + pid_t pid = getpid(); + cpu_set_t old_mask, new_mask; + int active_cpus; + + CPU_ZERO(&old_mask); + + rc = 
sched_getaffinity(pid, sizeof(cpu_set_t), &old_mask); + if (rc) { + printf("sched_getaffinity returned %d\n", rc); + abort(); + } + + active_cpus = 0; + for (int i=0; i Date: Tue, 3 May 2022 12:07:30 +0300 Subject: [PATCH 8/8] copyright Signed-off-by: Jeff Hammond --- test/other/test_affinity.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/test/other/test_affinity.c b/test/other/test_affinity.c index 110e06eb01..14dae993a3 100644 --- a/test/other/test_affinity.c +++ b/test/other/test_affinity.c @@ -1,3 +1,39 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022 NVIDIA + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +// this macro has to come before any other headers. +// the C library only exposes cpu_set_t and the CPU_* macros (e.g. CPU_ISSET) when _GNU_SOURCE is defined before the first system header is included. #define _GNU_SOURCE #include