From ef2152e3cb815fcb6a502def7b52e71badb461fc Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 6 May 2025 14:23:39 -0600 Subject: [PATCH 001/103] P3663: Add constant_wrapper implementation It currently requires the C++23 feature "deducing this" in order to compile. I've tested it with dev Clang (21.0.0). It should work fine with Clang 20. --- CMakeLists.txt | 2 + .../__p2630_bits/constant_wrapper.hpp | 209 ++++++++++++++++++ .../__p2630_bits/strided_slice.hpp | 2 + tests/CMakeLists.txt | 12 +- tests/test_constant_wrapper.cpp | 72 ++++++ 5 files changed, 296 insertions(+), 1 deletion(-) create mode 100644 include/experimental/__p2630_bits/constant_wrapper.hpp create mode 100644 tests/test_constant_wrapper.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index c725399b..16e77159 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,6 +31,8 @@ set_property(CACHE MDSPAN_CXX_STANDARD PROPERTY STRINGS DETECT 14 17 20 23) option(MDSPAN_ENABLE_CONCEPTS "Try to enable concepts support by giving extra flags." On) +option(MDSPAN_ENABLE_P3663 "Enable implementation of P3663 (Future-proof submdspan_mapping)." Off) + ################################################################################ # Decide on the standard to use diff --git a/include/experimental/__p2630_bits/constant_wrapper.hpp b/include/experimental/__p2630_bits/constant_wrapper.hpp new file mode 100644 index 00000000..58dc7043 --- /dev/null +++ b/include/experimental/__p2630_bits/constant_wrapper.hpp @@ -0,0 +1,209 @@ +#pragma once + +// Implementation borrowed from +// https://github.com/tzlaine/constexpr/blob/master/include/constant_wrapper.hpp +// to which P2781 links. Provisionally assume that the feature test +// macro will be called __cpp_lib_constant_wrapper and that the +// features in P2781 will go in . + +#include +#include + +#if ! 
defined(__cpp_lib_constant_wrapper) + +namespace std { + +namespace exposition_only { + template + struct cw_fixed_value; // exposition only +} + +// GCC 11.4 has ICE with +// "typename unspecified = typename decltype(exposition_only::cw_fixed_value(X))::type" +// as second template parameter of constant_wrapper below. +// +// Replacing that expression with use of the following alias doesn't help. +// +// namespace exposition_only { +// template +// using unspecified_t = typename decltype(cw_fixed_value(X))::type; +// } + +template // exposition only +struct constant_wrapper; + +template +concept constexpr_param = requires { typename constant_wrapper; }; // exposition only + +namespace exposition_only { + template + struct cw_fixed_value { // exposition only + using type = T; + constexpr cw_fixed_value(type v) noexcept: data(v) { } + T data; + }; + + template + struct cw_fixed_value { // exposition only + using type = T[Extent]; + constexpr cw_fixed_value(T (&arr)[Extent]) noexcept: cw_fixed_value(arr, std::make_index_sequence()) { } + T data[Extent]; + + private: + template + constexpr cw_fixed_value(T (&arr)[Extent], std::index_sequence) noexcept: data{arr[Idx]...} { } + }; + + template + cw_fixed_value(T (&)[Extent]) -> cw_fixed_value; // exposition only + template + cw_fixed_value(T) -> cw_fixed_value; // exposition only + + struct cw_operators { // exposition only + // unary operators + template + friend constexpr auto operator+(T) noexcept -> constant_wrapper<(+T::value)> { return {}; } + template + friend constexpr auto operator-(T) noexcept -> constant_wrapper<(-T::value)> { return {}; } + template + friend constexpr auto operator~(T) noexcept -> constant_wrapper<(~T::value)> { return {}; } + template + friend constexpr auto operator!(T) noexcept -> constant_wrapper<(!T::value)> { return {}; } + template + friend constexpr auto operator&(T) noexcept -> constant_wrapper<(&T::value)> { return {}; } + template + friend constexpr auto operator*(T) noexcept -> 
constant_wrapper<(*T::value)> { return {}; } + + // binary operators + template + friend constexpr auto operator+(L, R) noexcept -> constant_wrapper<(L::value + R::value)> { return {}; } + template + friend constexpr auto operator-(L, R) noexcept -> constant_wrapper<(L::value - R::value)> { return {}; } + template + friend constexpr auto operator*(L, R) noexcept -> constant_wrapper<(L::value * R::value)> { return {}; } + template + friend constexpr auto operator/(L, R) noexcept -> constant_wrapper<(L::value / R::value)> { return {}; } + template + friend constexpr auto operator%(L, R) noexcept -> constant_wrapper<(L::value % R::value)> { return {}; } + + template + friend constexpr auto operator<<(L, R) noexcept -> constant_wrapper<(L::value << R::value)> { return {}; } + template + friend constexpr auto operator>>(L, R) noexcept -> constant_wrapper<(L::value >> R::value)> { return {}; } + template + friend constexpr auto operator&(L, R) noexcept -> constant_wrapper<(L::value & R::value)> { return {}; } + template + friend constexpr auto operator|(L, R) noexcept -> constant_wrapper<(L::value | R::value)> { return {}; } + template + friend constexpr auto operator^(L, R) noexcept -> constant_wrapper<(L::value ^ R::value)> { return {}; } + + template + requires (!is_constructible_v || !is_constructible_v) + friend constexpr auto operator&&(L, R) noexcept -> constant_wrapper<(L::value && R::value)> { return {}; } + template + requires (!is_constructible_v || !is_constructible_v) + friend constexpr auto operator||(L, R) noexcept -> constant_wrapper<(L::value || R::value)> { return {}; } + + // comparisons + template + friend constexpr auto operator<=>(L, R) noexcept -> constant_wrapper<(L::value <=> R::value)> { return {}; } + template + friend constexpr auto operator<(L, R) noexcept -> constant_wrapper<(L::value < R::value)> { return {}; } + template + friend constexpr auto operator<=(L, R) noexcept -> constant_wrapper<(L::value <= R::value)> { return {}; } + template 
+ friend constexpr auto operator==(L, R) noexcept -> constant_wrapper<(L::value == R::value)> { return {}; } + template + friend constexpr auto operator!=(L, R) noexcept -> constant_wrapper<(L::value != R::value)> { return {}; } + template + friend constexpr auto operator>(L, R) noexcept -> constant_wrapper<(L::value > R::value)> { return {}; } + template + friend constexpr auto operator>=(L, R) noexcept -> constant_wrapper<(L::value >= R::value)> { return {}; } + + template + friend constexpr auto operator->*(L, R) noexcept -> constant_wrapper*R::value> { return {}; } + +#if defined(__cpp_explicit_this_parameter) + // call and index + template + constexpr auto operator()(this T, Args...) noexcept + requires requires(Args...) { constant_wrapper(); } + { return constant_wrapper{}; } + template + constexpr auto operator[](this T, Args...) noexcept -> constant_wrapper<(T::value[Args::value...])> + { return {}; } + + // pseudo-mutators + template + constexpr auto operator++(this T) noexcept requires requires(T::value_type x) { ++x; } + { return constant_wrapper<[] { auto c = T::value; return ++c; }()>{}; } + template + constexpr auto operator++(this T, int) noexcept requires requires(T::value_type x) { x++; } + { return constant_wrapper<[] { auto c = T::value; return c++; }()>{}; } + + template + constexpr auto operator--(this T) noexcept requires requires(T::value_type x) { --x; } + { return constant_wrapper<[] { auto c = T::value; return --c; }()>{}; } + template + constexpr auto operator--(this T, int) noexcept requires requires(T::value_type x) { x--; } + { return constant_wrapper<[] { auto c = T::value; return c--; }()>{}; } + + template + constexpr auto operator+=(this T, R) noexcept requires requires(T::value_type x) { x += R::value; } + { return constant_wrapper<[] { auto v = T::value; return v += R::value; }()>{}; } + template + constexpr auto operator-=(this T, R) noexcept requires requires(T::value_type x) { x -= R::value; } + { return constant_wrapper<[] { 
auto v = T::value; return v -= R::value; }()>{}; } + template + constexpr auto operator*=(this T, R) noexcept requires requires(T::value_type x) { x *= R::value; } + { return constant_wrapper<[] { auto v = T::value; return v *= R::value; }()>{}; } + template + constexpr auto operator/=(this T, R) noexcept requires requires(T::value_type x) { x /= R::value; } + { return constant_wrapper<[] { auto v = T::value; return v /= R::value; }()>{}; } + template + constexpr auto operator%=(this T, R) noexcept requires requires(T::value_type x) { x %= R::value; } + { return constant_wrapper<[] { auto v = T::value; return v %= R::value; }()>{}; } + template + constexpr auto operator&=(this T, R) noexcept requires requires(T::value_type x) { x &= R::value; } + { return constant_wrapper<[] { auto v = T::value; return v &= R::value; }()>{}; } + template + constexpr auto operator|=(this T, R) noexcept requires requires(T::value_type x) { x |= R::value; } + { return constant_wrapper<[] { auto v = T::value; return v |= R::value; }()>{}; } + template + constexpr auto operator^=(this T, R) noexcept requires requires(T::value_type x) { x ^= R::value; } + { return constant_wrapper<[] { auto v = T::value; return v ^= R::value; }()>{}; } + template + constexpr auto operator<<=(this T, R) noexcept requires requires(T::value_type x) { x <<= R::value; } + { return constant_wrapper<[] { auto v = T::value; return v <<= R::value; }()>{}; } + template + constexpr auto operator>>=(this T, R) noexcept requires requires(T::value_type x) { x >>= R::value; } + { return constant_wrapper<[] { auto v = T::value; return v >>= R::value; }()>{}; } +#endif // __cpp_explicit_this_parameter + }; +} + +template +struct constant_wrapper: exposition_only::cw_operators { + static constexpr const auto & value = X.data; + using type = constant_wrapper; + using value_type = typename decltype(X)::type; + + template + constexpr auto operator=(R) const noexcept requires requires(value_type x) { x = R::value; } + { 
return constant_wrapper<[] { auto v = value; return v = R::value; }()>{}; } + + constexpr operator decltype(auto)() const noexcept { return value; } + constexpr decltype(auto) operator()() const noexcept requires (!std::invocable) { return value; } + +#if defined(__cpp_explicit_this_parameter) + using exposition_only::cw_operators::operator(); +#endif +}; + +template + constinit auto cw = constant_wrapper{}; + +} // namespace std + +#endif // ! defined(__cpp_lib_constant_wrapper) diff --git a/include/experimental/__p2630_bits/strided_slice.hpp b/include/experimental/__p2630_bits/strided_slice.hpp index 23f3dde9..c73d50e9 100644 --- a/include/experimental/__p2630_bits/strided_slice.hpp +++ b/include/experimental/__p2630_bits/strided_slice.hpp @@ -17,6 +17,8 @@ #pragma once +#include "../__p0009_bits/macros.hpp" + #include namespace MDSPAN_IMPL_STANDARD_NAMESPACE { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 30eaac84..e7971b24 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -26,7 +26,13 @@ function(mdspan_add_test name) target_compile_definitions(${name} PUBLIC MDSPAN_IMPL_CHECK_PRECONDITION=$ - ) + ) + if(MDSPAN_ENABLE_P3663) + target_compile_definitions(${name} + PUBLIC + MDSPAN_ENABLE_P3663=1 + ) + endif() endfunction() if(MDSPAN_USE_SYSTEM_GTEST) @@ -104,3 +110,7 @@ if((CMAKE_CXX_COMPILER_ID STREQUAL Clang) OR ((CMAKE_CXX_COMPILER_ID STREQUAL GN add_subdirectory(libcxx-backports) endif() endif() + +if(MDSPAN_ENABLE_P3663 AND (CMAKE_CXX_STANDARD GREATER_EQUAL 20)) + mdspan_add_test(test_constant_wrapper) +endif() diff --git a/tests/test_constant_wrapper.cpp b/tests/test_constant_wrapper.cpp new file mode 100644 index 00000000..e91f8fda --- /dev/null +++ b/tests/test_constant_wrapper.cpp @@ -0,0 +1,72 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). 
+// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#include +#include +#include + +#if defined(MDSPAN_ENABLE_P3663) +# include "../include/experimental/__p2630_bits/constant_wrapper.hpp" +#else +# error "This test requires that the CMake option MDSPAN_ENABLE_P3663 be ON." +#endif + +namespace { // (anonymous) + +template +using IC = std::integral_constant; + +template +constexpr void test_integral_constant_wrapper(IC ic) { + constexpr auto c = std::cw; + + static_assert(std::is_same_v< + decltype(std::cw), + std::constant_wrapper>); + static_assert(decltype(c)::value == Value); + static_assert(std::is_same_v< + typename decltype(c)::type, + std::constant_wrapper>); + static_assert(std::is_same_v< + typename decltype(c)::value_type, + Integral>); + + constexpr auto c2 = std::cw; + // Casting the arithmetic result back to Integral undoes + // any integer promotions (e.g., short + short -> int). 
+ constexpr auto val_plus_1 = Integral(Value + Integral(1)); + constexpr auto c_assigned = (c2 = IC{}); + static_assert(c_assigned() == val_plus_1); +} + +TEST(TestConstantWrapper, Construction) { + test_integral_constant_wrapper(IC{}); + test_integral_constant_wrapper(IC{}); + test_integral_constant_wrapper(IC{}); + test_integral_constant_wrapper(IC{}); + test_integral_constant_wrapper(IC{}); + test_integral_constant_wrapper(IC{}); + test_integral_constant_wrapper(IC{}); + test_integral_constant_wrapper(IC{}); + test_integral_constant_wrapper(IC{}); + test_integral_constant_wrapper(IC{}); + test_integral_constant_wrapper(IC{}); + test_integral_constant_wrapper(IC{}); + test_integral_constant_wrapper(IC{}); + test_integral_constant_wrapper(IC{}); + test_integral_constant_wrapper(IC{}); +} + +} // namespace (anonymous) From 00bf62f5988d8c830d8944968de0b575d25e937b Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 6 May 2025 15:04:30 -0600 Subject: [PATCH 002/103] P3663: Test that strided_slice works with std::cw Also implement exposition-only __mdspan_integral_constant_like concept, which is useful for canonicalization and Mandates. --- .../__p2630_bits/strided_slice.hpp | 34 +++++-- tests/CMakeLists.txt | 3 + tests/test_strided_slice.cpp | 90 +++++++++++++++++++ 3 files changed, 122 insertions(+), 5 deletions(-) create mode 100644 tests/test_strided_slice.cpp diff --git a/include/experimental/__p2630_bits/strided_slice.hpp b/include/experimental/__p2630_bits/strided_slice.hpp index c73d50e9..374945d8 100644 --- a/include/experimental/__p2630_bits/strided_slice.hpp +++ b/include/experimental/__p2630_bits/strided_slice.hpp @@ -18,18 +18,42 @@ #pragma once #include "../__p0009_bits/macros.hpp" +#if defined(MDSPAN_ENABLE_P3663) +# include "constant_wrapper.hpp" +#endif #include namespace MDSPAN_IMPL_STANDARD_NAMESPACE { -namespace { +#if defined(MDSPAN_ENABLE_P3663) +template +concept __mdspan_integral_constant_like = + std::is_integral_v> && + ! 
std::is_same_v> && + std::convertible_to && + std::equality_comparable_with && + std::bool_constant::value && + std::bool_constant(T()) == T::value>::value; +#endif // MDSPAN_ENABLE_P3663 + +namespace { // (anonymous) template struct mdspan_is_integral_constant: std::false_type {}; template struct mdspan_is_integral_constant>: std::true_type {}; -} + + template + constexpr bool __mdspan_is_index_like_v = + (std::is_integral_v && ! std::is_same_v) || +#if defined(MDSPAN_ENABLE_P3663) + __mdspan_integral_constant_like +#else + mdspan_is_integral_constant::value; +#endif // MDSPAN_ENABLE_P3663 + ; +} // namespace (anonymous) // Slice Specifier allowing for strides and compile time extent template @@ -42,9 +66,9 @@ struct strided_slice { MDSPAN_IMPL_NO_UNIQUE_ADDRESS ExtentType extent{}; MDSPAN_IMPL_NO_UNIQUE_ADDRESS StrideType stride{}; - static_assert(std::is_integral_v || mdspan_is_integral_constant::value); - static_assert(std::is_integral_v || mdspan_is_integral_constant::value); - static_assert(std::is_integral_v || mdspan_is_integral_constant::value); + static_assert(__mdspan_is_index_like_v); + static_assert(__mdspan_is_index_like_v); + static_assert(__mdspan_is_index_like_v); }; } // MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e7971b24..957e9f87 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -114,3 +114,6 @@ endif() if(MDSPAN_ENABLE_P3663 AND (CMAKE_CXX_STANDARD GREATER_EQUAL 20)) mdspan_add_test(test_constant_wrapper) endif() +if(MDSPAN_ENABLE_P3663 AND (CMAKE_CXX_STANDARD GREATER_EQUAL 17)) + mdspan_add_test(test_strided_slice) +endif() diff --git a/tests/test_strided_slice.cpp b/tests/test_strided_slice.cpp new file mode 100644 index 00000000..392a98bf --- /dev/null +++ b/tests/test_strided_slice.cpp @@ -0,0 +1,90 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#include +#include + +#include + +namespace { + +template +void test_strided_slice(OffsetType offset, ExtentType extent, StrideType stride) +{ + Kokkos::strided_slice s{offset, extent, stride}; + static_assert(std::is_same_v>); + auto offset2 = s.offset; + static_assert(std::is_same_v); + auto extent2 = s.extent; + static_assert(std::is_same_v); + auto stride2 = s.stride; + static_assert(std::is_same_v); + + ASSERT_EQ(offset2, offset); + ASSERT_EQ(extent2, extent); + ASSERT_EQ(stride2, stride); +} + +template +constexpr auto IC = std::integral_constant{}; + +#if defined(MDSPAN_ENABLE_P3663) +template + requires(! 
std::is_same_v) +struct my_integral_constant { + static constexpr T value = Value; + constexpr operator T () const { return value; } + static constexpr T operator() () { return value; } +}; + +template +constexpr auto IC2 = my_integral_constant{}; + +static_assert( + std::convertible_to< + my_integral_constant, + decltype(my_integral_constant::value)>); + +static_assert( + std::equality_comparable_with< + my_integral_constant, + decltype(my_integral_constant::value)>); + +static_assert( + Kokkos::__mdspan_integral_constant_like< + my_integral_constant + >); +#endif // MDSPAN_ENABLE_P3663 + +TEST(StridedSlice, WellFormed) { + test_strided_slice(int(1), unsigned(10), long(3)); + test_strided_slice((signed char)(1), (unsigned short)(10), (unsigned long long)(3)); + + test_strided_slice(IC, unsigned(10), long(3)); + test_strided_slice(int(1), IC, long(3)); + test_strided_slice(int(1), unsigned(10), IC); + +#if defined(MDSPAN_ENABLE_P3663) + test_strided_slice(std::cw<1>, unsigned(10), long(3)); + test_strided_slice(int(1), std::cw, long(3)); + test_strided_slice(int(1), unsigned(10), std::cw); + + test_strided_slice(IC2, unsigned(10), long(3)); + test_strided_slice(int(1), IC2, long(3)); + test_strided_slice(int(1), unsigned(10), IC2); +#endif +} + +} // namespace (anonymous) From 62d8bba7eb8f8da3cf77be1279d1fc3015bced6d Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 7 May 2025 13:56:28 -0600 Subject: [PATCH 003/103] P3663: Stub submdspan_canonicalize_slices sizeof...(Slices) == 0 case builds and passes tests. 
--- .../__p2630_bits/submdspan_extents.hpp | 10 +++++ tests/CMakeLists.txt | 3 ++ tests/test_canonicalize_slices.cpp | 40 +++++++++++++++++++ 3 files changed, 53 insertions(+) create mode 100644 tests/test_canonicalize_slices.cpp diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 4fe5dc6e..f33066aa 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include "strided_slice.hpp" #include "../__p0009_bits/utility.hpp" @@ -404,6 +405,15 @@ struct extents_constructor<0, Extents, NewStaticExtents...> { } // namespace detail +template +MDSPAN_INLINE_FUNCTION +constexpr auto +submdspan_canonicalize_slices(const extents&, Slices...) +{ + static_assert(sizeof...(Slices) == 0, "sizeof...(Slices) > 0 not implemented yet"); + return std::tuple{}; +} + // submdspan_extents creates new extents given src extents and submdspan slice // specifiers template diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 957e9f87..4a59960b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -117,3 +117,6 @@ endif() if(MDSPAN_ENABLE_P3663 AND (CMAKE_CXX_STANDARD GREATER_EQUAL 17)) mdspan_add_test(test_strided_slice) endif() +if(MDSPAN_ENABLE_P3663) + mdspan_add_test(test_canonicalize_slices) +endif() diff --git a/tests/test_canonicalize_slices.cpp b/tests/test_canonicalize_slices.cpp new file mode 100644 index 00000000..ed2fea0d --- /dev/null +++ b/tests/test_canonicalize_slices.cpp @@ -0,0 +1,40 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#include +#include + +#include + +#if ! defined(MDSPAN_ENABLE_P3663) +# error "This file requires MDSPAN_ENABLE_P3663=ON" +#endif + +namespace { + +TEST(CanonicalizeSlices, Rank0) { + { + Kokkos::extents exts{}; + auto canonical = Kokkos::submdspan_canonicalize_slices(exts); + static_assert(std::is_same_v>); + } + { + Kokkos::extents exts{}; + auto canonical = Kokkos::submdspan_canonicalize_slices(exts); + static_assert(std::is_same_v>); + } +} + +} // namespace (anonymous) From d8332ac268452b978ff7f22a77b94234e3069f55 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 20 May 2025 16:40:35 -0600 Subject: [PATCH 004/103] Support Clang 21.0.0 -std=c++2c Set the following CMake options. -DCMAKE_CXX_FLAGS="-std=c++2c" -DMDSPAN_CXX_STANDARD=26 You'll get a warning, but it seems to work. --- CMakeLists.txt | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 16e77159..ad471d8c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,7 +27,7 @@ option(MDSPAN_INSTALL_STDMODE_HEADERS "Whether to install headers to emulate sta # Option to override which C++ standard to use set(MDSPAN_CXX_STANDARD DETECT CACHE STRING "Override the default CXX_STANDARD to compile with.") -set_property(CACHE MDSPAN_CXX_STANDARD PROPERTY STRINGS DETECT 14 17 20 23) +set_property(CACHE MDSPAN_CXX_STANDARD PROPERTY STRINGS DETECT 14 17 20 23 26) option(MDSPAN_ENABLE_CONCEPTS "Try to enable concepts support by giving extra flags." 
On) @@ -64,8 +64,18 @@ elseif(MDSPAN_CXX_STANDARD STREQUAL "23") else() message(FATAL_ERROR "Requested MDSPAN_CXX_STANDARD \"23\" not supported by provided C++ compiler") endif() +elseif(MDSPAN_CXX_STANDARD STREQUAL "26") + if("cxx_std_26" IN_LIST CMAKE_CXX_COMPILE_FEATURES) + message(STATUS "Using C++26 standard") + set(CMAKE_CXX_STANDARD 26) + else() + message(WARNING "Requested MDSPAN_CXX_STANDARD \"26\" not supported by provided C++ compiler") + endif() else() - if("cxx_std_23" IN_LIST CMAKE_CXX_COMPILE_FEATURES) + if("cxx_std_26" IN_LIST CMAKE_CXX_COMPILE_FEATURES) + set(CMAKE_CXX_STANDARD 26) + message(STATUS "Detected support for C++26 standard") + elseif("cxx_std_23" IN_LIST CMAKE_CXX_COMPILE_FEATURES) set(CMAKE_CXX_STANDARD 23) message(STATUS "Detected support for C++23 standard") elseif("cxx_std_20" IN_LIST CMAKE_CXX_COMPILE_FEATURES) From c847a54556ad153501f3af1d86f03700b0677041 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 20 May 2025 17:00:56 -0600 Subject: [PATCH 005/103] Implement check-static-bounds and others * canonical-ice * subtract-ice * de-ice * check-static-bounds --- .../__p2630_bits/submdspan_extents.hpp | 203 +++++++++++++++++- tests/test_canonicalize_slices.cpp | 61 +++++- 2 files changed, 251 insertions(+), 13 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index f33066aa..0c31b8a2 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -405,15 +405,212 @@ struct extents_constructor<0, Extents, NewStaticExtents...> { } // namespace detail -template +#if defined(MDSPAN_ENABLE_P3663) + +namespace impl { + +template +constexpr auto index_cast(OtherIndexType&& i) noexcept { + using OIT = std::remove_cvref_t; + if (std::is_signed_v || std::is_unsigned_v) { + return i; + } + else { + return static_cast(i); + } +} + +template + requires std::convertible_to +constexpr 
auto canonical_ice(S s) { + static_assert(std::is_signed_v || std::is_unsigned_v); + // TODO Mandates: If S models integral-constant-like and if + // decltype(S::value) is a signed or unsigned integer type, then + // S::value is representable as a value of type IndexType. + // + // TODO Preconditions: If S is a signed or unsigned integer type, + // then s is representable as a value of type IndexType. + if constexpr (__mdspan_integral_constant_like) { + return std::constant_wrapper(S::value), IndexType>{}; + } + else { + return index_cast(s); + } +} + +template +constexpr auto subtract_ice(X x, Y y) { + return canonical_ice(x) - canonical_ice(y); +} + +template +constexpr T de_ice(T val) { + return val; +} + +template<__mdspan_integral_constant_like T> +constexpr auto de_ice(T) { + return T::value; +} + +enum class check_static_bounds_result { + in_bounds, + out_of_bounds, + unknown +}; + +// TODO It's impossible to write an "if constexpr" check for +// "structured binding into two elements is well-formed." Thus, we +// write check_static_bounds only for canonical slice types as inputs +// -- that is, we invoke check_static_bounds post-canonicalization. +// +// This may suggest a change in wording, though only if +// we need to call check_static_bounds on pre-canonicalized slices. + +template + constexpr check_static_bounds_result check_static_bounds( + const extents&, Slices... 
slices) +{ + auto s_k = slices...[k]; + using S_k = decltype(s_k); + if constexpr (std::is_convertible_v) { + return check_static_bounds_result::in_bounds; + } + else if constexpr (std::is_convertible_v) { + if constexpr (__mdspan_integral_constant_like) { + if constexpr (de_ice(S_k{}) < 0) { + return check_static_bounds_result::out_of_bounds; // 14.3.1 + } + else if constexpr (Exts...[k] != dynamic_extent && Exts...[k] <= de_ice(S_k{})) { + return check_static_bounds_result::out_of_bounds; + } + else if constexpr (Exts...[k] != dynamic_extent && de_ice(s_k) < Exts...[k]) { + return check_static_bounds_result::in_bounds; + } + else { + return check_static_bounds_result::unknown; + } + } + else { // integer, not integral-constant-like (14.5 case) + return check_static_bounds_result::unknown; + } + } + else if constexpr (detail::is_strided_slice::value) { + if constexpr (__mdspan_integral_constant_like) { + if constexpr (de_ice(s_k.offset) < 0) { + return check_static_bounds_result::out_of_bounds; // 14.3.1 + } + else if constexpr ( + Exts...[k] != dynamic_extent && Exts...[k] < de_ice(s_k.offset)) + { + return check_static_bounds_result::out_of_bounds; // 14.3.2 + } + else if constexpr ( + __mdspan_integral_constant_like && + de_ice(s_k.offset) + de_ice(s_k.extent) < 0) + { + return check_static_bounds_result::out_of_bounds; // 14.3.3 + } + else if constexpr ( + Exts...[k] != dynamic_extent && + __mdspan_integral_constant_like && + Exts...[k] < de_ice(s_k.offset) + de_ice(s_k.extent)) + { + return check_static_bounds_result::out_of_bounds; // 14.3.4 + } + else if constexpr ( + Exts...[k] != dynamic_extent && + __mdspan_integral_constant_like && + 0 <= de_ice(s_k.offset) && + de_ice(s_k.offset) <= de_ice(s_k.offset) + de_ice(s_k.extent) && + de_ice(s_k.offset) + de_ice(s_k.extent) <= Exts...[k]) + { + return check_static_bounds_result::in_bounds; // 14.3.5 + } + else { + return check_static_bounds_result::unknown; // 14.3.6 + } + } + else { // strided_slice but 
offset_type isn't integral-constant-like + return check_static_bounds_result::unknown; // 14.5 + } + } + else { // 14.4 + // NOTE: This case means that check_static_bounds cannot be + // well-formed if it didn't fall into one of the above cases + // and if it can't be destructured into two elements. + auto [s_k0, s_k1] = s_k; + using S_k0 = decltype(s_k0); + using S_k1 = decltype(s_k1); + if constexpr (__mdspan_integral_constant_like) { + if constexpr (de_ice(S_k0{}) < 0) { + return check_static_bounds_result::out_of_bounds; // 14.4.1 + } + else if constexpr ( + Exts...[k] != dynamic_extent && + Exts...[k] < de_ice(S_k0{})) + { + return check_static_bounds_result::out_of_bounds; // 14.4.2 + } + else if constexpr ( + __mdspan_integral_constant_like && + de_ice(S_k1{}) < de_ice(S_k0{})) + { + return check_static_bounds_result::out_of_bounds; // 14.4.3 + } + else if constexpr ( + Exts...[k] != dynamic_extent && + __mdspan_integral_constant_like && + Exts...[k] < de_ice(S_k1{})) + { + return check_static_bounds_result::out_of_bounds; // 14.4.4 + } + else if constexpr ( + Exts...[k] != dynamic_extent && + __mdspan_integral_constant_like && + 0 <= de_ice(S_k0{}) && + de_ice(S_k0{}) <= de_ice(S_k1{}) && + de_ice(S_k1{}) <= Exts...[k]) + { + return check_static_bounds_result::in_bounds; // 14.4.5 + } + else { + return check_static_bounds_result::unknown; // 14.4.6 + } + } + else { // S_k0 not integral-constant-like + return check_static_bounds_result::unknown; + } + } +} +} // namespace impl + +template MDSPAN_INLINE_FUNCTION constexpr auto -submdspan_canonicalize_slices(const extents&, Slices...) 
+submdspan_canonicalize_slices(const extents&) { - static_assert(sizeof...(Slices) == 0, "sizeof...(Slices) > 0 not implemented yet"); return std::tuple{}; } +template +MDSPAN_INLINE_FUNCTION +constexpr auto +submdspan_canonicalize_slices(const extents&, full_extent_t) +{ + return std::tuple{full_extent}; +} + +template +MDSPAN_INLINE_FUNCTION +constexpr auto +submdspan_canonicalize_slices(const extents&, Slices... slices) +{ + static_assert(sizeof...(Slices) == 0, "General case not implemented yet"); + return std::tuple{slices...}; +} +#endif // MDSPAN_ENABLE_P3663 + // submdspan_extents creates new extents given src extents and submdspan slice // specifiers template diff --git a/tests/test_canonicalize_slices.cpp b/tests/test_canonicalize_slices.cpp index ed2fea0d..db0588ac 100644 --- a/tests/test_canonicalize_slices.cpp +++ b/tests/test_canonicalize_slices.cpp @@ -24,17 +24,58 @@ namespace { +constexpr bool slice_equal(Kokkos::full_extent_t, Kokkos::full_extent_t) { + return true; +} + +template +constexpr bool slice_equal(Kokkos::full_extent_t, const Right&) { + return std::is_convertible_v; +} + +template +constexpr bool slice_equal(const Left&, Kokkos::full_extent_t) { + return std::is_convertible_v; +} + +template +constexpr bool slice_equal(const Left&, const Right&) { + static_assert(false, "slice_equal not implemented for this case"); + return false; +} + +template +void +test_canonicalize_slices( + const ExpectedResult& expected_result, + const InputExtents& input_extents, + Slices... slices) +{ + auto result = Kokkos::submdspan_canonicalize_slices(input_extents, slices...); + [&] (std::index_sequence) { + auto test_one = [&] (std::integral_constant) { + using std::get; + auto left = get(result); + auto right = get(expected_result); + const bool result = slice_equal(left, right); + // Below isn't well-formed for some reason -- a compiler bug? 
+ //const bool result = slice_equal(get(result), get(expected_result)); + ASSERT_TRUE(result) << " failed for k=" << Ind; + }; + (test_one(std::integral_constant{}), ...); + } (std::make_index_sequence()); +} + TEST(CanonicalizeSlices, Rank0) { - { - Kokkos::extents exts{}; - auto canonical = Kokkos::submdspan_canonicalize_slices(exts); - static_assert(std::is_same_v>); - } - { - Kokkos::extents exts{}; - auto canonical = Kokkos::submdspan_canonicalize_slices(exts); - static_assert(std::is_same_v>); - } + test_canonicalize_slices(std::tuple{}, Kokkos::extents{}); + test_canonicalize_slices(std::tuple{}, Kokkos::extents{}); +} + +TEST(CanonicalizeSlices, Rank1_full) { + constexpr auto full = Kokkos::full_extent; + constexpr auto expected_result = std::tuple{full}; + test_canonicalize_slices(expected_result, Kokkos::extents{}, full); + test_canonicalize_slices(expected_result, Kokkos::extents{}, full); } } // namespace (anonymous) From 02595945b3f3a5c2cf6b4bfc2eebfc5968cee39e Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 21 May 2025 15:41:36 -0600 Subject: [PATCH 006/103] Fix submdspan_canonicalize_slices for one slice --- .../__p2630_bits/submdspan_extents.hpp | 200 ++++++++++++++---- .../__p2630_bits/submdspan_mapping.hpp | 31 +++ tests/test_canonicalize_slices.cpp | 26 ++- 3 files changed, 210 insertions(+), 47 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 0c31b8a2..efbd14f3 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -407,17 +407,20 @@ struct extents_constructor<0, Extents, NewStaticExtents...> { #if defined(MDSPAN_ENABLE_P3663) -namespace impl { +namespace detail { template + requires(std::is_signed_v> || + std::is_unsigned_v>) constexpr auto index_cast(OtherIndexType&& i) noexcept { - using OIT = std::remove_cvref_t; - if (std::is_signed_v || std::is_unsigned_v) { - 
return i; - } - else { - return static_cast(i); - } + return i; +} + +template + requires(! std::is_signed_v> && + !std::is_unsigned_v>) +constexpr auto index_cast(OtherIndexType&& i) noexcept { + return static_cast(i); } template @@ -430,11 +433,16 @@ constexpr auto canonical_ice(S s) { // // TODO Preconditions: If S is a signed or unsigned integer type, // then s is representable as a value of type IndexType. + // + // TODO NOT IN PROPOSAL: index-cast result needs to be + // cast again to IndexType, so that we don't get a weird + // constant_wrapper whose value has a different type + // than the second template argument. if constexpr (__mdspan_integral_constant_like) { - return std::constant_wrapper(S::value), IndexType>{}; + return std::constant_wrapper(index_cast(S::value)), IndexType>{}; } else { - return index_cast(s); + return static_cast(index_cast(s)); } } @@ -461,18 +469,30 @@ enum class check_static_bounds_result { // TODO It's impossible to write an "if constexpr" check for // "structured binding into two elements is well-formed." Thus, we -// write check_static_bounds only for canonical slice types as inputs -// -- that is, we invoke check_static_bounds post-canonicalization. -// -// This may suggest a change in wording, though only if -// we need to call check_static_bounds on pre-canonicalized slices. - -template +// must assume that the input Slices are all valid slice types. +// One way to do that is to invoke this only post-canonicalization. +// Another way is to rely on submdspan_canonicalize_slices to be +// ill-formed if called with an invalid slice type. We can do the +// latter in submdspan_canonicalize_slices by expressing the four +// possible categories of valid slice types in if constexpr, with +// the final else attempting the structured binding into two elements. 
+ +// TODO NOT IN PROPOSAL: Consider rewriting to use only $S_k$ +// and not $s_k$ in check-static-bounds, since we can't use +// the actual function parameter in a function that we want +// to work in a constant expression. + +// TODO NOT IN PROPOSAL: Taking slices parameter(s) makes use +// of check_static_bounds not a constant expression. +// Instead, make Slices... a template parameter pack. + +// TODO NOT IN PROPOSAL: It's easier to have a single Slice +// as a template parameter pack. This makes sense because +// the function only tests one slice (the k-th one) anyway. +template constexpr check_static_bounds_result check_static_bounds( - const extents&, Slices... slices) + const extents&) { - auto s_k = slices...[k]; - using S_k = decltype(s_k); if constexpr (std::is_convertible_v) { return check_static_bounds_result::in_bounds; } @@ -484,7 +504,7 @@ template else if constexpr (Exts...[k] != dynamic_extent && Exts...[k] <= de_ice(S_k{})) { return check_static_bounds_result::out_of_bounds; } - else if constexpr (Exts...[k] != dynamic_extent && de_ice(s_k) < Exts...[k]) { + else if constexpr (Exts...[k] != dynamic_extent && de_ice(S_k{}) < Exts...[k]) { return check_static_bounds_result::in_bounds; } else { @@ -495,35 +515,35 @@ template return check_static_bounds_result::unknown; } } - else if constexpr (detail::is_strided_slice::value) { + else if constexpr (is_strided_slice::value) { if constexpr (__mdspan_integral_constant_like) { - if constexpr (de_ice(s_k.offset) < 0) { + if constexpr (de_ice(S_k{}.offset) < 0) { return check_static_bounds_result::out_of_bounds; // 14.3.1 } else if constexpr ( - Exts...[k] != dynamic_extent && Exts...[k] < de_ice(s_k.offset)) + Exts...[k] != dynamic_extent && Exts...[k] < de_ice(S_k{}.offset)) { return check_static_bounds_result::out_of_bounds; // 14.3.2 } else if constexpr ( __mdspan_integral_constant_like && - de_ice(s_k.offset) + de_ice(s_k.extent) < 0) + de_ice(S_k{}.offset) + de_ice(S_k{}.extent) < 0) { return 
check_static_bounds_result::out_of_bounds; // 14.3.3 } else if constexpr ( Exts...[k] != dynamic_extent && __mdspan_integral_constant_like && - Exts...[k] < de_ice(s_k.offset) + de_ice(s_k.extent)) + Exts...[k] < de_ice(S_k{}.offset) + de_ice(S_k{}.extent)) { return check_static_bounds_result::out_of_bounds; // 14.3.4 } else if constexpr ( Exts...[k] != dynamic_extent && __mdspan_integral_constant_like && - 0 <= de_ice(s_k.offset) && - de_ice(s_k.offset) <= de_ice(s_k.offset) + de_ice(s_k.extent) && - de_ice(s_k.offset) + de_ice(s_k.extent) <= Exts...[k]) + 0 <= de_ice(S_k{}.offset) && + de_ice(S_k{}.offset) <= de_ice(S_k{}.offset) + de_ice(S_k{}.extent) && + de_ice(S_k{}.offset) + de_ice(S_k{}.extent) <= Exts...[k]) { return check_static_bounds_result::in_bounds; // 14.3.5 } @@ -539,7 +559,9 @@ template // NOTE: This case means that check_static_bounds cannot be // well-formed if it didn't fall into one of the above cases // and if it can't be destructured into two elements. - auto [s_k0, s_k1] = s_k; + + // We can't use s_k here, because it's not a constant expression. 
+ auto [s_k0, s_k1] = S_k{}; using S_k0 = decltype(s_k0); using S_k1 = decltype(s_k1); if constexpr (__mdspan_integral_constant_like) { @@ -583,31 +605,127 @@ template } } } -} // namespace impl -template +template +constexpr bool is_constant_wrapper = false; + +template +constexpr bool is_constant_wrapper> = true; + +// [mdspan.sub.slices] 1 +template +constexpr bool is_canonical_submdspan_index_type = + std::is_same_v || ( + is_constant_wrapper && + std::is_same_v + ); + +// [mdspan.sub.slices] 2 +template +MDSPAN_INLINE_FUNCTION +constexpr bool is_canonical_slice_type() { + if constexpr ( + std::is_same_v || // 2.1 + is_canonical_submdspan_index_type) // 2.2 + { + return true; + } + else if constexpr (is_strided_slice::value) { // 2.3 + if constexpr ( // 2.3.1 + is_canonical_submdspan_index_type && + is_canonical_submdspan_index_type && + is_canonical_submdspan_index_type) + { + if constexpr ( + is_constant_wrapper && + is_constant_wrapper) + { + constexpr auto Stride = de_ice(typename Slice::stride_type{}); + constexpr auto Extent = de_ice(typename Slice::extent_type{}); + return Extent == 0 || Stride > 0; // 2.3.2 + } + else { + return false; + } + } + else { + return false; + } + } + else { + return false; + } +} + +// [mdspan.sub.slices] 3 +template MDSPAN_INLINE_FUNCTION constexpr auto -submdspan_canonicalize_slices(const extents&) +is_canonical_kth_submdspan_slice_type(const extents& exts, Slice slice) { - return std::tuple{}; + if constexpr (! is_canonical_slice_type()) { + return false; // 3.1 + } + else { // 3.2 + return check_static_bounds(exts) != check_static_bounds_result::out_of_bounds; + } } -template +// [mdspan.sub.slices] 11 +template MDSPAN_INLINE_FUNCTION constexpr auto -submdspan_canonicalize_slices(const extents&, full_extent_t) -{ - return std::tuple{full_extent}; +submdspan_canonicalize_one_slice(const extents& exts, Slice s) { + // Part of [mdspan.sub.slices] 9. + // This could be combined with the if constexpr branches below. 
+ // + // NOTE This is not a constant expression (because it takes exts). + static_assert(check_static_bounds(exts) != check_static_bounds_result::out_of_bounds); + + // TODO Check Precondition that s is a valid k-th submdspan slice for exts. + + if constexpr (std::is_convertible_v) { + return full_extent; // 11.1 + } + else if constexpr (std::is_convertible_v) { + return canonical_ice(s); // 11.2 + } + else if constexpr (is_strided_slice::value) { // 11.3 + return strided_slice{ + .offset = canonical_ice(s.offset), + .extent = canonical_ice(s.extent), + .stride = canonical_ice(s.stride) + }; + } + else { // 11.4 + auto [s_k0, s_k1] = s; + using S_k0 = decltype(s_k0); + using S_k1 = decltype(s_k1); + static_assert(std::is_convertible_v); + static_assert(std::is_convertible_v); + return strided_slice{ + .offset = canonical_ice(s_k0), + .extent = subtract_ice(s_k0, s_k1), + .stride = std::cw + }; + } } +} // namespace detail + template + requires (sizeof...(Slices) == sizeof...(Extents)) // [mdspan.sub.slices] 8 MDSPAN_INLINE_FUNCTION constexpr auto -submdspan_canonicalize_slices(const extents&, Slices... slices) +submdspan_canonicalize_slices(const extents& exts, Slices... slices) { - static_assert(sizeof...(Slices) == 0, "General case not implemented yet"); - return std::tuple{slices...}; + return [&](std::index_sequence) { + return std::tuple{ + // This is ill-formed if slices...[Inds] is not a valid slice type. + // That implements the Mandates clause of [mdspan.sub.slices] 9. + detail::submdspan_canonicalize_one_slice(exts, slices...[Inds])... 
+ }; + } (std::make_index_sequence{}); } #endif // MDSPAN_ENABLE_P3663 diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index 82f9193a..46c1a4d0 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -42,6 +42,7 @@ #endif namespace MDSPAN_IMPL_STANDARD_NAMESPACE { + //****************************************** // Return type of submdspan_mapping overloads //****************************************** @@ -52,6 +53,36 @@ template struct submdspan_mapping_result { namespace detail { +#if defined(MDSPAN_ENABLE_P3663) +template +constexpr auto +submdspan_mapping_with_full_extents(const LayoutMapping& mapping) { + using extents_type = typename LayoutMapping::extents_type; + return [&] (std::index_sequence) { + return submdspan_mapping(mapping, ((void) Inds, full_extent)...); + } (std::make_index_sequence{}); +} + +template +constexpr bool is_submdspan_mapping_result = false; + +template +constexpr bool is_submdspan_mapping_result< + submdspan_mapping_result> = true; + +template +concept submdspan_mapping_result = + is_submdspan_mapping_result; + +template +concept mapping_sliceable_with_full_extents = + requires(const LayoutMapping& mapping) { + { + submdspan_mapping_with_full_extents(mapping) + } -> submdspan_mapping_result; + }; +#endif // MDSPAN_ENABLE_P3663 + // We use const Slice& and not Slice&& because the various // submdspan_mapping_impl overloads use their slices arguments // multiple times. 
This makes perfect forwarding not useful, but we diff --git a/tests/test_canonicalize_slices.cpp b/tests/test_canonicalize_slices.cpp index db0588ac..5b69933e 100644 --- a/tests/test_canonicalize_slices.cpp +++ b/tests/test_canonicalize_slices.cpp @@ -24,6 +24,12 @@ namespace { +template +constexpr bool slice_equal(const T& left, const T& right) { + return left == right; +} + +// full_extent_t lacks operator== constexpr bool slice_equal(Kokkos::full_extent_t, Kokkos::full_extent_t) { return true; } @@ -38,12 +44,6 @@ constexpr bool slice_equal(const Left&, Kokkos::full_extent_t) { return std::is_convertible_v; } -template -constexpr bool slice_equal(const Left&, const Right&) { - static_assert(false, "slice_equal not implemented for this case"); - return false; -} - template void test_canonicalize_slices( @@ -78,4 +78,18 @@ TEST(CanonicalizeSlices, Rank1_full) { test_canonicalize_slices(expected_result, Kokkos::extents{}, full); } +TEST(CanonicalizeSlices, Rank1_integer_dynamic) { + constexpr auto slice0 = int(7u); + constexpr auto expected_slices = std::tuple{size_t(7u)}; + constexpr auto exts = Kokkos::extents{}; + test_canonicalize_slices(expected_slices, exts, slice0); +} + +TEST(CanonicalizeSlices, Rank1_integer_static) { + constexpr auto slice0 = std::integral_constant{}; + constexpr auto expected_slices = std::tuple{std::cw}; + constexpr auto exts = Kokkos::extents{}; + test_canonicalize_slices(expected_slices, exts, slice0); +} + } // namespace (anonymous) From 7092b24afb36597cde92d7fd73b1338a741d171c Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 21 May 2025 16:36:51 -0600 Subject: [PATCH 007/103] Fix canonical_ice & 1 strided_slice submdspan_canonicalize_slices now works for one strided_slice. Fix canonical_ice (wrong order in subtraction; wording is fine). 
--- .../__p2630_bits/submdspan_extents.hpp | 19 +++- tests/test_canonicalize_slices.cpp | 96 +++++++++++++++++++ 2 files changed, 110 insertions(+), 5 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index efbd14f3..5a8fee44 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -448,7 +448,7 @@ constexpr auto canonical_ice(S s) { template constexpr auto subtract_ice(X x, Y y) { - return canonical_ice(x) - canonical_ice(y); + return canonical_ice(y) - canonical_ice(x); } template @@ -560,10 +560,19 @@ template // well-formed if it didn't fall into one of the above cases // and if it can't be destructured into two elements. - // We can't use s_k here, because it's not a constant expression. - auto [s_k0, s_k1] = S_k{}; - using S_k0 = decltype(s_k0); - using S_k1 = decltype(s_k1); + // We can't use s_k on the right-hand side here, because it's not a constant expression. + // We can't use S_k{} here either, because that presumes that it's default constructible. + // We can only use std::declval() in an unevaluated context. 
+ auto get_first = [] (S_k s_k) { + auto [s_k0, _] = s_k; + return s_k0; + }; + auto get_second = [] (S_k s_k) { + auto [_, s_k1] = s_k; + return s_k1; + }; + using S_k0 = decltype(get_first(std::declval())); + using S_k1 = decltype(get_second(std::declval())); if constexpr (__mdspan_integral_constant_like) { if constexpr (de_ice(S_k0{}) < 0) { return check_static_bounds_result::out_of_bounds; // 14.4.1 diff --git a/tests/test_canonicalize_slices.cpp b/tests/test_canonicalize_slices.cpp index 5b69933e..d4e4e6ee 100644 --- a/tests/test_canonicalize_slices.cpp +++ b/tests/test_canonicalize_slices.cpp @@ -22,6 +22,61 @@ # error "This file requires MDSPAN_ENABLE_P3663=ON" #endif +namespace my_test { + +template +struct my_aggregate_pair { + First first; + Second second; +}; + +// Not an aggregate, to force use of the tuple protocol. +template +class my_nonaggregate_pair { +public: + constexpr my_nonaggregate_pair(First first, Second second) + : first_(first), second_(second) + {} + + template + constexpr decltype(auto) get(this Self&& self) { + if constexpr (Index == 0) { + return self.first_; + } + else if constexpr (Index == 1) { + return self.second_; + } + else { + static_assert(false, "Invalid index"); + } + } + +private: + First first_; + Second second_; +}; + +} // namespace my_test + +template +struct std::tuple_size> + : std::integral_constant {}; + +template +struct std::tuple_element> { + static_assert(false, "Invalid index"); +}; + +template +struct std::tuple_element<0, my_test::my_nonaggregate_pair> { + using type = First; +}; + +template +struct std::tuple_element<1, my_test::my_nonaggregate_pair> { + using type = Second; +}; + namespace { template @@ -44,6 +99,14 @@ constexpr bool slice_equal(const Left&, Kokkos::full_extent_t) { return std::is_convertible_v; } +template +constexpr bool slice_equal( + const Kokkos::strided_slice& left, + const Kokkos::strided_slice& right) +{ + return left.offset == right.offset && left.extent == right.extent && 
left.stride == right.stride; +} + template void test_canonicalize_slices( @@ -92,4 +155,37 @@ TEST(CanonicalizeSlices, Rank1_integer_static) { test_canonicalize_slices(expected_slices, exts, slice0); } +TEST(CanonicalizeSlices, Rank1_pair) { + constexpr auto slice0 = std::pair{std::integral_constant{}, 11}; + constexpr auto expected_slices = std::tuple{Kokkos::strided_slice{ + .offset = std::cw, + .extent = size_t(4u), // 11 - 7 + .stride = std::cw + }}; + constexpr auto exts = Kokkos::extents{}; + test_canonicalize_slices(expected_slices, exts, slice0); +} + +TEST(CanonicalizeSlices, Rank1_aggregate_pair) { + constexpr auto slice0 = my_test::my_aggregate_pair{7, 11}; + constexpr auto expected_slices = std::tuple{Kokkos::strided_slice{ + .offset = size_t(7u), + .extent = (size_t(11u) - size_t(7u)), + .stride = std::cw + }}; + constexpr auto exts = Kokkos::extents{}; + test_canonicalize_slices(expected_slices, exts, slice0); +} + +TEST(CanonicalizeSlices, Rank1_nonaggregate_pair) { + constexpr auto slice0 = my_test::my_nonaggregate_pair(7, 11); + constexpr auto expected_slices = std::tuple{Kokkos::strided_slice{ + .offset = size_t(7u), + .extent = (size_t(11u) - size_t(7u)), + .stride = std::cw + }}; + constexpr auto exts = Kokkos::extents{}; + test_canonicalize_slices(expected_slices, exts, slice0); +} + } // namespace (anonymous) From 825e659680eb4a59ec0e28966a435e1da13521ca Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 21 May 2025 16:49:26 -0600 Subject: [PATCH 008/103] Put dims in the correct namespace --- include/experimental/__p2389_bits/dims.hpp | 2 -- tests/test_dims.cpp | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/include/experimental/__p2389_bits/dims.hpp b/include/experimental/__p2389_bits/dims.hpp index 00045215..8f991f51 100644 --- a/include/experimental/__p2389_bits/dims.hpp +++ b/include/experimental/__p2389_bits/dims.hpp @@ -18,11 +18,9 @@ // backward compatibility import into experimental namespace 
MDSPAN_IMPL_STANDARD_NAMESPACE { -namespace MDSPAN_IMPL_PROPOSED_NAMESPACE { template< ::std::size_t Rank, class IndexType = std::size_t> using dims = :: MDSPAN_IMPL_STANDARD_NAMESPACE :: dextents; -} // namespace MDSPAN_IMPL_PROPOSED_NAMESPACE } // namespace MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/tests/test_dims.cpp b/tests/test_dims.cpp index 7ceb6f35..912530cf 100644 --- a/tests/test_dims.cpp +++ b/tests/test_dims.cpp @@ -31,7 +31,7 @@ static constexpr bool is_extents_v< template void test_dims_with_one_template_argument() { - using d = MDSPAN_IMPL_STANDARD_NAMESPACE :: MDSPAN_IMPL_PROPOSED_NAMESPACE :: dims; + using d = MDSPAN_IMPL_STANDARD_NAMESPACE :: dims; static_assert(test::is_extents_v, "dims is not an extents specialization"); static_assert(std::is_same::value, "dims::index_type is wrong"); static_assert(d::rank() == Rank, "dims::rank() is wrong"); @@ -40,7 +40,7 @@ void test_dims_with_one_template_argument() template void test_dims_with_two_template_arguments() { - using d = MDSPAN_IMPL_STANDARD_NAMESPACE :: MDSPAN_IMPL_PROPOSED_NAMESPACE :: dims; + using d = MDSPAN_IMPL_STANDARD_NAMESPACE :: dims; static_assert(test::is_extents_v, "dims is not an extents specialization"); static_assert(std::is_same::value, "dims::index_type is wrong"); static_assert(d::rank() == Rank, "dims::rank() is wrong"); From cc8b3ef43cb0b4f8c70efd9b2b75eee3913a0efe Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 21 May 2025 16:49:41 -0600 Subject: [PATCH 009/103] Add submdspan_canonicalize_slices rank 2 test --- tests/test_canonicalize_slices.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_canonicalize_slices.cpp b/tests/test_canonicalize_slices.cpp index d4e4e6ee..3d622578 100644 --- a/tests/test_canonicalize_slices.cpp +++ b/tests/test_canonicalize_slices.cpp @@ -188,4 +188,11 @@ TEST(CanonicalizeSlices, Rank1_nonaggregate_pair) { test_canonicalize_slices(expected_slices, exts, slice0); } +TEST(CanonicalizeSlices, Rank2_full) { + constexpr 
auto full = Kokkos::full_extent; + constexpr auto expected_result = std::tuple{full, full}; + test_canonicalize_slices(expected_result, Kokkos::extents{}, full, full); + test_canonicalize_slices(expected_result, Kokkos::dims<2>{11u, 13u}, full, full); +} + } // namespace (anonymous) From 71b85baa9ede6e1f56761f51214f90aee5cba1c6 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 22 May 2025 16:21:50 -0600 Subject: [PATCH 010/103] CHECKPOINT Attempt to change over submdspan to use canonicalization without doing much else. It fails to build; a lot of layout mappings seem to get the wrong mapping result type. It builds and passes tests with P3663 support off. --- .../experimental/__p2630_bits/submdspan.hpp | 12 ++++++ .../__p2630_bits/submdspan_extents.hpp | 40 +++++++++++++++++++ .../__p2630_bits/submdspan_mapping.hpp | 4 +- 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan.hpp b/include/experimental/__p2630_bits/submdspan.hpp index abddd0b5..6c2f6dfa 100644 --- a/include/experimental/__p2630_bits/submdspan.hpp +++ b/include/experimental/__p2630_bits/submdspan.hpp @@ -26,6 +26,17 @@ MDSPAN_INLINE_FUNCTION constexpr auto submdspan(const mdspan &src, SliceSpecifiers... slices) { +#if defined(MDSPAN_ENABLE_P3663) + + auto [...canonical_slices] = submdspan_canonicalize_slices(src.extents(), slices...); + // TODO FIX IN PROPOSAL: [canonical_]slices (incorrect formatting). + auto sub_map_result = submdspan_mapping(src.mapping(), canonical_slices...); + // TODO FIX IN PROPOSAL: It's src.data_handle(), not src.data(). + // Missing "typename" before AccessorPolicy::offset_policy. 
+ return mdspan(src.accessor().offset(src.data_handle(), sub_map_result.offset), + sub_map_result.mapping, + typename AccessorPolicy::offset_policy(src.accessor())); +#else const auto sub_submdspan_mapping_result = submdspan_mapping(src.mapping(), slices...); // NVCC has a problem with the deduction so lets figure out the type using sub_mapping_t = std::remove_cv_t; @@ -36,5 +47,6 @@ submdspan(const mdspan &src, src.accessor().offset(src.data_handle(), sub_submdspan_mapping_result.offset), sub_submdspan_mapping_result.mapping, sub_accessor_t(src.accessor())); +#endif } } // namespace MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 5a8fee44..1feb663f 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -55,6 +55,8 @@ template struct is_strided_slice< strided_slice> : std::true_type {}; +// TODO We won't even need index_pair_like when we're done. + // Helper for identifying valid pair like things template struct index_pair_like : std::false_type {}; @@ -86,6 +88,7 @@ struct index_pair_like, IndexType> { static constexpr bool value = std::is_convertible_v; }; + // first_of(slice): getting begin of slice specifier range MDSPAN_TEMPLATE_REQUIRES( class Integral, @@ -96,12 +99,22 @@ constexpr Integral first_of(const Integral &i) { return i; } +// FIXME Pre-P3663, first_of should work on any integral-constant-like. 
+ template MDSPAN_INLINE_FUNCTION constexpr Integral first_of(const std::integral_constant&) { return integral_constant(); } +#if defined(MDSPAN_ENABLE_P3663) +template +MDSPAN_INLINE_FUNCTION +constexpr Integral first_of(const std::constant_wrapper&) { + return Value; +} +#endif + MDSPAN_INLINE_FUNCTION constexpr integral_constant first_of(const ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t &) { @@ -467,6 +480,17 @@ enum class check_static_bounds_result { unknown }; +// Clang 21.0.0 does not define __cpp_lib_tuple_like, so it does not +// support the tuple protocol for std::complex. Interestingly, it permits +// structured binding, but decomposes it into one element, not two. +// We work around with a special canonicalization case. +#if ! defined(__cpp_lib_tuple_like) || (__cpp_lib_tuple_like < 202311L) +template +constexpr bool is_std_complex = false; +template +constexpr bool is_std_complex> = true; +#endif + // TODO It's impossible to write an "if constexpr" check for // "structured binding into two elements is well-formed." Thus, we // must assume that the input Slices are all valid slice types. @@ -555,6 +579,13 @@ template return check_static_bounds_result::unknown; // 14.5 } } +#if ! defined(__cpp_lib_tuple_like) || (__cpp_lib_tuple_like < 202311L) + else if constexpr (is_std_complex) { + // std::complex only has run-time slice values, so we can't + // check at compile time whether they are in bounds. + return check_static_bounds_result::unknown; + } +#endif else { // 14.4 // NOTE: This case means that check_static_bounds cannot be // well-formed if it didn't fall into one of the above cases @@ -706,6 +737,15 @@ submdspan_canonicalize_one_slice(const extents& exts, Sli .stride = canonical_ice(s.stride) }; } +#if ! 
defined(__cpp_lib_tuple_like) || (__cpp_lib_tuple_like < 202311L) + else if constexpr (detail::is_std_complex) { + return strided_slice{ + .offset = canonical_ice(s.real()), + .extent = canonical_ice(s.imag()), + .stride = std::cw + }; + } +#endif else { // 11.4 auto [s_k0, s_k1] = s; using S_k0 = decltype(s_k0); diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index 46c1a4d0..5a829f06 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -92,8 +92,8 @@ template MDSPAN_INLINE_FUNCTION constexpr bool one_slice_out_of_bounds(const IndexType &ext, const Slice &slice) { using common_t = - std::common_type_t; - return static_cast(detail::first_of(slice)) == + std::common_type_t; + return static_cast(first_of(slice)) == static_cast(ext); } From b049333097a85b761f32fb0859528deba668a06d Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 22 May 2025 16:53:44 -0600 Subject: [PATCH 011/103] IT BUILDS -- pulled in R0 impl --- .../__p2630_bits/strided_slice.hpp | 9 +- .../experimental/__p2630_bits/submdspan.hpp | 12 +- .../__p2630_bits/submdspan_extents.hpp | 121 +++++++++++++++++- 3 files changed, 132 insertions(+), 10 deletions(-) diff --git a/include/experimental/__p2630_bits/strided_slice.hpp b/include/experimental/__p2630_bits/strided_slice.hpp index 374945d8..0422dfb3 100644 --- a/include/experimental/__p2630_bits/strided_slice.hpp +++ b/include/experimental/__p2630_bits/strided_slice.hpp @@ -41,17 +41,18 @@ namespace { // (anonymous) template struct mdspan_is_integral_constant: std::false_type {}; +#if defined(MDSPAN_ENABLE_P3663) + template<__mdspan_integral_constant_like T> + struct mdspan_is_integral_constant : std::true_type {}; +#else template struct mdspan_is_integral_constant>: std::true_type {}; +#endif template constexpr bool __mdspan_is_index_like_v = (std::is_integral_v && ! 
std::is_same_v) || -#if defined(MDSPAN_ENABLE_P3663) - __mdspan_integral_constant_like -#else mdspan_is_integral_constant::value; -#endif // MDSPAN_ENABLE_P3663 ; } // namespace (anonymous) diff --git a/include/experimental/__p2630_bits/submdspan.hpp b/include/experimental/__p2630_bits/submdspan.hpp index 6c2f6dfa..c3d71aec 100644 --- a/include/experimental/__p2630_bits/submdspan.hpp +++ b/include/experimental/__p2630_bits/submdspan.hpp @@ -26,9 +26,12 @@ MDSPAN_INLINE_FUNCTION constexpr auto submdspan(const mdspan &src, SliceSpecifiers... slices) { + #if defined(MDSPAN_ENABLE_P3663) + [[maybe_unused]] auto [...canonical_slices] = submdspan_canonicalize_slices(src.extents(), slices...); + static_assert(sizeof...(canonical_slices) == sizeof...(slices)); - auto [...canonical_slices] = submdspan_canonicalize_slices(src.extents(), slices...); +#if 0 // TODO FIX IN PROPOSAL: [canonical_]slices (incorrect formatting). auto sub_map_result = submdspan_mapping(src.mapping(), canonical_slices...); // TODO FIX IN PROPOSAL: It's src.data_handle(), not src.data(). 
@@ -36,7 +39,10 @@ submdspan(const mdspan &src, return mdspan(src.accessor().offset(src.data_handle(), sub_map_result.offset), sub_map_result.mapping, typename AccessorPolicy::offset_policy(src.accessor())); -#else +#endif // 0 +#endif + +//#else const auto sub_submdspan_mapping_result = submdspan_mapping(src.mapping(), slices...); // NVCC has a problem with the deduction so lets figure out the type using sub_mapping_t = std::remove_cv_t; @@ -47,6 +53,6 @@ submdspan(const mdspan &src, src.accessor().offset(src.data_handle(), sub_submdspan_mapping_result.offset), sub_submdspan_mapping_result.mapping, sub_accessor_t(src.accessor())); -#endif +//#endif } } // namespace MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 1feb663f..6b33f1f3 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -101,6 +101,8 @@ constexpr Integral first_of(const Integral &i) { // FIXME Pre-P3663, first_of should work on any integral-constant-like. +// NOTE This is technically not conforming. +// Pre-P3663, first_of should work on any integral-constant-like type. template MDSPAN_INLINE_FUNCTION constexpr Integral first_of(const std::integral_constant&) { @@ -108,10 +110,16 @@ constexpr Integral first_of(const std::integral_constant&) { } #if defined(MDSPAN_ENABLE_P3663) -template +// NOTE (mfh 2025/03/07) Canonicalize integral-constant-like +// to std::integral_constant, just to get things working for now. +// Later, go back and replace all use of std::integral_constant +// in the various mappings' submdspan_mapping_impl functions with +// std::constant_wrapper. 
+template<__mdspan_integral_constant_like T> MDSPAN_INLINE_FUNCTION -constexpr Integral first_of(const std::constant_wrapper&) { - return Value; +constexpr std::integral_constant, T::value> +first_of(const T&) { + return {}; } #endif @@ -121,6 +129,8 @@ first_of(const ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t &) { return integral_constant(); } +// TODO P3663 won't need this overload, +// because first_of should never see pair-like types. MDSPAN_TEMPLATE_REQUIRES( class Slice, /* requires */(index_pair_like::value) @@ -174,6 +184,16 @@ constexpr Integral return i; } +#if defined(MDSPAN_ENABLE_P3663) +// NOTE (mfh 2025/03/07) This should probably replace the above overload. +template<__mdspan_integral_constant_like T, class Extents, class Integral> + requires(std::is_convertible_v) +MDSPAN_INLINE_FUNCTION +constexpr Integral last_of(T, const Extents&, const Integral& i) { + return i; +} +#endif // MDSPAN_ENABLE_P3663 + MDSPAN_TEMPLATE_REQUIRES( size_t k, class Extents, class Slice, /* requires */(index_pair_like::value) @@ -184,6 +204,21 @@ constexpr auto last_of(std::integral_constant, const Extents &, return get<1>(i); } +// NOTE P3663 should not need this overload, +// because last_of should never see a pair-like type. 
+#if defined(MDSPAN_ENABLE_P3663) +MDSPAN_TEMPLATE_REQUIRES( + __mdspan_integral_constant_like T, class Extents, class Slice, + /* requires */(index_pair_like::value) +) +MDSPAN_INLINE_FUNCTION +constexpr auto last_of(T, const Extents &, + const Slice &i) { + using std::get; + return get<1>(i); +} +#endif + MDSPAN_TEMPLATE_REQUIRES( size_t k, class Extents, class IdxT1, class IdxT2, /* requires */ (index_pair_like, size_t>::value) @@ -192,6 +227,16 @@ constexpr auto last_of(std::integral_constant, const Extents &, const return get<1>(i); } +#if defined(MDSPAN_ENABLE_P3663) +MDSPAN_TEMPLATE_REQUIRES( + __mdspan_integral_constant_like T, class Extents, class IdxT1, class IdxT2, + /* requires */ (index_pair_like, size_t>::value) + ) +constexpr auto last_of(T, const Extents &, const std::tuple& i) { + return get<1>(i); +} +#endif + MDSPAN_TEMPLATE_REQUIRES( size_t k, class Extents, class IdxT1, class IdxT2, /* requires */ (index_pair_like, size_t>::value) @@ -201,12 +246,31 @@ constexpr auto last_of(std::integral_constant, const Extents &, const return i.second; } +#if defined(MDSPAN_ENABLE_P3663) +MDSPAN_TEMPLATE_REQUIRES( + __mdspan_integral_constant_like T, class Extents, class IdxT1, class IdxT2, + /* requires */ (index_pair_like, size_t>::value) + ) +MDSPAN_INLINE_FUNCTION +constexpr auto last_of(T, const Extents &, const std::pair& i) { + return i.second; +} +#endif + template MDSPAN_INLINE_FUNCTION constexpr auto last_of(std::integral_constant, const Extents &, const std::complex &i) { return i.imag(); } +#if defined(MDSPAN_ENABLE_P3663) +template<__mdspan_integral_constant_like ICL, class Extents, class T> +MDSPAN_INLINE_FUNCTION +constexpr auto last_of(ICL, const Extents &, const std::complex &i) { + return i.imag(); +} +#endif + // Suppress spurious warning with NVCC about no return statement. 
// This is a known issue in NVCC and NVC++ // Depending on the CUDA and GCC version we need both the builtin @@ -252,6 +316,19 @@ constexpr auto last_of(std::integral_constant, const Extents &ext, #pragma diagnostic pop #endif +#if defined(MDSPAN_ENABLE_P3663) +template<__mdspan_integral_constant_like T, class Extents> +MDSPAN_INLINE_FUNCTION +constexpr auto last_of(T, const Extents &ext, + ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t) { + if constexpr (Extents::static_extent(T::value) == dynamic_extent) { + return ext.extent(T::value); + } else { + return integral_constant(); + } +} +#endif + template MDSPAN_INLINE_FUNCTION @@ -261,6 +338,17 @@ last_of(std::integral_constant, const Extents &, return r.extent; } +#if defined(MDSPAN_ENABLE_P3663) +template <__mdspan_integral_constant_like ICL, class Extents, class OffsetType, class ExtentType, + class StrideType> +MDSPAN_INLINE_FUNCTION +constexpr OffsetType +last_of(ICL, const Extents &, + const strided_slice &r) { + return r.extent; +} +#endif + // get stride of slices template MDSPAN_INLINE_FUNCTION @@ -291,6 +379,19 @@ constexpr auto divide(const std::integral_constant &, return integral_constant(); } +#if defined(MDSPAN_ENABLE_P3663) +template +MDSPAN_INLINE_FUNCTION +constexpr auto divide(const std::constant_wrapper &, + const std::constant_wrapper &) { + // cutting short division by zero + // this is used for strided_slice with zero extent/stride + // + // NOTE For now, use integral_constant internally. 
+ return integral_constant(); +} +#endif + // multiply which can deal with integral constant preservation template MDSPAN_INLINE_FUNCTION @@ -322,6 +423,13 @@ struct StaticExtentFromRange, constexpr static size_t value = val1 - val0; }; +#if defined(MDSPAN_ENABLE_P3663) +template <__mdspan_integral_constant_like A, __mdspan_integral_constant_like B> +struct StaticExtentFromRange { + constexpr static size_t value = B::value - A::value; +}; +#endif + // compute new static extent from strided_slice, preserving static // knowledge template struct StaticExtentFromStridedRange { @@ -340,6 +448,13 @@ struct StaticExtentFromStridedRange, constexpr static size_t value = val0 > 0 ? 1 + (val0 - 1) / val1 : 0; }; +#if defined(MDSPAN_ENABLE_P3663) +template <__mdspan_integral_constant_like A, __mdspan_integral_constant_like B> +struct StaticExtentFromStridedRange { + constexpr static size_t value = A::value > 0 ? 1 + (A::value - 1) / B::value : 0; +}; +#endif + // creates new extents through recursive calls to next_extent member function // next_extent has different overloads for different types of stride specifiers template From de58bc0cf5f5af32e3bcc35ae23e1c7706c6335d Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 22 May 2025 21:14:44 -0600 Subject: [PATCH 012/103] submdspan_mapping with canonical slices actually builds now --- .../experimental/__p2630_bits/submdspan.hpp | 1 + .../__p2630_bits/submdspan_extents.hpp | 74 ++++++++++++++----- 2 files changed, 58 insertions(+), 17 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan.hpp b/include/experimental/__p2630_bits/submdspan.hpp index c3d71aec..9f297cf4 100644 --- a/include/experimental/__p2630_bits/submdspan.hpp +++ b/include/experimental/__p2630_bits/submdspan.hpp @@ -30,6 +30,7 @@ submdspan(const mdspan &src, #if defined(MDSPAN_ENABLE_P3663) [[maybe_unused]] auto [...canonical_slices] = submdspan_canonicalize_slices(src.extents(), slices...); static_assert(sizeof...(canonical_slices) == 
sizeof...(slices)); + [[maybe_unused]] auto sub_map_result = submdspan_mapping(src.mapping(), canonical_slices...); #if 0 // TODO FIX IN PROPOSAL: [canonical_]slices (incorrect formatting). diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 6b33f1f3..c5c8fa50 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -90,6 +90,18 @@ struct index_pair_like, IndexType> { // first_of(slice): getting begin of slice specifier range + +#if defined(MDSPAN_ENABLE_P3663) + +template + requires (std::is_signed_v || std::is_unsigned_v) +MDSPAN_INLINE_FUNCTION +constexpr Integral first_of(Integral i) { + return i; +} + +#else + MDSPAN_TEMPLATE_REQUIRES( class Integral, /* requires */(std::is_convertible_v) @@ -99,15 +111,7 @@ constexpr Integral first_of(const Integral &i) { return i; } -// FIXME Pre-P3663, first_of should work on any integral-constant-like. - -// NOTE This is technically not conforming. -// Pre-P3663, first_of should work on any integral-constant-like type. -template -MDSPAN_INLINE_FUNCTION -constexpr Integral first_of(const std::integral_constant&) { - return integral_constant(); -} +#endif // MDSPAN_ENABLE_P3663 #if defined(MDSPAN_ENABLE_P3663) // NOTE (mfh 2025/03/07) Canonicalize integral-constant-like @@ -117,9 +121,18 @@ constexpr Integral first_of(const std::integral_constant&) { // std::constant_wrapper. template<__mdspan_integral_constant_like T> MDSPAN_INLINE_FUNCTION -constexpr std::integral_constant, T::value> +constexpr auto first_of(const T&) { - return {}; + return std::integral_constant{}; +} +#else + +// NOTE This is technically not conforming. +// Pre-P3663, first_of should work on any integral-constant-like type. 
+template +MDSPAN_INLINE_FUNCTION +constexpr Integral first_of(const std::integral_constant&) { + return integral_constant(); } #endif @@ -374,6 +387,11 @@ template MDSPAN_INLINE_FUNCTION constexpr auto divide(const std::integral_constant &, const std::integral_constant &) { +#if defined(MDSPAN_ENABLE_P3663) + static_assert(std::is_signed_v || std::is_unsigned_v); + static_assert(std::is_signed_v || std::is_unsigned_v); +#endif + // cutting short division by zero // this is used for strided_slice with zero extent/stride return integral_constant(); @@ -382,13 +400,18 @@ constexpr auto divide(const std::integral_constant &, #if defined(MDSPAN_ENABLE_P3663) template MDSPAN_INLINE_FUNCTION -constexpr auto divide(const std::constant_wrapper &, - const std::constant_wrapper &) { +constexpr auto divide(std::constant_wrapper i0, + std::constant_wrapper i1) { + using I0 = typename std::constant_wrapper::value_type; + using I1 = typename std::constant_wrapper::value_type; + static_assert(std::is_signed_v || std::is_unsigned_v); + static_assert(std::is_signed_v || std::is_unsigned_v); + // cutting short division by zero // this is used for strided_slice with zero extent/stride // // NOTE For now, use integral_constant internally. 
- return integral_constant(); + return integral_constant(); } #endif @@ -406,6 +429,20 @@ constexpr auto multiply(const std::integral_constant &, return integral_constant(); } +#if defined(MDSPAN_ENABLE_P3663) +template +MDSPAN_INLINE_FUNCTION +constexpr auto multiply(std::constant_wrapper i0, + std::constant_wrapper i1) { + using I0 = typename std::constant_wrapper::value_type; + using I1 = typename std::constant_wrapper::value_type; + static_assert(std::is_signed_v || std::is_unsigned_v); + static_assert(std::is_signed_v || std::is_unsigned_v); + + return integral_constant(); +} +#endif + // compute new static extent from range, preserving static knowledge template struct StaticExtentFromRange { constexpr static size_t value = dynamic_extent; @@ -566,8 +603,11 @@ constexpr auto canonical_ice(S s) { // cast again to IndexType, so that we don't get a weird // constant_wrapper whose value has a different type // than the second template argument. + + // TODO NOT IN PROPOSAL? Make sure constant_wrapper only has one template argument. + // The first template argument is a value of an exposition-only type, NOT the actual value! if constexpr (__mdspan_integral_constant_like) { - return std::constant_wrapper(index_cast(S::value)), IndexType>{}; + return std::constant_wrapper(index_cast(S::value))>{}; } else { return static_cast(index_cast(s)); @@ -764,8 +804,8 @@ template template constexpr bool is_constant_wrapper = false; -template -constexpr bool is_constant_wrapper> = true; +template +constexpr bool is_constant_wrapper> = true; // [mdspan.sub.slices] 1 template From a336fb01632bebadc625a93ab51f02f07bc606d0 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 22 May 2025 21:28:40 -0600 Subject: [PATCH 013/103] CHECKPOINT submdspan builds now when submdspan_mapping is called with canonical slices, but the submdspan test fails. Investigate whether this is a bug in the implementation. 
--- .../experimental/__p2630_bits/submdspan.hpp | 12 +++------- .../__p2630_bits/submdspan_mapping.hpp | 22 +++++++++++++++++++ 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan.hpp b/include/experimental/__p2630_bits/submdspan.hpp index 9f297cf4..4357a7e6 100644 --- a/include/experimental/__p2630_bits/submdspan.hpp +++ b/include/experimental/__p2630_bits/submdspan.hpp @@ -28,11 +28,8 @@ submdspan(const mdspan &src, SliceSpecifiers... slices) { #if defined(MDSPAN_ENABLE_P3663) - [[maybe_unused]] auto [...canonical_slices] = submdspan_canonicalize_slices(src.extents(), slices...); + auto [...canonical_slices] = submdspan_canonicalize_slices(src.extents(), slices...); static_assert(sizeof...(canonical_slices) == sizeof...(slices)); - [[maybe_unused]] auto sub_map_result = submdspan_mapping(src.mapping(), canonical_slices...); - -#if 0 // TODO FIX IN PROPOSAL: [canonical_]slices (incorrect formatting). auto sub_map_result = submdspan_mapping(src.mapping(), canonical_slices...); // TODO FIX IN PROPOSAL: It's src.data_handle(), not src.data(). 
@@ -40,10 +37,7 @@ submdspan(const mdspan &src, return mdspan(src.accessor().offset(src.data_handle(), sub_map_result.offset), sub_map_result.mapping, typename AccessorPolicy::offset_policy(src.accessor())); -#endif // 0 -#endif - -//#else +#else const auto sub_submdspan_mapping_result = submdspan_mapping(src.mapping(), slices...); // NVCC has a problem with the deduction so lets figure out the type using sub_mapping_t = std::remove_cv_t; @@ -54,6 +48,6 @@ submdspan(const mdspan &src, src.accessor().offset(src.data_handle(), sub_submdspan_mapping_result.offset), sub_submdspan_mapping_result.mapping, sub_accessor_t(src.accessor())); -//#endif +#endif } } // namespace MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index 5a829f06..2af015d5 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -132,6 +132,26 @@ MDSPAN_INLINE_FUNCTION constexpr auto construct_sub_strides( static_cast(get(slices_stride_factor)))...}}; } +// NOTE Make the submdspan_mapping_impl functions recognize +// strided_slice with compile-time stride 1 as a range slice. +// Otherwise, they fall back to layout_stride::mapping. +// This might be a bug in the pre-P3663 implementation. 
+ +#if defined(MDSPAN_ENABLE_P3663) + +template +struct is_range_slice : std::false_type {}; + +template +struct is_range_slice : std::true_type {}; + +template +struct is_range_slice>, IndexType> { + static constexpr bool value = (std::constant_wrapper{}() == IndexType(1)); +}; + +#else + template struct is_range_slice { constexpr static bool value = @@ -139,6 +159,8 @@ struct is_range_slice { index_pair_like::value; }; +#endif // MDSPAN_ENABLE_P3663 + template constexpr bool is_range_slice_v = is_range_slice::value; From 4ab3ef6c6f8a2af32f45b1522300dc78a72dbc41 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 22 May 2025 22:49:07 -0600 Subject: [PATCH 014/103] P3663 & non-P3663 versions build & pass tests Fix extent computation for std::complex. It builds and passes tests. NEXT STEPS: * Make it ill-formed to call submdspan_mapping with non-canonical slice types * Make P3663 version only use constant_wrapper and canonical slice types in submdspan_mapping_impl functions --- include/experimental/__p2630_bits/submdspan_extents.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index c5c8fa50..405bbf0d 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -896,7 +896,7 @@ submdspan_canonicalize_one_slice(const extents& exts, Sli else if constexpr (detail::is_std_complex) { return strided_slice{ .offset = canonical_ice(s.real()), - .extent = canonical_ice(s.imag()), + .extent = canonical_ice(s.imag() - s.real()), .stride = std::cw }; } From c82425f7a1befa70af349c5ab7758b3feabdca89 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Fri, 23 May 2025 09:24:14 -0600 Subject: [PATCH 015/103] Deploy is_canonical_slice_type in layout_left (submdspan_mapping_impl) --- .../__p2630_bits/submdspan_extents.hpp | 34 +++++++++++-------- 
.../__p2630_bits/submdspan_mapping.hpp | 4 +++ 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 405bbf0d..42d4db6f 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -804,32 +804,36 @@ template template constexpr bool is_constant_wrapper = false; -template -constexpr bool is_constant_wrapper> = true; +template +constexpr bool is_constant_wrapper> = true; // [mdspan.sub.slices] 1 template -constexpr bool is_canonical_submdspan_index_type = - std::is_same_v || ( - is_constant_wrapper && - std::is_same_v - ); +constexpr bool is_canonical_submdspan_index_type() { + if constexpr (is_constant_wrapper) { + using value_type = typename T::value_type; + return std::is_same_v; + } + else { + return std::is_same_v; + } +} // [mdspan.sub.slices] 2 template MDSPAN_INLINE_FUNCTION constexpr bool is_canonical_slice_type() { - if constexpr ( - std::is_same_v || // 2.1 - is_canonical_submdspan_index_type) // 2.2 - { + if constexpr (std::is_same_v) { // 2.1 + return true; + } + else if constexpr (is_canonical_submdspan_index_type()) { // 2.2 return true; } else if constexpr (is_strided_slice::value) { // 2.3 if constexpr ( // 2.3.1 - is_canonical_submdspan_index_type && - is_canonical_submdspan_index_type && - is_canonical_submdspan_index_type) + is_canonical_submdspan_index_type() && + is_canonical_submdspan_index_type() && + is_canonical_submdspan_index_type()) { if constexpr ( is_constant_wrapper && @@ -840,7 +844,7 @@ constexpr bool is_canonical_slice_type() { return Extent == 0 || Stride > 0; // 2.3.2 } else { - return false; + return true; } } else { diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index 2af015d5..210d510a 100644 --- 
a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -265,6 +265,10 @@ MDSPAN_INLINE_FUNCTION constexpr auto layout_left::mapping::submdspan_mapping_impl( SliceSpecifiers... slices) const { +#if defined(MDSPAN_ENABLE_P3663) + static_assert((detail::is_canonical_slice_type>() && ...)); +#endif // MDSPAN_ENABLE_P3663 + // compute sub extents using src_ext_t = Extents; auto dst_ext = submdspan_extents(extents(), slices...); From a87b24d1f55d242375c5152f86b2cafec2a6c89e Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Fri, 23 May 2025 11:29:07 -0600 Subject: [PATCH 016/103] Add is_canonical_slice_type check to all submdspan_mapping_impl --- .../__p2630_bits/submdspan_mapping.hpp | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index 210d510a..f79e9183 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -265,6 +265,7 @@ MDSPAN_INLINE_FUNCTION constexpr auto layout_left::mapping::submdspan_mapping_impl( SliceSpecifiers... slices) const { + // TODO We actually need to check that Slices...[k] is a canonical k-th submdspan slice type for all k. #if defined(MDSPAN_ENABLE_P3663) static_assert((detail::is_canonical_slice_type>() && ...)); #endif // MDSPAN_ENABLE_P3663 @@ -332,6 +333,16 @@ MDSPAN_INLINE_FUNCTION constexpr auto MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded::mapping::submdspan_mapping_impl( SliceSpecifiers... slices) const { + // TODO We actually need to check that Slices...[k] is a canonical k-th submdspan slice type for all k. 
+#if defined(MDSPAN_ENABLE_P3663) + { + using MDSPAN_IMPL_STANDARD_NAMESPACE::detail::is_canonical_slice_type; + static_assert((is_canonical_slice_type< + typename Extents::index_type, + std::remove_cvref_t>() && ...)); + } +#endif // MDSPAN_ENABLE_P3663 + // compute sub extents using src_ext_t = Extents; auto dst_ext = submdspan_extents(extents(), slices...); @@ -500,6 +511,11 @@ MDSPAN_INLINE_FUNCTION constexpr auto layout_right::mapping::submdspan_mapping_impl( SliceSpecifiers... slices) const { + // TODO We actually need to check that Slices...[k] is a canonical k-th submdspan slice type for all k. +#if defined(MDSPAN_ENABLE_P3663) + static_assert((detail::is_canonical_slice_type>() && ...)); +#endif // MDSPAN_ENABLE_P3663 + // compute sub extents using src_ext_t = Extents; auto dst_ext = submdspan_extents(extents(), slices...); @@ -565,6 +581,16 @@ MDSPAN_INLINE_FUNCTION constexpr auto MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded::mapping::submdspan_mapping_impl( SliceSpecifiers... slices) const { + // TODO We actually need to check that Slices...[k] is a canonical k-th submdspan slice type for all k. +#if defined(MDSPAN_ENABLE_P3663) + { + using MDSPAN_IMPL_STANDARD_NAMESPACE::detail::is_canonical_slice_type; + static_assert((is_canonical_slice_type< + typename Extents::index_type, + std::remove_cvref_t>() && ...)); + } +#endif // MDSPAN_ENABLE_P3663 + // compute sub extents using src_ext_t = Extents; auto dst_ext = submdspan_extents(extents(), slices...); @@ -640,6 +666,12 @@ template MDSPAN_INLINE_FUNCTION constexpr auto layout_stride::mapping::submdspan_mapping_impl( SliceSpecifiers... slices) const { + + // TODO We actually need to check that Slices...[k] is a canonical k-th submdspan slice type for all k. 
+#if defined(MDSPAN_ENABLE_P3663) + static_assert((detail::is_canonical_slice_type>() && ...)); +#endif // MDSPAN_ENABLE_P3663 + auto dst_ext = submdspan_extents(extents(), slices...); using dst_ext_t = decltype(dst_ext); auto inv_map = detail::inv_map_rank(std::integral_constant(), From 25f725e8f6009b3c82bcfe5d928cedd3b13ad987 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Fri, 23 May 2025 11:39:21 -0600 Subject: [PATCH 017/103] Check canonical k-th slice type All submdspan_mapping_impl functions now check that every k-th slice type is a canonical k-th slice type. This ensures that submdspan_mapping_impl is only well-formed for canonical k-th slice types, and is ill-formed otherwise. --- .../__p2630_bits/submdspan_extents.hpp | 22 +++++++++---- .../__p2630_bits/submdspan_mapping.hpp | 32 +++++++++---------- 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 42d4db6f..91ba0e90 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -857,19 +857,31 @@ constexpr bool is_canonical_slice_type() { } // [mdspan.sub.slices] 3 + template MDSPAN_INLINE_FUNCTION -constexpr auto -is_canonical_kth_submdspan_slice_type(const extents& exts, Slice slice) +constexpr void +check_canonical_kth_submdspan_slice_type(const extents& exts, Slice slice) { if constexpr (! is_canonical_slice_type()) { - return false; // 3.1 + static_assert(false); } else { // 3.2 - return check_static_bounds(exts) != check_static_bounds_result::out_of_bounds; + static_assert(check_static_bounds(exts) != check_static_bounds_result::out_of_bounds); } } +template +MDSPAN_INLINE_FUNCTION +constexpr void +check_canonical_kth_subdmspan_slice_types( + const extents& exts, Slices... 
slices) +{ + [&] (std::index_sequence) { + (check_canonical_kth_submdspan_slice_type(exts, slices...[Inds]), ...); + } (std::make_index_sequence{}); +} + // [mdspan.sub.slices] 11 template MDSPAN_INLINE_FUNCTION @@ -877,8 +889,6 @@ constexpr auto submdspan_canonicalize_one_slice(const extents& exts, Slice s) { // Part of [mdspan.sub.slices] 9. // This could be combined with the if constexpr branches below. - // - // NOTE This is not a constant expression (because it takes exts). static_assert(check_static_bounds(exts) != check_static_bounds_result::out_of_bounds); // TODO Check Precondition that s is a valid k-th submdspan slice for exts. diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index f79e9183..4d94bc62 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -265,9 +265,11 @@ MDSPAN_INLINE_FUNCTION constexpr auto layout_left::mapping::submdspan_mapping_impl( SliceSpecifiers... slices) const { - // TODO We actually need to check that Slices...[k] is a canonical k-th submdspan slice type for all k. #if defined(MDSPAN_ENABLE_P3663) - static_assert((detail::is_canonical_slice_type>() && ...)); + { + using detail::check_canonical_kth_subdmspan_slice_types; + check_canonical_kth_subdmspan_slice_types(extents(), slices...); + } #endif // MDSPAN_ENABLE_P3663 // compute sub extents @@ -333,13 +335,10 @@ MDSPAN_INLINE_FUNCTION constexpr auto MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded::mapping::submdspan_mapping_impl( SliceSpecifiers... slices) const { - // TODO We actually need to check that Slices...[k] is a canonical k-th submdspan slice type for all k. 
#if defined(MDSPAN_ENABLE_P3663) { - using MDSPAN_IMPL_STANDARD_NAMESPACE::detail::is_canonical_slice_type; - static_assert((is_canonical_slice_type< - typename Extents::index_type, - std::remove_cvref_t>() && ...)); + using MDSPAN_IMPL_STANDARD_NAMESPACE::detail::check_canonical_kth_subdmspan_slice_types; + check_canonical_kth_subdmspan_slice_types(extents(), slices...); } #endif // MDSPAN_ENABLE_P3663 @@ -511,9 +510,11 @@ MDSPAN_INLINE_FUNCTION constexpr auto layout_right::mapping::submdspan_mapping_impl( SliceSpecifiers... slices) const { - // TODO We actually need to check that Slices...[k] is a canonical k-th submdspan slice type for all k. #if defined(MDSPAN_ENABLE_P3663) - static_assert((detail::is_canonical_slice_type>() && ...)); + { + using detail::check_canonical_kth_subdmspan_slice_types; + check_canonical_kth_subdmspan_slice_types(extents(), slices...); + } #endif // MDSPAN_ENABLE_P3663 // compute sub extents @@ -581,13 +582,10 @@ MDSPAN_INLINE_FUNCTION constexpr auto MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded::mapping::submdspan_mapping_impl( SliceSpecifiers... slices) const { - // TODO We actually need to check that Slices...[k] is a canonical k-th submdspan slice type for all k. #if defined(MDSPAN_ENABLE_P3663) { - using MDSPAN_IMPL_STANDARD_NAMESPACE::detail::is_canonical_slice_type; - static_assert((is_canonical_slice_type< - typename Extents::index_type, - std::remove_cvref_t>() && ...)); + using MDSPAN_IMPL_STANDARD_NAMESPACE::detail::check_canonical_kth_subdmspan_slice_types; + check_canonical_kth_subdmspan_slice_types(extents(), slices...); } #endif // MDSPAN_ENABLE_P3663 @@ -667,9 +665,11 @@ MDSPAN_INLINE_FUNCTION constexpr auto layout_stride::mapping::submdspan_mapping_impl( SliceSpecifiers... slices) const { - // TODO We actually need to check that Slices...[k] is a canonical k-th submdspan slice type for all k. 
#if defined(MDSPAN_ENABLE_P3663) - static_assert((detail::is_canonical_slice_type>() && ...)); + { + using detail::check_canonical_kth_subdmspan_slice_types; + check_canonical_kth_subdmspan_slice_types(extents(), slices...); + } #endif // MDSPAN_ENABLE_P3663 auto dst_ext = submdspan_extents(extents(), slices...); From 0e17f6782eadedcc5096116662af9190e00e78f7 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Mon, 26 May 2025 22:46:28 -0600 Subject: [PATCH 018/103] Replace integral_constant with cw in P3663 branch only. --- .../__p2630_bits/strided_slice.hpp | 5 + .../__p2630_bits/submdspan_extents.hpp | 424 +++++++++++------- .../__p2630_bits/submdspan_mapping.hpp | 53 ++- 3 files changed, 312 insertions(+), 170 deletions(-) diff --git a/include/experimental/__p2630_bits/strided_slice.hpp b/include/experimental/__p2630_bits/strided_slice.hpp index 0422dfb3..527f891b 100644 --- a/include/experimental/__p2630_bits/strided_slice.hpp +++ b/include/experimental/__p2630_bits/strided_slice.hpp @@ -45,6 +45,7 @@ namespace { // (anonymous) template<__mdspan_integral_constant_like T> struct mdspan_is_integral_constant : std::true_type {}; #else + // NOTE Does this mean existing code is not conforming? template struct mdspan_is_integral_constant>: std::true_type {}; #endif @@ -52,7 +53,11 @@ namespace { // (anonymous) template constexpr bool __mdspan_is_index_like_v = (std::is_integral_v && ! std::is_same_v) || +#if defined(MDSPAN_ENABLE_P3663) + __mdspan_integral_constant_like; +#else mdspan_is_integral_constant::value; +#endif ; } // namespace (anonymous) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 91ba0e90..eeb3e357 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -29,23 +29,76 @@ namespace detail { // InvMapRank is an index_sequence, which we build recursively // to contain the mapped indices. 
// end of recursion specialization containing the final index_sequence -template + +template < +#if defined(MDSPAN_ENABLE_P3663) + auto +#else + size_t +#endif + Counter, size_t... MapIdxs> MDSPAN_INLINE_FUNCTION -constexpr auto inv_map_rank(std::integral_constant, std::index_sequence) { +constexpr auto inv_map_rank( +#if defined(MDSPAN_ENABLE_P3663) + std::constant_wrapper, +#else + std::integral_constant, +#endif + std::index_sequence) +{ return std::index_sequence(); } // specialization reducing rank by one (i.e., integral slice specifier) -template +template< +#if defined(MDSPAN_ENABLE_P3663) + auto +#else + size_t +#endif + Counter, + class Slice, + class... SliceSpecifiers, + size_t... MapIdxs> MDSPAN_INLINE_FUNCTION -constexpr auto inv_map_rank(std::integral_constant, std::index_sequence, Slice, - SliceSpecifiers... slices) { - using next_idx_seq_t = std::conditional_t, - std::index_sequence, - std::index_sequence>; +constexpr auto inv_map_rank( +#if defined(MDSPAN_ENABLE_P3663) + std::constant_wrapper counter, +#else + std::integral_constant, +#endif + std::index_sequence, + Slice, + SliceSpecifiers... 
slices) +{ + constexpr size_t counter_value = +#if defined(MDSPAN_ENABLE_P3663) + decltype(counter){}(); +#else + Counter; +#endif + + using next_idx_seq_t = std::conditional_t< + std::is_convertible_v, + std::index_sequence, + std::index_sequence + >; - return inv_map_rank(std::integral_constant(), next_idx_seq_t(), - slices...); +#if defined(MDSPAN_ENABLE_P3663) + static_assert(std::is_same_v< + decltype(counter + std::cw), + std::constant_wrapper + >); +#endif + + return inv_map_rank( +#if defined(MDSPAN_ENABLE_P3663) + std::cw, +#else + std::integral_constant(), +#endif + next_idx_seq_t(), + slices...); } // Helper for identifying strided_slice @@ -114,32 +167,37 @@ constexpr Integral first_of(const Integral &i) { #endif // MDSPAN_ENABLE_P3663 #if defined(MDSPAN_ENABLE_P3663) -// NOTE (mfh 2025/03/07) Canonicalize integral-constant-like -// to std::integral_constant, just to get things working for now. -// Later, go back and replace all use of std::integral_constant -// in the various mappings' submdspan_mapping_impl functions with -// std::constant_wrapper. -template<__mdspan_integral_constant_like T> +template MDSPAN_INLINE_FUNCTION constexpr auto -first_of(const T&) { - return std::integral_constant{}; +first_of(std::constant_wrapper i) { + return i; } #else - // NOTE This is technically not conforming. // Pre-P3663, first_of should work on any integral-constant-like type. +// Replacing the return type "Integral" with auto does not change test results. 
template MDSPAN_INLINE_FUNCTION -constexpr Integral first_of(const std::integral_constant&) { +constexpr Integral +first_of(const std::integral_constant&) { return integral_constant(); } #endif MDSPAN_INLINE_FUNCTION -constexpr integral_constant +constexpr +#if defined(MDSPAN_ENABLE_P3663) +auto +#else +integral_constant +#endif first_of(const ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t &) { - return integral_constant(); +#if defined(MDSPAN_ENABLE_P3663) + return std::cw; +#else + return {}; +#endif } // TODO P3663 won't need this overload, @@ -188,101 +246,125 @@ first_of(const strided_slice &r) { // of the original view and which rank from the extents. // This is needed in the case of slice being full_extent_t. MDSPAN_TEMPLATE_REQUIRES( - size_t k, class Extents, class Integral, +#if defined(MDSPAN_ENABLE_P3663) + auto +#else + size_t +#endif + k, + class Extents, + class Integral, /* requires */(std::is_convertible_v) ) MDSPAN_INLINE_FUNCTION -constexpr Integral - last_of(std::integral_constant, const Extents &, const Integral &i) { - return i; -} - +constexpr Integral last_of( #if defined(MDSPAN_ENABLE_P3663) -// NOTE (mfh 2025/03/07) This should probably replace the above overload. -template<__mdspan_integral_constant_like T, class Extents, class Integral> - requires(std::is_convertible_v) -MDSPAN_INLINE_FUNCTION -constexpr Integral last_of(T, const Extents&, const Integral& i) { + std::constant_wrapper, +#else + std::integral_constant, +#endif + const Extents &, + const Integral &i) +{ return i; } -#endif // MDSPAN_ENABLE_P3663 - -MDSPAN_TEMPLATE_REQUIRES( - size_t k, class Extents, class Slice, - /* requires */(index_pair_like::value) -) -MDSPAN_INLINE_FUNCTION -constexpr auto last_of(std::integral_constant, const Extents &, - const Slice &i) { - return get<1>(i); -} // NOTE P3663 should not need this overload, // because last_of should never see a pair-like type. 
-#if defined(MDSPAN_ENABLE_P3663) MDSPAN_TEMPLATE_REQUIRES( - __mdspan_integral_constant_like T, class Extents, class Slice, +#if defined(MDSPAN_ENABLE_P3663) + auto +#else + size_t +#endif + k, + class Extents, class Slice, /* requires */(index_pair_like::value) ) MDSPAN_INLINE_FUNCTION -constexpr auto last_of(T, const Extents &, - const Slice &i) { +constexpr auto last_of( +#if defined(MDSPAN_ENABLE_P3663) + std::constant_wrapper, +#else + std::integral_constant, +#endif + const Extents &, + const Slice &i) +{ +#if defined(MDSPAN_ENABLE_P3663) using std::get; - return get<1>(i); -} #endif - -MDSPAN_TEMPLATE_REQUIRES( - size_t k, class Extents, class IdxT1, class IdxT2, - /* requires */ (index_pair_like, size_t>::value) - ) -constexpr auto last_of(std::integral_constant, const Extents &, const std::tuple& i) { return get<1>(i); } -#if defined(MDSPAN_ENABLE_P3663) MDSPAN_TEMPLATE_REQUIRES( - __mdspan_integral_constant_like T, class Extents, class IdxT1, class IdxT2, +#if defined(MDSPAN_ENABLE_P3663) + auto +#else + size_t +#endif + k, + class Extents, class IdxT1, class IdxT2, /* requires */ (index_pair_like, size_t>::value) ) -constexpr auto last_of(T, const Extents &, const std::tuple& i) { +constexpr auto last_of( +#if defined(MDSPAN_ENABLE_P3663) + std::constant_wrapper, +#else + std::integral_constant, +#endif + const Extents &, + const std::tuple& i) +{ +#if defined(MDSPAN_ENABLE_P3663) + using std::get; +#endif return get<1>(i); } -#endif MDSPAN_TEMPLATE_REQUIRES( - size_t k, class Extents, class IdxT1, class IdxT2, - /* requires */ (index_pair_like, size_t>::value) - ) -MDSPAN_INLINE_FUNCTION -constexpr auto last_of(std::integral_constant, const Extents &, const std::pair& i) { - return i.second; -} - #if defined(MDSPAN_ENABLE_P3663) -MDSPAN_TEMPLATE_REQUIRES( - __mdspan_integral_constant_like T, class Extents, class IdxT1, class IdxT2, + auto +#else + size_t +#endif + k, + class Extents, class IdxT1, class IdxT2, /* requires */ (index_pair_like, 
size_t>::value) ) MDSPAN_INLINE_FUNCTION -constexpr auto last_of(T, const Extents &, const std::pair& i) { - return i.second; -} +constexpr auto last_of( +#if defined(MDSPAN_ENABLE_P3663) + std::constant_wrapper, +#else + std::integral_constant, #endif - -template -MDSPAN_INLINE_FUNCTION -constexpr auto last_of(std::integral_constant, const Extents &, const std::complex &i) { - return i.imag(); + const Extents &, + const std::pair& i) +{ + return i.second; } +template< #if defined(MDSPAN_ENABLE_P3663) -template<__mdspan_integral_constant_like ICL, class Extents, class T> + auto +#else + size_t +#endif + k, + class Extents, class T> MDSPAN_INLINE_FUNCTION -constexpr auto last_of(ICL, const Extents &, const std::complex &i) { +constexpr auto last_of( +#if defined(MDSPAN_ENABLE_P3663) + std::constant_wrapper, +#else + std::integral_constant, +#endif + const Extents &, + const std::complex &i) +{ return i.imag(); } -#endif // Suppress spurious warning with NVCC about no return statement. // This is a known issue in NVCC and NVC++ @@ -303,14 +385,39 @@ constexpr auto last_of(ICL, const Extents &, const std::complex &i) { #pragma diagnostic push #pragma diag_suppress = implicit_return_from_non_void_function #endif -template +template < +#if defined(MDSPAN_ENABLE_P3663) + auto +#else + size_t +#endif + k, + class Extents> MDSPAN_INLINE_FUNCTION -constexpr auto last_of(std::integral_constant, const Extents &ext, - ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t) { - if constexpr (Extents::static_extent(k) == dynamic_extent) { - return ext.extent(k); - } else { - return integral_constant(); +constexpr auto last_of( +#if defined(MDSPAN_ENABLE_P3663) + std::constant_wrapper, +#else + std::integral_constant, +#endif + const Extents &ext, + ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t) +{ +#if defined(MDSPAN_ENABLE_P3663) + constexpr size_t k_value = std::constant_wrapper{}(); +#else + constexpr size_t k_value = k; +#endif + + if constexpr (Extents::static_extent(k_value) == 
dynamic_extent) { + return ext.extent(k_value); + } + else { +#if defined(MDSPAN_ENABLE_P3663) + return std::cw; +#else + return integral_constant(); +#endif } #if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) // Even with CUDA_ARCH protection this thing warns about calling host function @@ -329,44 +436,40 @@ constexpr auto last_of(std::integral_constant, const Extents &ext, #pragma diagnostic pop #endif +template < #if defined(MDSPAN_ENABLE_P3663) -template<__mdspan_integral_constant_like T, class Extents> -MDSPAN_INLINE_FUNCTION -constexpr auto last_of(T, const Extents &ext, - ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t) { - if constexpr (Extents::static_extent(T::value) == dynamic_extent) { - return ext.extent(T::value); - } else { - return integral_constant(); - } -} -#endif - -template + auto +#else + size_t +#endif + k, + class Extents, + class OffsetType, + class ExtentType, + class StrideType> MDSPAN_INLINE_FUNCTION constexpr OffsetType -last_of(std::integral_constant, const Extents &, - const strided_slice &r) { - return r.extent; -} - +last_of( #if defined(MDSPAN_ENABLE_P3663) -template <__mdspan_integral_constant_like ICL, class Extents, class OffsetType, class ExtentType, - class StrideType> -MDSPAN_INLINE_FUNCTION -constexpr OffsetType -last_of(ICL, const Extents &, - const strided_slice &r) { - return r.extent; -} + std::constant_wrapper, +#else + std::integral_constant, #endif + const Extents &, + const strided_slice &r) +{ + return r.extent; // FIXME then why does this return OffsetType? 
+} // get stride of slices template MDSPAN_INLINE_FUNCTION constexpr auto stride_of(const T &) { +#if defined(MDSPAN_ENABLE_P3663) + return std::cw; +#else return integral_constant(); +#endif } template @@ -383,20 +486,6 @@ constexpr auto divide(const T0 &v0, const T1 &v1) { return IndexT(v0) / IndexT(v1); } -template -MDSPAN_INLINE_FUNCTION -constexpr auto divide(const std::integral_constant &, - const std::integral_constant &) { -#if defined(MDSPAN_ENABLE_P3663) - static_assert(std::is_signed_v || std::is_unsigned_v); - static_assert(std::is_signed_v || std::is_unsigned_v); -#endif - - // cutting short division by zero - // this is used for strided_slice with zero extent/stride - return integral_constant(); -} - #if defined(MDSPAN_ENABLE_P3663) template MDSPAN_INLINE_FUNCTION @@ -409,9 +498,16 @@ constexpr auto divide(std::constant_wrapper i0, // cutting short division by zero // this is used for strided_slice with zero extent/stride - // - // NOTE For now, use integral_constant internally. 
- return integral_constant(); + return std::cw; +} +#else +template +MDSPAN_INLINE_FUNCTION +constexpr auto divide(const std::integral_constant &, + const std::integral_constant &) { + // cutting short division by zero + // this is used for strided_slice with zero extent/stride + return integral_constant(); } #endif @@ -422,13 +518,6 @@ constexpr auto multiply(const T0 &v0, const T1 &v1) { return IndexT(v0) * IndexT(v1); } -template -MDSPAN_INLINE_FUNCTION -constexpr auto multiply(const std::integral_constant &, - const std::integral_constant &) { - return integral_constant(); -} - #if defined(MDSPAN_ENABLE_P3663) template MDSPAN_INLINE_FUNCTION @@ -439,7 +528,14 @@ constexpr auto multiply(std::constant_wrapper i0, static_assert(std::is_signed_v || std::is_unsigned_v); static_assert(std::is_signed_v || std::is_unsigned_v); - return integral_constant(); + return std::cw; +} +#else +template +MDSPAN_INLINE_FUNCTION +constexpr auto multiply(const std::integral_constant &, + const std::integral_constant &) { + return integral_constant(); } #endif @@ -448,6 +544,12 @@ template struct StaticExtentFromRange { constexpr static size_t value = dynamic_extent; }; +#if defined(MDSPAN_ENABLE_P3663) +template <__mdspan_integral_constant_like A, __mdspan_integral_constant_like B> +struct StaticExtentFromRange { + constexpr static size_t value = B::value - A::value; +}; +#else template struct StaticExtentFromRange, std::integral_constant> { @@ -459,12 +561,6 @@ struct StaticExtentFromRange, integral_constant> { constexpr static size_t value = val1 - val0; }; - -#if defined(MDSPAN_ENABLE_P3663) -template <__mdspan_integral_constant_like A, __mdspan_integral_constant_like B> -struct StaticExtentFromRange { - constexpr static size_t value = B::value - A::value; -}; #endif // compute new static extent from strided_slice, preserving static @@ -473,6 +569,12 @@ template struct StaticExtentFromStridedRange { constexpr static size_t value = dynamic_extent; }; +#if 
defined(MDSPAN_ENABLE_P3663) +template <__mdspan_integral_constant_like A, __mdspan_integral_constant_like B> +struct StaticExtentFromStridedRange { + constexpr static size_t value = A::value > 0 ? 1 + (A::value - 1) / B::value : 0; +}; +#else template struct StaticExtentFromStridedRange, std::integral_constant> { @@ -484,12 +586,6 @@ struct StaticExtentFromStridedRange, integral_constant> { constexpr static size_t value = val0 > 0 ? 1 + (val0 - 1) / val1 : 0; }; - -#if defined(MDSPAN_ENABLE_P3663) -template <__mdspan_integral_constant_like A, __mdspan_integral_constant_like B> -struct StaticExtentFromStridedRange { - constexpr static size_t value = A::value > 0 ? 1 + (A::value - 1) / B::value : 0; -}; #endif // creates new extents through recursive calls to next_extent member function @@ -506,18 +602,28 @@ struct extents_constructor { SlicesAndExtents... slices_and_extents) { constexpr size_t new_static_extent = StaticExtentFromRange< decltype(first_of(std::declval())), - decltype(last_of(std::integral_constant(), - std::declval(), - std::declval()))>::value; + decltype(last_of( +#if defined(MDSPAN_ENABLE_P3663) + std::cw, +#else + std::integral_constant(), +#endif + std::declval(), + std::declval()))>::value; using next_t = extents_constructor; using index_t = typename Extents::index_type; return next_t::next_extent( ext, slices_and_extents..., - index_t(last_of(std::integral_constant(), ext, - sl)) - - index_t(first_of(sl))); + index_t(last_of( +#if defined(MDSPAN_ENABLE_P3663) + std::cw, +#else + std::integral_constant(), +#endif + ext, + sl)) - index_t(first_of(sl))); } MDSPAN_TEMPLATE_REQUIRES( @@ -607,7 +713,7 @@ constexpr auto canonical_ice(S s) { // TODO NOT IN PROPOSAL? Make sure constant_wrapper only has one template argument. // The first template argument is a value of an exposition-only type, NOT the actual value! 
if constexpr (__mdspan_integral_constant_like) { - return std::constant_wrapper(index_cast(S::value))>{}; + return std::cw(index_cast(S::value))>; } else { return static_cast(index_cast(s)); diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index 4d94bc62..28ffeb99 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -146,7 +146,13 @@ template struct is_range_slice : std::true_type {}; template -struct is_range_slice>, IndexType> { +struct is_range_slice< + strided_slice< + OffsetType, + ExtentType, + std::constant_wrapper>, + IndexType> +{ static constexpr bool value = (std::constant_wrapper{}() == IndexType(1)); }; @@ -304,8 +310,13 @@ layout_left::mapping::submdspan_mapping_impl( } else { // layout_stride case using dst_mapping_t = typename layout_stride::mapping; - auto inv_map = detail::inv_map_rank(std::integral_constant(), - std::index_sequence<>(), slices...); + auto inv_map = detail::inv_map_rank( +#if defined(MDSPAN_ENABLE_P3663) + std::cw, +#else + std::integral_constant(), +#endif + std::index_sequence<>(), slices...); return submdspan_mapping_result { dst_mapping_t(mdspan_non_standard, dst_ext, detail::construct_sub_strides( @@ -388,8 +399,13 @@ MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded::mapping{ dst_mapping_t(dst_ext, stride(1 + deduce_layout::gap_len)), offset}; } else { // layout_stride - auto inv_map = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::inv_map_rank(std::integral_constant(), - std::index_sequence<>(), slices...); + auto inv_map = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::inv_map_rank( +#if defined(MDSPAN_ENABLE_P3663) + std::cw, +#else + std::integral_constant(), +#endif + std::index_sequence<>(), slices...); using dst_mapping_t = typename layout_stride::template mapping; return submdspan_mapping_result { dst_mapping_t(mdspan_non_standard, dst_ext, @@ -551,8 +567,13 @@ 
layout_right::mapping::submdspan_mapping_impl( } else { // layout_stride case using dst_mapping_t = typename layout_stride::mapping; - auto inv_map = detail::inv_map_rank(std::integral_constant(), - std::index_sequence<>(), slices...); + auto inv_map = detail::inv_map_rank( +#if defined(MDSPAN_ENABLE_P3663) + std::cw, +#else + std::integral_constant(), +#endif + std::index_sequence<>(), slices...); return submdspan_mapping_result { dst_mapping_t(mdspan_non_standard, dst_ext, detail::construct_sub_strides( @@ -627,8 +648,13 @@ MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded::mapping{ dst_mapping_t(dst_ext, stride(Extents::rank() - 2 - deduce_layout::gap_len)), offset}; } else { // layout_stride - auto inv_map = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::inv_map_rank(std::integral_constant(), - std::index_sequence<>(), slices...); + auto inv_map = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::inv_map_rank( +#if defined(MDSPAN_ENABLE_P3663) + std::cw, +#else + std::integral_constant(), +#endif + std::index_sequence<>(), slices...); using dst_mapping_t = typename layout_stride::template mapping; return submdspan_mapping_result { dst_mapping_t(mdspan_non_standard, dst_ext, @@ -674,8 +700,13 @@ layout_stride::mapping::submdspan_mapping_impl( auto dst_ext = submdspan_extents(extents(), slices...); using dst_ext_t = decltype(dst_ext); - auto inv_map = detail::inv_map_rank(std::integral_constant(), - std::index_sequence<>(), slices...); + auto inv_map = detail::inv_map_rank( +#if defined(MDSPAN_ENABLE_P3663) + std::cw, +#else + std::integral_constant(), +#endif + std::index_sequence<>(), slices...); using dst_mapping_t = typename layout_stride::template mapping; // Figure out if any slice's lower bound equals the corresponding extent. 
From 7ba02e6cd86a77840c9771b11392d9d3ffd44476 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 27 May 2025 13:51:35 -0600 Subject: [PATCH 019/103] Remove unneeded first_of and last_of overloads --- .../__p2630_bits/submdspan_extents.hpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index eeb3e357..d37d0846 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -200,6 +200,12 @@ first_of(const ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t &) { #endif } +// P3663 doesn't need any of these overloads, +// because its version of first_of will never see pair-like types. +// (The only "contiguous range of indices" slice types it sees are +// full_extent_t and strided_slice with compile-time unit stride.) +#if ! defined(MDSPAN_ENABLE_P3663) + // TODO P3663 won't need this overload, // because first_of should never see pair-like types. MDSPAN_TEMPLATE_REQUIRES( @@ -234,6 +240,8 @@ constexpr auto first_of(const std::complex &i) { return i.real(); } +#endif + template MDSPAN_INLINE_FUNCTION constexpr OffsetType @@ -269,7 +277,9 @@ constexpr Integral last_of( return i; } -// NOTE P3663 should not need this overload, +#if ! defined(MDSPAN_ENABLE_P3663) + +// P3663 does not need these index_pair_like overloads, // because last_of should never see a pair-like type. MDSPAN_TEMPLATE_REQUIRES( #if defined(MDSPAN_ENABLE_P3663) @@ -366,6 +376,8 @@ constexpr auto last_of( return i.imag(); } +#endif // ! defined(MDSPAN_ENABLE_P3663) + // Suppress spurious warning with NVCC about no return statement. 
// This is a known issue in NVCC and NVC++ // Depending on the CUDA and GCC version we need both the builtin From 8a5c18f2eb62b8e09a26af78fbedbe61e94f67a1 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 27 May 2025 13:54:07 -0600 Subject: [PATCH 020/103] Remove index_pair_like definition --- include/experimental/__p2630_bits/submdspan_extents.hpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index d37d0846..e42e99d2 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -108,7 +108,10 @@ template struct is_strided_slice< strided_slice> : std::true_type {}; -// TODO We won't even need index_pair_like when we're done. +// P3663 does not need index_pair_like. In fact, it's impossible +// to define a concept for the set of types that P3663 accepts +// as a pair of indices. +#if ! defined(MDSPAN_ENABLE_P3663) // Helper for identifying valid pair like things template struct index_pair_like : std::false_type {}; @@ -141,6 +144,7 @@ struct index_pair_like, IndexType> { static constexpr bool value = std::is_convertible_v; }; +#endif // ! 
defined(MDSPAN_ENABLE_P3663) // first_of(slice): getting begin of slice specifier range From 3a5bcc72d30a841fcde0f5d312561bb117e1b19d Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 27 May 2025 13:59:17 -0600 Subject: [PATCH 021/103] Replace is_range_slice with is_range_slice_v --- .../__p2630_bits/submdspan_mapping.hpp | 31 +++++++------------ 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index 28ffeb99..d40921af 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -140,36 +140,29 @@ MDSPAN_INLINE_FUNCTION constexpr auto construct_sub_strides( #if defined(MDSPAN_ENABLE_P3663) template -struct is_range_slice : std::false_type {}; +constexpr bool is_range_slice_v = false; template -struct is_range_slice : std::true_type {}; +constexpr bool is_range_slice_v = true; template -struct is_range_slice< - strided_slice< - OffsetType, - ExtentType, - std::constant_wrapper>, - IndexType> -{ - static constexpr bool value = (std::constant_wrapper{}() == IndexType(1)); -}; +constexpr bool is_range_slice_v< + strided_slice< + OffsetType, + ExtentType, + std::constant_wrapper>, + IndexType + > = (std::constant_wrapper{}() == IndexType(1)); #else template -struct is_range_slice { - constexpr static bool value = - std::is_same_v || - index_pair_like::value; -}; +constexpr bool is_range_slice_v = + std::is_same_v || + index_pair_like::value; #endif // MDSPAN_ENABLE_P3663 -template -constexpr bool is_range_slice_v = is_range_slice::value; - template struct is_index_slice { constexpr static bool value = std::is_convertible_v; From c9aa31d51c77c91f2a73e4ae552d68a9e2a356d9 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 27 May 2025 14:02:05 -0600 Subject: [PATCH 022/103] Simplify StaticExtentFromStridedRange Don't need to specialize for all integral-constant-like, 
just for std::constant_wrapper. --- .../experimental/__p2630_bits/submdspan_extents.hpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index e42e99d2..8358a714 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -586,9 +586,13 @@ template struct StaticExtentFromStridedRange { }; #if defined(MDSPAN_ENABLE_P3663) -template <__mdspan_integral_constant_like A, __mdspan_integral_constant_like B> -struct StaticExtentFromStridedRange { - constexpr static size_t value = A::value > 0 ? 1 + (A::value - 1) / B::value : 0; +template +struct StaticExtentFromStridedRange, std::constant_wrapper> { +private: + static constexpr auto A_value = std::constant_wrapper{}(); + static constexpr auto B_value = std::constant_wrapper{}(); +public: + constexpr static size_t value = A_value > 0 ? 
1 + (A_value - 1) / B_value : 0; }; #else template From cc99e2cd900798ad469c1b3a4049eb0add8086e4 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 27 May 2025 14:19:59 -0600 Subject: [PATCH 023/103] Simplify extents_constructor for P3663 --- .../__p2630_bits/submdspan_extents.hpp | 36 ++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 8358a714..5b17a386 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -25,6 +25,14 @@ namespace MDSPAN_IMPL_STANDARD_NAMESPACE { namespace detail { +#if defined(MDSPAN_ENABLE_P3663) +template +constexpr bool is_constant_wrapper = false; + +template +constexpr bool is_constant_wrapper> = true; +#endif + // Mapping from submapping ranks to srcmapping ranks // InvMapRank is an index_sequence, which we build recursively // to contain the mapped indices. @@ -612,14 +620,32 @@ struct StaticExtentFromStridedRange, // next_extent has different overloads for different types of stride specifiers template struct extents_constructor { + + // This covers both the full_extent_t and index-pair-like cases. + // P3663 only needs the full_extent_t case. +#if defined(MDSPAN_ENABLE_P3663) + template +#else MDSPAN_TEMPLATE_REQUIRES( class Slice, class... SlicesAndExtents, /* requires */(!std::is_convertible_v && !is_strided_slice::value) ) +#endif MDSPAN_INLINE_FUNCTION - constexpr static auto next_extent(const Extents &ext, const Slice &sl, - SlicesAndExtents... slices_and_extents) { + constexpr static auto next_extent( + const Extents &ext, +#if defined(MDSPAN_ENABLE_P3663) + full_extent_t sl, +#else + const Slice &sl, +#endif + SlicesAndExtents... 
slices_and_extents) + { +#if defined(MDSPAN_ENABLE_P3663) + using Slice = full_extent_t; +#endif + constexpr size_t new_static_extent = StaticExtentFromRange< decltype(first_of(std::declval())), decltype(last_of( @@ -927,12 +953,6 @@ template } } -template -constexpr bool is_constant_wrapper = false; - -template -constexpr bool is_constant_wrapper> = true; - // [mdspan.sub.slices] 1 template constexpr bool is_canonical_submdspan_index_type() { From 443d3fce5683ff062e539aa9f0e071ee35fff873 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 27 May 2025 14:22:49 -0600 Subject: [PATCH 024/103] Remove superfluous TODO --- include/experimental/__p2630_bits/submdspan_extents.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 5b17a386..0cf53ba6 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -218,8 +218,6 @@ first_of(const ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t &) { // full_extent_t and strided_slice with compile-time unit stride.) #if ! defined(MDSPAN_ENABLE_P3663) -// TODO P3663 won't need this overload, -// because first_of should never see pair-like types. 
MDSPAN_TEMPLATE_REQUIRES( class Slice, /* requires */(index_pair_like::value) From 57ae57802775b06616099529bcf80cc015aceca2 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 27 May 2025 17:02:42 -0600 Subject: [PATCH 025/103] Reconcile implementation with draft P3663R2 --- .../experimental/__p2630_bits/submdspan.hpp | 20 +-- .../__p2630_bits/submdspan_extents.hpp | 140 ++++++++++-------- 2 files changed, 86 insertions(+), 74 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan.hpp b/include/experimental/__p2630_bits/submdspan.hpp index 4357a7e6..aab70a5e 100644 --- a/include/experimental/__p2630_bits/submdspan.hpp +++ b/include/experimental/__p2630_bits/submdspan.hpp @@ -28,15 +28,17 @@ submdspan(const mdspan &src, SliceSpecifiers... slices) { #if defined(MDSPAN_ENABLE_P3663) - auto [...canonical_slices] = submdspan_canonicalize_slices(src.extents(), slices...); - static_assert(sizeof...(canonical_slices) == sizeof...(slices)); - // TODO FIX IN PROPOSAL: [canonical_]slices (incorrect formatting). - auto sub_map_result = submdspan_mapping(src.mapping(), canonical_slices...); - // TODO FIX IN PROPOSAL: It's src.data_handle(), not src.data(). - // Missing "typename" before AccessorPolicy::offset_policy. - return mdspan(src.accessor().offset(src.data_handle(), sub_map_result.offset), - sub_map_result.mapping, - typename AccessorPolicy::offset_policy(src.accessor())); + auto [...canonical_slices] = + submdspan_canonicalize_slices(src.extents(), slices...); + // NOTE Added to P3663R2: [canonical_]slices (incorrect formatting). + auto sub_map_result = + submdspan_mapping(src.mapping(), canonical_slices...); + // NOTE Added to P3663R2: It's src.data_handle(), not src.data(). + // NOTE Added to P3663R2: Missing "typename" before AccessorPolicy::offset_policy. 
+ return mdspan( + src.accessor().offset(src.data_handle(), sub_map_result.offset), + sub_map_result.mapping, + typename AccessorPolicy::offset_policy(src.accessor())); #else const auto sub_submdspan_mapping_result = submdspan_mapping(src.mapping(), slices...); // NVCC has a problem with the deduction so lets figure out the type diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 0cf53ba6..05299cd2 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -749,13 +749,12 @@ constexpr auto canonical_ice(S s) { // TODO Preconditions: If S is a signed or unsigned integer type, // then s is representable as a value of type IndexType. // - // TODO NOT IN PROPOSAL: index-cast result needs to be - // cast again to IndexType, so that we don't get a weird - // constant_wrapper whose value has a different type - // than the second template argument. - - // TODO NOT IN PROPOSAL? Make sure constant_wrapper only has one template argument. - // The first template argument is a value of an exposition-only type, NOT the actual value! + // NOTE Added to P3663R2: Use cw instead of constant_wrapper. + // + // NOTE Added to P3663R2: Specify that index-cast result is + // cast to IndexType before being used as the template argument + // of `cw`, so we don't get a weird constant_wrapper whose value + // has a different type than the second template argument. if constexpr (__mdspan_integral_constant_like) { return std::cw(index_cast(S::value))>; } @@ -796,7 +795,7 @@ template constexpr bool is_std_complex> = true; #endif -// TODO It's impossible to write an "if constexpr" check for +// NOTE It's impossible to write an "if constexpr" check for // "structured binding into two elements is well-formed." Thus, we // must assume that the input Slices are all valid slice types. 
// One way to do that is to invoke this only post-canonicalization. @@ -806,18 +805,22 @@ constexpr bool is_std_complex> = true; // possible categories of valid slice types in if constexpr, with // the final else attempting the structured binding into two elements. -// TODO NOT IN PROPOSAL: Consider rewriting to use only $S_k$ +// DONE Added to P3663R2: Rewrite wording to use only $S_k$ // and not $s_k$ in check-static-bounds, since we can't use // the actual function parameter in a function that we want // to work in a constant expression. -// TODO NOT IN PROPOSAL: Taking slices parameter(s) makes use -// of check_static_bounds not a constant expression. -// Instead, make Slices... a template parameter pack. +// DONE Added to P3663R2: Implementation takes k and one slice +// only (S_k) as explicit template parameters, rather than +// passing in the whole parameter pack of slices. This makes +// sense because the function only tests one slice (the k-th one). +// Also, taking slices parameter(s) makes use of check_static_bounds +// not a constant expression. + +// DONE Added to P3663R2: Check wording of check-static-bounds +// so that it only assumes that types are default constructible +// in constant expressions if they are integral-constant-like. -// TODO NOT IN PROPOSAL: It's easier to have a single Slice -// as a template parameter pack. This makes sense because -// the function only tests one slice (the k-th one) anyway. template constexpr check_static_bounds_result check_static_bounds( const extents&) @@ -827,6 +830,10 @@ template } else if constexpr (std::is_convertible_v) { if constexpr (__mdspan_integral_constant_like) { + // integral-constant-like types are default constructible + // in constant expressions, so it's OK to use S_k{} here + // instead of std::declval. Also, expressions like + // de_ice(std::declval()) are not constant expressions. 
if constexpr (de_ice(S_k{}) < 0) { return check_static_bounds_result::out_of_bounds; // 14.3.1 } @@ -845,39 +852,43 @@ template } } else if constexpr (is_strided_slice::value) { - if constexpr (__mdspan_integral_constant_like) { - if constexpr (de_ice(S_k{}.offset) < 0) { + using offset_type = typename S_k::offset_type; + + if constexpr (__mdspan_integral_constant_like) { + if constexpr (de_ice(offset_type{}) < 0) { return check_static_bounds_result::out_of_bounds; // 14.3.1 } else if constexpr ( - Exts...[k] != dynamic_extent && Exts...[k] < de_ice(S_k{}.offset)) + Exts...[k] != dynamic_extent && Exts...[k] < de_ice(offset_type{})) { return check_static_bounds_result::out_of_bounds; // 14.3.2 } - else if constexpr ( - __mdspan_integral_constant_like && - de_ice(S_k{}.offset) + de_ice(S_k{}.extent) < 0) - { - return check_static_bounds_result::out_of_bounds; // 14.3.3 - } - else if constexpr ( - Exts...[k] != dynamic_extent && - __mdspan_integral_constant_like && - Exts...[k] < de_ice(S_k{}.offset) + de_ice(S_k{}.extent)) - { - return check_static_bounds_result::out_of_bounds; // 14.3.4 - } - else if constexpr ( - Exts...[k] != dynamic_extent && - __mdspan_integral_constant_like && - 0 <= de_ice(S_k{}.offset) && - de_ice(S_k{}.offset) <= de_ice(S_k{}.offset) + de_ice(S_k{}.extent) && - de_ice(S_k{}.offset) + de_ice(S_k{}.extent) <= Exts...[k]) - { - return check_static_bounds_result::in_bounds; // 14.3.5 + else if constexpr (__mdspan_integral_constant_like) { + using extent_type = typename S_k::extent_type; + + if constexpr (de_ice(offset_type{}) + de_ice(extent_type{}) < 0) { + return check_static_bounds_result::out_of_bounds; // 14.3.3 + } + else if constexpr ( + Exts...[k] != dynamic_extent && + Exts...[k] < de_ice(offset_type{}) + de_ice(extent_type{})) + { + return check_static_bounds_result::out_of_bounds; // 14.3.4 + } + else if constexpr ( + Exts...[k] != dynamic_extent && + 0 <= de_ice(offset_type{}) && + de_ice(offset_type{}) <= de_ice(offset_type{}) + 
de_ice(extent_type{}) && + de_ice(offset_type{}) + de_ice(extent_type{}) <= Exts...[k]) + { + return check_static_bounds_result::in_bounds; // 14.3.5 + } + else { + return check_static_bounds_result::unknown; // 14.3.6 + } } else { - return check_static_bounds_result::unknown; // 14.3.6 + return check_static_bounds_result::unknown; // 14.5 } } else { // strided_slice but offset_type isn't integral-constant-like @@ -895,10 +906,7 @@ template // NOTE: This case means that check_static_bounds cannot be // well-formed if it didn't fall into one of the above cases // and if it can't be destructured into two elements. - - // We can't use s_k on the right-hand side here, because it's not a constant expression. - // We can't use S_k{} here either, because that presumes that it's default constructible. - // We can only use std::declval() in an unevaluated context. + // That implements the Mandates clause. auto get_first = [] (S_k s_k) { auto [s_k0, _] = s_k; return s_k0; @@ -919,27 +927,29 @@ template { return check_static_bounds_result::out_of_bounds; // 14.4.2 } - else if constexpr ( - __mdspan_integral_constant_like && - de_ice(S_k1{}) < de_ice(S_k0{})) - { - return check_static_bounds_result::out_of_bounds; // 14.4.3 - } - else if constexpr ( - Exts...[k] != dynamic_extent && - __mdspan_integral_constant_like && - Exts...[k] < de_ice(S_k1{})) - { - return check_static_bounds_result::out_of_bounds; // 14.4.4 - } - else if constexpr ( - Exts...[k] != dynamic_extent && - __mdspan_integral_constant_like && - 0 <= de_ice(S_k0{}) && - de_ice(S_k0{}) <= de_ice(S_k1{}) && - de_ice(S_k1{}) <= Exts...[k]) - { - return check_static_bounds_result::in_bounds; // 14.4.5 + else if constexpr (__mdspan_integral_constant_like) { + if constexpr ( + de_ice(S_k1{}) < de_ice(S_k0{})) + { + return check_static_bounds_result::out_of_bounds; // 14.4.3 + } + else if constexpr ( + Exts...[k] != dynamic_extent && + Exts...[k] < de_ice(S_k1{})) + { + return 
check_static_bounds_result::out_of_bounds; // 14.4.4 + } + else if constexpr ( + Exts...[k] != dynamic_extent && + 0 <= de_ice(S_k0{}) && + de_ice(S_k0{}) <= de_ice(S_k1{}) && + de_ice(S_k1{}) <= Exts...[k]) + { + return check_static_bounds_result::in_bounds; // 14.4.5 + } + else { + return check_static_bounds_result::unknown; // 14.4.6 + } } else { return check_static_bounds_result::unknown; // 14.4.6 From 26faa32e999d5ab3661e17b0a241469192677fa2 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 3 Jun 2025 16:07:05 -0600 Subject: [PATCH 026/103] Add check_static_bounds test It currently exercises the following cases for a variety of extents types. * full_extent_t * integral_constant * IndexType --- tests/CMakeLists.txt | 1 + tests/test_submdspan_check_static_bounds.cpp | 130 +++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 tests/test_submdspan_check_static_bounds.cpp diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 4a59960b..cf91ab88 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -119,4 +119,5 @@ if(MDSPAN_ENABLE_P3663 AND (CMAKE_CXX_STANDARD GREATER_EQUAL 17)) endif() if(MDSPAN_ENABLE_P3663) mdspan_add_test(test_canonicalize_slices) + mdspan_add_test(test_submdspan_check_static_bounds) endif() diff --git a/tests/test_submdspan_check_static_bounds.cpp b/tests/test_submdspan_check_static_bounds.cpp new file mode 100644 index 00000000..47c8eec4 --- /dev/null +++ b/tests/test_submdspan_check_static_bounds.cpp @@ -0,0 +1,130 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#include +#include +#include +#include + +#include + +namespace { + +template +void test_check_static_bounds( + Kokkos::extents extents, + Kokkos::detail::check_static_bounds_result expected_result) +{ + using Kokkos::detail::check_static_bounds; + using Kokkos::detail::check_static_bounds_result; + + auto result = check_static_bounds(extents); + static_assert(std::is_same_v); + EXPECT_EQ(result, expected_result); +} + +template +void test_full_extent( + Kokkos::extents extents) +{ + [&] (std::index_sequence) { + using Kokkos::detail::check_static_bounds_result; + + (test_check_static_bounds(extents, check_static_bounds_result::in_bounds), ...); + } (std::make_index_sequence()); +} + +template +using IC = std::integral_constant; + +TEST(Submdspan, CheckStaticBounds) { + using Kokkos::detail::check_static_bounds; + using Kokkos::detail::check_static_bounds_result; + + { + auto exts = Kokkos::extents{5, 7, 11}; + test_full_extent(exts); + + test_check_static_bounds<0, IC<-1>>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, IC<-1>>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<2, IC<-1>>(exts, check_static_bounds_result::out_of_bounds); + + test_check_static_bounds<0, IC<13>>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, IC<13>>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<2, IC<13>>(exts, check_static_bounds_result::out_of_bounds); + + test_check_static_bounds<0, IC<3>>(exts, check_static_bounds_result::in_bounds); + test_check_static_bounds<1, IC<3>>(exts, check_static_bounds_result::in_bounds); + test_check_static_bounds<2, IC<3>>(exts, check_static_bounds_result::in_bounds); + + test_check_static_bounds<0, IC<6>>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, IC<6>>(exts, check_static_bounds_result::in_bounds); + 
test_check_static_bounds<2, IC<6>>(exts, check_static_bounds_result::in_bounds); + + test_check_static_bounds<0, int>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<1, int>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, int>(exts, check_static_bounds_result::unknown); + } + { + auto exts = Kokkos::dims<3>{5, 7, 11}; + test_full_extent(exts); + + test_check_static_bounds<0, IC<-1>>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, IC<-1>>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<2, IC<-1>>(exts, check_static_bounds_result::out_of_bounds); + + test_check_static_bounds<0, IC<13>>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<1, IC<13>>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, IC<13>>(exts, check_static_bounds_result::unknown); + + test_check_static_bounds<0, IC<3>>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<1, IC<3>>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, IC<3>>(exts, check_static_bounds_result::unknown); + + test_check_static_bounds<0, IC<6>>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<1, IC<6>>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, IC<6>>(exts, check_static_bounds_result::unknown); + + test_check_static_bounds<0, int>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<1, int>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, int>(exts, check_static_bounds_result::unknown); + } + { + auto exts = Kokkos::extents{5, 7, 11}; + test_full_extent(exts); + + test_check_static_bounds<0, IC<-1>>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, IC<-1>>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<2, IC<-1>>(exts, check_static_bounds_result::out_of_bounds); + + 
test_check_static_bounds<0, IC<13>>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, IC<13>>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, IC<13>>(exts, check_static_bounds_result::out_of_bounds); + + test_check_static_bounds<0, IC<3>>(exts, check_static_bounds_result::in_bounds); + test_check_static_bounds<1, IC<3>>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, IC<3>>(exts, check_static_bounds_result::in_bounds); + + test_check_static_bounds<0, IC<6>>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, IC<6>>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, IC<6>>(exts, check_static_bounds_result::in_bounds); + + test_check_static_bounds<0, int>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<1, int>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, int>(exts, check_static_bounds_result::unknown); + } +} + +} // namespace (anonymous) From b804facaeff4492da9d7e82ce67f024960d7e601 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 3 Jun 2025 16:09:47 -0600 Subject: [PATCH 027/103] check_static_bounds: add convertible to full_extents_t test --- tests/test_submdspan_check_static_bounds.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/test_submdspan_check_static_bounds.cpp b/tests/test_submdspan_check_static_bounds.cpp index 47c8eec4..0e57a725 100644 --- a/tests/test_submdspan_check_static_bounds.cpp +++ b/tests/test_submdspan_check_static_bounds.cpp @@ -22,6 +22,12 @@ namespace { +struct convertible_to_full_extent_t { + constexpr operator Kokkos::full_extent_t() const { + return Kokkos::full_extent; + } +}; + template void test_check_static_bounds( Kokkos::extents extents, @@ -39,11 +45,15 @@ template void test_full_extent( Kokkos::extents extents) { - [&] (std::index_sequence) { - using Kokkos::detail::check_static_bounds_result; + using 
Kokkos::detail::check_static_bounds_result; + [&] (std::index_sequence) { (test_check_static_bounds(extents, check_static_bounds_result::in_bounds), ...); } (std::make_index_sequence()); + + [&] (std::index_sequence) { + (test_check_static_bounds(extents, check_static_bounds_result::in_bounds), ...); + } (std::make_index_sequence()); } template From 96c4f4962d50331412363fd77ea38d874809818c Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 3 Jun 2025 16:37:54 -0600 Subject: [PATCH 028/103] check_static_bounds: Add strided_slice tests --- tests/test_submdspan_check_static_bounds.cpp | 358 +++++++++++++++++++ 1 file changed, 358 insertions(+) diff --git a/tests/test_submdspan_check_static_bounds.cpp b/tests/test_submdspan_check_static_bounds.cpp index 0e57a725..182ac112 100644 --- a/tests/test_submdspan_check_static_bounds.cpp +++ b/tests/test_submdspan_check_static_bounds.cpp @@ -62,6 +62,7 @@ using IC = std::integral_constant; TEST(Submdspan, CheckStaticBounds) { using Kokkos::detail::check_static_bounds; using Kokkos::detail::check_static_bounds_result; + using Kokkos::strided_slice; { auto exts = Kokkos::extents{5, 7, 11}; @@ -86,6 +87,125 @@ TEST(Submdspan, CheckStaticBounds) { test_check_static_bounds<0, int>(exts, check_static_bounds_result::unknown); test_check_static_bounds<1, int>(exts, check_static_bounds_result::unknown); test_check_static_bounds<2, int>(exts, check_static_bounds_result::unknown); + + test_check_static_bounds<0, unsigned short>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<1, unsigned short>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, unsigned short>(exts, check_static_bounds_result::unknown); + + // 14.3.1.1 + { + using offset_type = IC<-1>; + using extent_type = int; + using stride_type = int; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, slice_type>(exts, 
check_static_bounds_result::out_of_bounds); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + } + { + using offset_type = IC<-1>; + using extent_type = IC<1>; + using stride_type = IC<1>; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + } + // 14.3.1.2 + { + using offset_type = IC<13>; + using extent_type = int; + using stride_type = int; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + } + { + using offset_type = IC<13>; + using extent_type = IC<1>; + using stride_type = IC<1>; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + } + // 14.3.1.3 + { + using offset_type = IC<1>; + using extent_type = IC<-2>; + using stride_type = int; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + } + { + using offset_type = IC<1>; + using extent_type = IC<-2>; + using stride_type = IC<1>; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, 
check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + } + // 14.3.1.4 + { + using offset_type = IC<4>; // in bounds + using extent_type = IC<8>; // out of bounds + using stride_type = int; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + } + { + using offset_type = IC<4>; // in bounds + using extent_type = IC<8>; // out of bounds + using stride_type = IC<1>; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + } + // 14.3.1.5 + { + using offset_type = IC<1>; // in bounds + using extent_type = IC<2>; // in bounds + using stride_type = int; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::in_bounds); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::in_bounds); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::in_bounds); + } + { + using offset_type = IC<1>; // in bounds + using extent_type = IC<2>; // in bounds + using stride_type = IC<1>; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::in_bounds); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::in_bounds); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::in_bounds); + } + // 14.3.1.6 + { + using 
offset_type = int; + using extent_type = int; + using stride_type = int; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + } + { + using offset_type = int; + using extent_type = IC<1>; + using stride_type = IC<1>; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + } } { auto exts = Kokkos::dims<3>{5, 7, 11}; @@ -110,6 +230,125 @@ TEST(Submdspan, CheckStaticBounds) { test_check_static_bounds<0, int>(exts, check_static_bounds_result::unknown); test_check_static_bounds<1, int>(exts, check_static_bounds_result::unknown); test_check_static_bounds<2, int>(exts, check_static_bounds_result::unknown); + + test_check_static_bounds<0, unsigned short>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<1, unsigned short>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, unsigned short>(exts, check_static_bounds_result::unknown); + + // 14.3.1.1 + { + using offset_type = IC<-1>; + using extent_type = int; + using stride_type = int; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + } + { + using offset_type = IC<-1>; + using extent_type = IC<1>; + using stride_type = IC<1>; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, 
check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + } + // 14.3.1.2 + { + using offset_type = IC<13>; + using extent_type = int; + using stride_type = int; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + } + { + using offset_type = IC<13>; + using extent_type = IC<1>; + using stride_type = IC<1>; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + } + // 14.3.1.3 + { + using offset_type = IC<1>; + using extent_type = IC<-2>; + using stride_type = int; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + } + { + using offset_type = IC<1>; + using extent_type = IC<-2>; + using stride_type = IC<1>; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + } + // 14.3.1.4 + { + using offset_type = IC<4>; // in bounds + using extent_type = IC<8>; // out of bounds + using stride_type = int; + using 
slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + } + { + using offset_type = IC<4>; // in bounds + using extent_type = IC<8>; // out of bounds + using stride_type = IC<1>; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + } + // 14.3.1.5 + { + using offset_type = IC<1>; // in bounds + using extent_type = IC<2>; // in bounds + using stride_type = int; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + } + { + using offset_type = IC<1>; // in bounds + using extent_type = IC<2>; // in bounds + using stride_type = IC<1>; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + } + // 14.3.1.6 + { + using offset_type = int; + using extent_type = int; + using stride_type = int; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + } + { + using offset_type = int; + using extent_type 
= IC<1>; + using stride_type = IC<1>; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + } } { auto exts = Kokkos::extents{5, 7, 11}; @@ -134,6 +373,125 @@ TEST(Submdspan, CheckStaticBounds) { test_check_static_bounds<0, int>(exts, check_static_bounds_result::unknown); test_check_static_bounds<1, int>(exts, check_static_bounds_result::unknown); test_check_static_bounds<2, int>(exts, check_static_bounds_result::unknown); + + test_check_static_bounds<0, unsigned short>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<1, unsigned short>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, unsigned short>(exts, check_static_bounds_result::unknown); + + // 14.3.1.1 + { + using offset_type = IC<-1>; + using extent_type = int; + using stride_type = int; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + } + { + using offset_type = IC<-1>; + using extent_type = IC<1>; + using stride_type = IC<1>; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + } + // 14.3.1.2 + { + using offset_type = IC<13>; + using extent_type = int; + using stride_type = int; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); 
+ test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + } + { + using offset_type = IC<13>; + using extent_type = IC<1>; + using stride_type = IC<1>; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + } + // 14.3.1.3 + { + using offset_type = IC<1>; + using extent_type = IC<-2>; + using stride_type = int; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + } + { + using offset_type = IC<1>; + using extent_type = IC<-2>; + using stride_type = IC<1>; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + } + // 14.3.1.4 + { + using offset_type = IC<4>; // in bounds + using extent_type = IC<8>; // out of bounds + using stride_type = int; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + } + { + using offset_type = IC<4>; // in bounds + using extent_type = IC<8>; // out of bounds + using stride_type = IC<1>; + using slice_type = 
strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + } + // 14.3.1.5 + { + using offset_type = IC<1>; // in bounds + using extent_type = IC<2>; // in bounds + using stride_type = int; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::in_bounds); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::in_bounds); + } + { + using offset_type = IC<1>; // in bounds + using extent_type = IC<2>; // in bounds + using stride_type = IC<1>; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::in_bounds); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::in_bounds); + } + // 14.3.1.6 + { + using offset_type = int; + using extent_type = int; + using stride_type = int; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + } + { + using offset_type = int; + using extent_type = IC<1>; + using stride_type = IC<1>; + using slice_type = strided_slice; + test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + } } } From f7b43a2c15943711533b21eb891fc7da892ddea3 Mon Sep 17 00:00:00 2001 
From: Mark Hoemmen Date: Tue, 3 Jun 2025 16:48:31 -0600 Subject: [PATCH 029/103] check_static_bounds: Make test more concise --- tests/test_submdspan_check_static_bounds.cpp | 355 ++++++++++--------- 1 file changed, 191 insertions(+), 164 deletions(-) diff --git a/tests/test_submdspan_check_static_bounds.cpp b/tests/test_submdspan_check_static_bounds.cpp index 182ac112..fed31b18 100644 --- a/tests/test_submdspan_check_static_bounds.cpp +++ b/tests/test_submdspan_check_static_bounds.cpp @@ -15,9 +15,9 @@ //@HEADER #include #include +#include #include -#include - +#include #include namespace { @@ -27,6 +27,30 @@ struct convertible_to_full_extent_t { return Kokkos::full_extent; } }; +static_assert(std::is_convertible_v); + +struct foo {}; +struct bar {}; + +template +concept has_get_like_pair = requires(T t) { + { std::get<0>(t) } -> std::convertible_to; + { std::get<1>(t) } -> std::convertible_to; +}; +static_assert(has_get_like_pair, std::pair>); +static_assert(has_get_like_pair, std::pair>); + +// Structured binding with two elements is valid, +// but it's not convertible to pair or tuple, +// and neither get<0> nor get<1> work on it. +template +struct my_pair { + First first; + Second second; +}; +static_assert(! std::is_convertible_v, std::pair>); +static_assert(! std::is_convertible_v, std::tuple>); +static_assert(! 
has_get_like_pair, std::pair>); template void test_check_static_bounds( @@ -63,34 +87,37 @@ TEST(Submdspan, CheckStaticBounds) { using Kokkos::detail::check_static_bounds; using Kokkos::detail::check_static_bounds_result; using Kokkos::strided_slice; + constexpr auto OOB = check_static_bounds_result::out_of_bounds; + constexpr auto INB = check_static_bounds_result::in_bounds; + constexpr auto UNK = check_static_bounds_result::unknown; { auto exts = Kokkos::extents{5, 7, 11}; test_full_extent(exts); - test_check_static_bounds<0, IC<-1>>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, IC<-1>>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<2, IC<-1>>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, IC<-1>>(exts, OOB); + test_check_static_bounds<1, IC<-1>>(exts, OOB); + test_check_static_bounds<2, IC<-1>>(exts, OOB); - test_check_static_bounds<0, IC<13>>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, IC<13>>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<2, IC<13>>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, IC<13>>(exts, OOB); + test_check_static_bounds<1, IC<13>>(exts, OOB); + test_check_static_bounds<2, IC<13>>(exts, OOB); - test_check_static_bounds<0, IC<3>>(exts, check_static_bounds_result::in_bounds); - test_check_static_bounds<1, IC<3>>(exts, check_static_bounds_result::in_bounds); - test_check_static_bounds<2, IC<3>>(exts, check_static_bounds_result::in_bounds); + test_check_static_bounds<0, IC<3>>(exts, INB); + test_check_static_bounds<1, IC<3>>(exts, INB); + test_check_static_bounds<2, IC<3>>(exts, INB); - test_check_static_bounds<0, IC<6>>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, IC<6>>(exts, check_static_bounds_result::in_bounds); - test_check_static_bounds<2, IC<6>>(exts, check_static_bounds_result::in_bounds); + 
test_check_static_bounds<0, IC<6>>(exts, OOB); + test_check_static_bounds<1, IC<6>>(exts, INB); + test_check_static_bounds<2, IC<6>>(exts, INB); - test_check_static_bounds<0, int>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<1, int>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, int>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<0, int>(exts, UNK); + test_check_static_bounds<1, int>(exts, UNK); + test_check_static_bounds<2, int>(exts, UNK); - test_check_static_bounds<0, unsigned short>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<1, unsigned short>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, unsigned short>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<0, unsigned short>(exts, UNK); + test_check_static_bounds<1, unsigned short>(exts, UNK); + test_check_static_bounds<2, unsigned short>(exts, UNK); // 14.3.1.1 { @@ -98,18 +125,18 @@ TEST(Submdspan, CheckStaticBounds) { using extent_type = int; using stride_type = int; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); } { using offset_type = IC<-1>; using extent_type = IC<1>; using stride_type = IC<1>; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, 
slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); } // 14.3.1.2 { @@ -117,18 +144,18 @@ TEST(Submdspan, CheckStaticBounds) { using extent_type = int; using stride_type = int; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); } { using offset_type = IC<13>; using extent_type = IC<1>; using stride_type = IC<1>; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); } // 14.3.1.3 { @@ -136,18 +163,18 @@ TEST(Submdspan, CheckStaticBounds) { using extent_type = IC<-2>; using stride_type = int; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); } { using offset_type = IC<1>; using extent_type = IC<-2>; using stride_type = IC<1>; using slice_type = 
strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); } // 14.3.1.4 { @@ -155,18 +182,18 @@ TEST(Submdspan, CheckStaticBounds) { using extent_type = IC<8>; // out of bounds using stride_type = int; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); } { using offset_type = IC<4>; // in bounds using extent_type = IC<8>; // out of bounds using stride_type = IC<1>; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); } // 14.3.1.5 { @@ -174,18 +201,18 @@ TEST(Submdspan, CheckStaticBounds) { using extent_type = IC<2>; // in bounds using stride_type = int; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::in_bounds); - test_check_static_bounds<1, slice_type>(exts, 
check_static_bounds_result::in_bounds); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::in_bounds); + test_check_static_bounds<0, slice_type>(exts, INB); + test_check_static_bounds<1, slice_type>(exts, INB); + test_check_static_bounds<2, slice_type>(exts, INB); } { using offset_type = IC<1>; // in bounds using extent_type = IC<2>; // in bounds using stride_type = IC<1>; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::in_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::in_bounds); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::in_bounds); + test_check_static_bounds<0, slice_type>(exts, INB); + test_check_static_bounds<1, slice_type>(exts, INB); + test_check_static_bounds<2, slice_type>(exts, INB); } // 14.3.1.6 { @@ -193,47 +220,47 @@ TEST(Submdspan, CheckStaticBounds) { using extent_type = int; using stride_type = int; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); } { using offset_type = int; using extent_type = IC<1>; using stride_type = IC<1>; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); } } { auto exts = 
Kokkos::dims<3>{5, 7, 11}; test_full_extent(exts); - test_check_static_bounds<0, IC<-1>>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, IC<-1>>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<2, IC<-1>>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, IC<-1>>(exts, OOB); + test_check_static_bounds<1, IC<-1>>(exts, OOB); + test_check_static_bounds<2, IC<-1>>(exts, OOB); - test_check_static_bounds<0, IC<13>>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<1, IC<13>>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, IC<13>>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<0, IC<13>>(exts, UNK); + test_check_static_bounds<1, IC<13>>(exts, UNK); + test_check_static_bounds<2, IC<13>>(exts, UNK); - test_check_static_bounds<0, IC<3>>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<1, IC<3>>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, IC<3>>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<0, IC<3>>(exts, UNK); + test_check_static_bounds<1, IC<3>>(exts, UNK); + test_check_static_bounds<2, IC<3>>(exts, UNK); - test_check_static_bounds<0, IC<6>>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<1, IC<6>>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, IC<6>>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<0, IC<6>>(exts, UNK); + test_check_static_bounds<1, IC<6>>(exts, UNK); + test_check_static_bounds<2, IC<6>>(exts, UNK); - test_check_static_bounds<0, int>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<1, int>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, int>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<0, int>(exts, UNK); + test_check_static_bounds<1, int>(exts, UNK); + 
test_check_static_bounds<2, int>(exts, UNK); - test_check_static_bounds<0, unsigned short>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<1, unsigned short>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, unsigned short>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<0, unsigned short>(exts, UNK); + test_check_static_bounds<1, unsigned short>(exts, UNK); + test_check_static_bounds<2, unsigned short>(exts, UNK); // 14.3.1.1 { @@ -241,18 +268,18 @@ TEST(Submdspan, CheckStaticBounds) { using extent_type = int; using stride_type = int; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); } { using offset_type = IC<-1>; using extent_type = IC<1>; using stride_type = IC<1>; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); } // 14.3.1.2 { @@ -260,18 +287,18 @@ TEST(Submdspan, CheckStaticBounds) { using extent_type = int; using stride_type = int; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); - 
test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); } { using offset_type = IC<13>; using extent_type = IC<1>; using stride_type = IC<1>; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); } // 14.3.1.3 { @@ -279,18 +306,18 @@ TEST(Submdspan, CheckStaticBounds) { using extent_type = IC<-2>; using stride_type = int; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); } { using offset_type = IC<1>; using extent_type = IC<-2>; using stride_type = IC<1>; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); } // 14.3.1.4 { @@ -298,18 +325,18 @@ TEST(Submdspan, 
CheckStaticBounds) { using extent_type = IC<8>; // out of bounds using stride_type = int; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); } { using offset_type = IC<4>; // in bounds using extent_type = IC<8>; // out of bounds using stride_type = IC<1>; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); } // 14.3.1.5 { @@ -317,18 +344,18 @@ TEST(Submdspan, CheckStaticBounds) { using extent_type = IC<2>; // in bounds using stride_type = int; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); } { using offset_type = IC<1>; // in bounds using extent_type = IC<2>; // in bounds using stride_type = IC<1>; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<1, slice_type>(exts, 
check_static_bounds_result::unknown); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); } // 14.3.1.6 { @@ -336,47 +363,47 @@ TEST(Submdspan, CheckStaticBounds) { using extent_type = int; using stride_type = int; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); } { using offset_type = int; using extent_type = IC<1>; using stride_type = IC<1>; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); } } { auto exts = Kokkos::extents{5, 7, 11}; test_full_extent(exts); - test_check_static_bounds<0, IC<-1>>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, IC<-1>>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<2, IC<-1>>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, IC<-1>>(exts, OOB); + test_check_static_bounds<1, IC<-1>>(exts, OOB); + test_check_static_bounds<2, IC<-1>>(exts, OOB); - test_check_static_bounds<0, IC<13>>(exts, check_static_bounds_result::out_of_bounds); - 
test_check_static_bounds<1, IC<13>>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, IC<13>>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, IC<13>>(exts, OOB); + test_check_static_bounds<1, IC<13>>(exts, UNK); + test_check_static_bounds<2, IC<13>>(exts, OOB); - test_check_static_bounds<0, IC<3>>(exts, check_static_bounds_result::in_bounds); - test_check_static_bounds<1, IC<3>>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, IC<3>>(exts, check_static_bounds_result::in_bounds); + test_check_static_bounds<0, IC<3>>(exts, INB); + test_check_static_bounds<1, IC<3>>(exts, UNK); + test_check_static_bounds<2, IC<3>>(exts, INB); - test_check_static_bounds<0, IC<6>>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, IC<6>>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, IC<6>>(exts, check_static_bounds_result::in_bounds); + test_check_static_bounds<0, IC<6>>(exts, OOB); + test_check_static_bounds<1, IC<6>>(exts, UNK); + test_check_static_bounds<2, IC<6>>(exts, INB); - test_check_static_bounds<0, int>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<1, int>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, int>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<0, int>(exts, UNK); + test_check_static_bounds<1, int>(exts, UNK); + test_check_static_bounds<2, int>(exts, UNK); - test_check_static_bounds<0, unsigned short>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<1, unsigned short>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, unsigned short>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<0, unsigned short>(exts, UNK); + test_check_static_bounds<1, unsigned short>(exts, UNK); + test_check_static_bounds<2, unsigned short>(exts, UNK); // 14.3.1.1 { @@ -384,18 +411,18 @@ TEST(Submdspan, 
CheckStaticBounds) { using extent_type = int; using stride_type = int; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); } { using offset_type = IC<-1>; using extent_type = IC<1>; using stride_type = IC<1>; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); } // 14.3.1.2 { @@ -403,18 +430,18 @@ TEST(Submdspan, CheckStaticBounds) { using extent_type = int; using stride_type = int; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, OOB); } { using offset_type = IC<13>; using extent_type = IC<1>; using stride_type = IC<1>; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); - 
test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, OOB); } // 14.3.1.3 { @@ -422,18 +449,18 @@ TEST(Submdspan, CheckStaticBounds) { using extent_type = IC<-2>; using stride_type = int; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); } { using offset_type = IC<1>; using extent_type = IC<-2>; using stride_type = IC<1>; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); } // 14.3.1.4 { @@ -441,18 +468,18 @@ TEST(Submdspan, CheckStaticBounds) { using extent_type = IC<8>; // out of bounds using stride_type = int; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, 
slice_type>(exts, OOB); } { using offset_type = IC<4>; // in bounds using extent_type = IC<8>; // out of bounds using stride_type = IC<1>; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::out_of_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::out_of_bounds); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, OOB); } // 14.3.1.5 { @@ -460,18 +487,18 @@ TEST(Submdspan, CheckStaticBounds) { using extent_type = IC<2>; // in bounds using stride_type = int; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::in_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::in_bounds); + test_check_static_bounds<0, slice_type>(exts, INB); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, INB); } { using offset_type = IC<1>; // in bounds using extent_type = IC<2>; // in bounds using stride_type = IC<1>; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::in_bounds); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::in_bounds); + test_check_static_bounds<0, slice_type>(exts, INB); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, INB); } // 14.3.1.6 { @@ -479,18 +506,18 @@ TEST(Submdspan, CheckStaticBounds) { using extent_type = int; using stride_type = int; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, 
check_static_bounds_result::unknown); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); } { using offset_type = int; using extent_type = IC<1>; using stride_type = IC<1>; using slice_type = strided_slice; - test_check_static_bounds<0, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<1, slice_type>(exts, check_static_bounds_result::unknown); - test_check_static_bounds<2, slice_type>(exts, check_static_bounds_result::unknown); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); } } } From d9bccc165d4a9758a93c6c539077067a0701c3be Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 4 Jun 2025 11:37:13 -0600 Subject: [PATCH 030/103] check_static_bounds: Improve 14.4 testing Improve testing for the case where a structured binding of the slice into two elements is valid. Test includes both aggregates and non-aggregates that opt into the tuple protocol. 
--- tests/test_submdspan_check_static_bounds.cpp | 276 +++++++++++++++++-- 1 file changed, 257 insertions(+), 19 deletions(-) diff --git a/tests/test_submdspan_check_static_bounds.cpp b/tests/test_submdspan_check_static_bounds.cpp index fed31b18..f7ff18b0 100644 --- a/tests/test_submdspan_check_static_bounds.cpp +++ b/tests/test_submdspan_check_static_bounds.cpp @@ -14,55 +14,115 @@ // //@HEADER #include +#include #include +#include #include #include #include #include -namespace { +namespace test { -struct convertible_to_full_extent_t { - constexpr operator Kokkos::full_extent_t() const { - return Kokkos::full_extent; - } +template +concept has_get_like_pair = requires(T t) { + { get<0>(t) } -> std::convertible_to; + { get<1>(t) } -> std::convertible_to; }; -static_assert(std::is_convertible_v); struct foo {}; struct bar {}; -template -concept has_get_like_pair = requires(T t) { - { std::get<0>(t) } -> std::convertible_to; - { std::get<1>(t) } -> std::convertible_to; -}; +static_assert(has_get_like_pair, std::pair>); static_assert(has_get_like_pair, std::pair>); static_assert(has_get_like_pair, std::pair>); -// Structured binding with two elements is valid, -// but it's not convertible to pair or tuple, +// Not an aggregate type, but opts into structured binding +// through the tuple protocol. Has more than two members, +// so without the tuple protocol, it could never be a valid +// candidate for structured binding into two members. +template +class non_aggregate_pair { +public: + constexpr non_aggregate_pair(First first, Second second) + : first(first), second(second) + {} + + template + friend constexpr auto get(const non_aggregate_pair& p) { + static_assert(k <= 1, "k must be 0 or 1"); + if constexpr (k == 0) { + return p.first; + } + else { + return p.second; + } + }; + + constexpr foo get_foo() const { return foo_; } + constexpr bar get_bar() const { return bar_; } + +private: + First first; + foo foo_{}; + Second second; + bar bar_{}; +}; + +static_assert(! 
std::is_default_constructible_v>); +static_assert(test::has_get_like_pair, std::pair>); +static_assert(! std::is_convertible_v, std::pair>); +static_assert(! std::is_convertible_v, std::tuple>); + +} // namespace test + +template +struct std::tuple_size> : + std::integral_constant {}; + +template +struct std::tuple_element<0, test::non_aggregate_pair> { + using type = First; +}; + +template +struct std::tuple_element<1, test::non_aggregate_pair> { + using type = Second; +}; + +namespace { + +struct convertible_to_full_extent_t { + constexpr operator Kokkos::full_extent_t() const { + return Kokkos::full_extent; + } +}; +static_assert(std::is_convertible_v); + +// Aggregate type with two members. +// It's not convertible to pair or tuple, // and neither get<0> nor get<1> work on it. template -struct my_pair { +struct aggregate_pair { First first; Second second; }; -static_assert(! std::is_convertible_v, std::pair>); -static_assert(! std::is_convertible_v, std::tuple>); -static_assert(! has_get_like_pair, std::pair>); +static_assert(! std::is_convertible_v, std::pair>); +static_assert(! std::is_convertible_v, std::tuple>); +static_assert(! 
test::has_get_like_pair, std::pair>); template void test_check_static_bounds( Kokkos::extents extents, - Kokkos::detail::check_static_bounds_result expected_result) + Kokkos::detail::check_static_bounds_result expected_result, + const std::source_location location = std::source_location::current()) { using Kokkos::detail::check_static_bounds; using Kokkos::detail::check_static_bounds_result; auto result = check_static_bounds(extents); static_assert(std::is_same_v); - EXPECT_EQ(result, expected_result); + EXPECT_EQ(result, expected_result) << "on line " << location.line(); } template @@ -233,6 +293,95 @@ TEST(Submdspan, CheckStaticBounds) { test_check_static_bounds<1, slice_type>(exts, UNK); test_check_static_bounds<2, slice_type>(exts, UNK); } + + // General 14.4 (just to show well-formedness + // for a variety of types that smell like pair) + { + using slice_type = decltype(test::non_aggregate_pair{0, 1}); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); + } + { + using slice_type = decltype(std::pair{0, 1}); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); + } + { + using slice_type = decltype(std::tuple{0, 1}); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); + } + + // 14.4.1.1 + { + using slice_type = decltype(aggregate_pair{IC<-1>{}, IC<0>{}}); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); + } + { + using slice_type = decltype(aggregate_pair{IC<-1>{}, int{0}}); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, 
slice_type>(exts, OOB); + } + // 14.4.1.2 + { + using slice_type = decltype(aggregate_pair{IC<13>{}, IC<0>{}}); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); + } + { + using slice_type = decltype(aggregate_pair{IC<13>{}, int{0}}); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); + } + // 14.4.1.3 + { + using slice_type = decltype(aggregate_pair{IC<1>{}, IC<0>{}}); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); + } + // 14.4.1.4 + { + using slice_type = decltype(aggregate_pair{IC<0>{}, IC<13>{}}); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); + } + // 14.4.1.5 + { + using slice_type = decltype(aggregate_pair{IC<1>{}, IC<3>{}}); + test_check_static_bounds<0, slice_type>(exts, INB); + test_check_static_bounds<1, slice_type>(exts, INB); + test_check_static_bounds<2, slice_type>(exts, INB); + } + // 14.4.1.6 + { + using slice_type = decltype(aggregate_pair{IC<1>{}, int{3}}); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); + } + // 14.4.2 + { + using slice_type = decltype(aggregate_pair{int{1}, IC<3>{}}); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); + } + { + using slice_type = decltype(aggregate_pair{int{1}, int{3}}); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); + } } { auto 
exts = Kokkos::dims<3>{5, 7, 11}; @@ -376,6 +525,95 @@ TEST(Submdspan, CheckStaticBounds) { test_check_static_bounds<1, slice_type>(exts, UNK); test_check_static_bounds<2, slice_type>(exts, UNK); } + + // General 14.4 (just to show well-formedness + // for a variety of types that smell like pair) + { + using slice_type = decltype(test::non_aggregate_pair{0, 1}); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); + } + { + using slice_type = decltype(std::pair{0, 1}); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); + } + { + using slice_type = decltype(std::tuple{0, 1}); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); + } + + // 14.4.1.1 + { + using slice_type = decltype(aggregate_pair{IC<-1>{}, IC<0>{}}); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); + } + { + using slice_type = decltype(aggregate_pair{IC<-1>{}, int{0}}); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); + } + // 14.4.1.2 (actually 14.4.1.6) + { + using slice_type = decltype(aggregate_pair{IC<13>{}, IC<14>{}}); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); + } + { + using slice_type = decltype(aggregate_pair{IC<13>{}, int{14}}); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); + } + // 14.4.1.3 + { + using slice_type 
= decltype(aggregate_pair{IC<1>{}, IC<0>{}}); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); + } + // 14.4.1.4 (actually 14.4.1.6) + { + using slice_type = decltype(aggregate_pair{IC<0>{}, IC<13>{}}); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); + } + // 14.4.1.5 (actually 14.4.1.6) + { + using slice_type = decltype(aggregate_pair{IC<1>{}, IC<3>{}}); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); + } + // 14.4.1.6 + { + using slice_type = decltype(aggregate_pair{IC<1>{}, int{3}}); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); + } + // 14.4.2 + { + using slice_type = decltype(aggregate_pair{int{1}, IC<3>{}}); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); + } + { + using slice_type = decltype(aggregate_pair{int{1}, int{3}}); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); + } } { auto exts = Kokkos::extents{5, 7, 11}; From 4403b82fb992a82673859a143da2f535ea52ac21 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 4 Jun 2025 11:56:48 -0600 Subject: [PATCH 031/103] check_static_bounds: Improve 14.4 testing more The test should now have complete coverage of all the cases, for all combinations of static and/or dynamic extents. 
--- tests/test_submdspan_check_static_bounds.cpp | 89 ++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/tests/test_submdspan_check_static_bounds.cpp b/tests/test_submdspan_check_static_bounds.cpp index f7ff18b0..aa2160f3 100644 --- a/tests/test_submdspan_check_static_bounds.cpp +++ b/tests/test_submdspan_check_static_bounds.cpp @@ -757,6 +757,95 @@ TEST(Submdspan, CheckStaticBounds) { test_check_static_bounds<1, slice_type>(exts, UNK); test_check_static_bounds<2, slice_type>(exts, UNK); } + + // General 14.4 (just to show well-formedness + // for a variety of types that smell like pair) + { + using slice_type = decltype(test::non_aggregate_pair{0, 1}); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); + } + { + using slice_type = decltype(std::pair{0, 1}); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); + } + { + using slice_type = decltype(std::tuple{0, 1}); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); + } + + // 14.4.1.1 + { + using slice_type = decltype(aggregate_pair{IC<-1>{}, IC<0>{}}); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); + } + { + using slice_type = decltype(aggregate_pair{IC<-1>{}, int{0}}); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); + } + // 14.4.1.2 (and 14.4.1.6) + { + using slice_type = decltype(aggregate_pair{IC<13>{}, IC<14>{}}); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, UNK); // 14.4.1.6 + 
test_check_static_bounds<2, slice_type>(exts, OOB); + } + { + using slice_type = decltype(aggregate_pair{IC<13>{}, int{14}}); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, UNK); // 14.4.1.6 + test_check_static_bounds<2, slice_type>(exts, OOB); + } + // 14.4.1.3 + { + using slice_type = decltype(aggregate_pair{IC<1>{}, IC<0>{}}); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, OOB); + test_check_static_bounds<2, slice_type>(exts, OOB); + } + // 14.4.1.4 (and 14.4.1.6) + { + using slice_type = decltype(aggregate_pair{IC<0>{}, IC<13>{}}); + test_check_static_bounds<0, slice_type>(exts, OOB); + test_check_static_bounds<1, slice_type>(exts, UNK); // 14.4.1.6 + test_check_static_bounds<2, slice_type>(exts, OOB); + } + // 14.4.1.5 (and 14.4.1.6) + { + using slice_type = decltype(aggregate_pair{IC<1>{}, IC<3>{}}); + test_check_static_bounds<0, slice_type>(exts, INB); + test_check_static_bounds<1, slice_type>(exts, UNK); // 14.4.1.6 + test_check_static_bounds<2, slice_type>(exts, INB); + } + // 14.4.1.6 + { + using slice_type = decltype(aggregate_pair{IC<1>{}, int{3}}); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); + } + // 14.4.2 + { + using slice_type = decltype(aggregate_pair{int{1}, IC<3>{}}); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); + } + { + using slice_type = decltype(aggregate_pair{int{1}, int{3}}); + test_check_static_bounds<0, slice_type>(exts, UNK); + test_check_static_bounds<1, slice_type>(exts, UNK); + test_check_static_bounds<2, slice_type>(exts, UNK); + } } } From 2e10f5cbda7bef4126b0283d426df47a9e5d8d63 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 4 Jun 2025 21:48:19 -0600 Subject: [PATCH 032/103] Add 1st 
draft of submdspan benchmark Currently only the CPU version works. --- benchmarks/CMakeLists.txt | 1 + benchmarks/submdspan/CMakeLists.txt | 6 + benchmarks/submdspan/cuda/CMakeLists.txt | 9 + benchmarks/submdspan/cuda/submdspan_cuda.cu | 306 ++++++++++++++++++++ benchmarks/submdspan/submdspan.cpp | 97 +++++++ 5 files changed, 419 insertions(+) create mode 100644 benchmarks/submdspan/CMakeLists.txt create mode 100644 benchmarks/submdspan/cuda/CMakeLists.txt create mode 100644 benchmarks/submdspan/cuda/submdspan_cuda.cu create mode 100644 benchmarks/submdspan/submdspan.cpp diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index e2c477c1..921c762a 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -92,3 +92,4 @@ add_subdirectory(matvec) add_subdirectory(copy) add_subdirectory(stencil) add_subdirectory(tiny_matrix_add) +add_subdirectory(submdspan) \ No newline at end of file diff --git a/benchmarks/submdspan/CMakeLists.txt b/benchmarks/submdspan/CMakeLists.txt new file mode 100644 index 00000000..97ad4d33 --- /dev/null +++ b/benchmarks/submdspan/CMakeLists.txt @@ -0,0 +1,6 @@ + +mdspan_add_benchmark(submdspan) + +#if(MDSPAN_ENABLE_CUDA) +# add_subdirectory(cuda) +#endif() diff --git a/benchmarks/submdspan/cuda/CMakeLists.txt b/benchmarks/submdspan/cuda/CMakeLists.txt new file mode 100644 index 00000000..03fcdd35 --- /dev/null +++ b/benchmarks/submdspan/cuda/CMakeLists.txt @@ -0,0 +1,9 @@ + +if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --extended-lambda") +endif() + +mdspan_add_cuda_benchmark(submdspan_cuda) +target_include_directories(submdspan_cuda PUBLIC + $ +) diff --git a/benchmarks/submdspan/cuda/submdspan_cuda.cu b/benchmarks/submdspan/cuda/submdspan_cuda.cu new file mode 100644 index 00000000..fc0b30ad --- /dev/null +++ b/benchmarks/submdspan/cuda/submdspan_cuda.cu @@ -0,0 +1,306 @@ +//@HEADER +// ************************************************************************ +// +// 
Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#include +#include +#include +#include +#include + +// Whether to let mapping convert index calculation to the type used +// to index into the mdspan +//#define MDSPAN_IMPL_USE_MAPPING_ARG_CAST +// Overwrite what extents.extent() returns and what the actual storage type is +//#define MDSPAN_IMPL_OVERWRITE_EXTENTS_SIZE_TYPE int +// Choose the index type used by the code +using idx_t = size_t; + +#include "fill.hpp" +#include +//================================================================================ + +static constexpr int global_delta = 1; +static constexpr int global_repeat = 16; + +//================================================================================ + +template +using lmdspan = Kokkos::mdspan, Kokkos::layout_left>; +template +using rmdspan = Kokkos::mdspan, Kokkos::layout_right>; + + +void throw_runtime_exception(const std::string &msg) { + std::ostringstream o; + o << msg; + throw std::runtime_error(o.str()); +} + +void cuda_internal_error_throw(cudaError e, const char* name, + const char* file = NULL, const int line = 0) { + std::ostringstream out; + out << name << " error( " << cudaGetErrorName(e) + << "): " << cudaGetErrorString(e); + if (file) { + out << " " << file << ":" << line; + } + throw_runtime_exception(out.str()); +} + +inline void cuda_internal_safe_call(cudaError e, const char* name, + const char* file = NULL, + const int line = 0) { + if (cudaSuccess != e) { + cuda_internal_error_throw(e, name, file, line); + } +} + +#define CUDA_SAFE_CALL(call) \ + cuda_internal_safe_call(call, #call, 
__FILE__, __LINE__) + +//================================================================================ + +dim3 get_bench_thread_block(size_t y,size_t z) { + cudaDeviceProp cudaProp; + size_t dim_z = 1; + while(dim_z*3(dim_y), static_cast(dim_z)); +} + +template +__global__ +void do_run_kernel(F f, Args... args) { + f(args...); +} + +template +float run_kernel_timed(size_t N, size_t M, size_t K, F&& f, Args&&... args) { + cudaEvent_t start, stop; + CUDA_SAFE_CALL(cudaEventCreate(&start)); + CUDA_SAFE_CALL(cudaEventCreate(&stop)); + + CUDA_SAFE_CALL(cudaEventRecord(start)); + do_run_kernel<<>>( + (F&&)f, ((Args&&) args)... + ); + CUDA_SAFE_CALL(cudaEventRecord(stop)); + CUDA_SAFE_CALL(cudaEventSynchronize(stop)); + float milliseconds = 0; + CUDA_SAFE_CALL(cudaEventElapsedTime(&milliseconds, start, stop)); + return milliseconds; +} + +template +MDSpan fill_device_mdspan(MDSpan, DynSizes... dyn) { + + using value_type = typename MDSpan::value_type; + auto buffer_size = MDSpan{nullptr, dyn...}.mapping().required_span_size(); + auto host_buffer = std::make_unique( + MDSpan{nullptr, dyn...}.mapping().required_span_size() + ); + auto host_mdspan = MDSpan{host_buffer.get(), dyn...}; + mdspan_benchmark::fill_random(host_mdspan); + + value_type* device_buffer = nullptr; + CUDA_SAFE_CALL(cudaMalloc(&device_buffer, buffer_size * sizeof(value_type))); + CUDA_SAFE_CALL(cudaMemcpy( + device_buffer, host_buffer.get(), buffer_size * sizeof(value_type), cudaMemcpyHostToDevice + )); + return MDSpan{device_buffer, dyn...}; +} + +//================================================================================ + +template +void BM_MDSpan_Cuda_Stencil_3D(benchmark::State& state, MDSpan, DynSizes... dyn) { + + using value_type = typename MDSpan::value_type; + auto s = fill_device_mdspan(MDSpan{}, dyn...); + auto o = fill_device_mdspan(MDSpan{}, dyn...); + + idx_t d = static_cast(global_delta); + int repeats = global_repeat==0? (s.extent(0)*s.extent(1)*s.extent(2) > (100*100*100) ? 
50 : 1000) : global_repeat; + + auto lambda = + [=] __device__ { + for(int r = 0; r < repeats; ++r) { + for(idx_t i = blockIdx.x+d; i < static_cast(s.extent(0))-d; i += gridDim.x) { + for(idx_t j = threadIdx.z+d; j < static_cast(s.extent(1))-d; j += blockDim.z) { + for(idx_t k = threadIdx.y+d; k < static_cast(s.extent(2))-d; k += blockDim.y) { + for(int q=0; q<128; q++) { + value_type sum_local = o(i,j,k); + for(idx_t di = i-d; di < i+d+1; di++) { + for(idx_t dj = j-d; dj < j+d+1; dj++) { + for(idx_t dk = k-d; dk < k+d+1; dk++) { + sum_local += s(di, dj, dk); + }}} + o(i,j,k) = sum_local; + } + } + } + } + } + }; + run_kernel_timed(s.extent(0),s.extent(1),s.extent(2),lambda); + + for (auto _ : state) { + auto timed = run_kernel_timed(s.extent(0),s.extent(1),s.extent(2),lambda); + // units of cuda timer is milliseconds, units of iteration timer is seconds + state.SetIterationTime(timed * 1e-3); + } + size_t num_inner_elements = (s.extent(0)-d) * (s.extent(1)-d) * (s.extent(2)-d); + size_t stencil_num = (2*d+1) * (2*d+1) * (2*d+1); + state.SetBytesProcessed( num_inner_elements * stencil_num * sizeof(value_type) * state.iterations() * repeats); + state.counters["repeats"] = repeats; + + CUDA_SAFE_CALL(cudaDeviceSynchronize()); + CUDA_SAFE_CALL(cudaFree(s.data_handle())); +} +MDSPAN_BENCHMARK_ALL_3D_MANUAL(BM_MDSpan_Cuda_Stencil_3D, right_, rmdspan, 80, 80, 80); +//MDSPAN_BENCHMARK_ALL_3D_MANUAL(BM_MDSpan_Cuda_Stencil_3D, left_, lmdspan, 80, 80, 80); +//MDSPAN_BENCHMARK_ALL_3D_MANUAL(BM_MDSpan_Cuda_Stencil_3D, right_, rmdspan, 400, 400, 400); +//MDSPAN_BENCHMARK_ALL_3D_MANUAL(BM_MDSpan_Cuda_Stencil_3D, left_, lmdspan, 400, 400, 400); + +//================================================================================ + +template +void BM_Raw_Cuda_Stencil_3D_right(benchmark::State& state, T, SizeX x_, SizeY y_, SizeZ z_) { + + idx_t d = static_cast(global_delta); + idx_t x = static_cast(x_); + idx_t y = static_cast(y_); + idx_t z = static_cast(z_); + + using value_type 
= T; + value_type* data = nullptr; + value_type* data_o = nullptr; + { + // just for setup... + auto wrapped = Kokkos::mdspan>{}; + auto s = fill_device_mdspan(wrapped, x*y*z); + data = s.data_handle(); + auto o = fill_device_mdspan(wrapped, x*y*z); + data_o = o.data_handle(); + } + + int repeats = global_repeat==0? (x*y*z > (100*100*100) ? 50 : 1000) : global_repeat; + + auto lambda = + [=] __device__ { + for(int r = 0; r < repeats; ++r) { + for(idx_t i = blockIdx.x+d; i < x-d; i += gridDim.x) { + for(idx_t j = threadIdx.z+d; j < y-d; j += blockDim.z) { + for(idx_t k = threadIdx.y+d; k < z-d; k += blockDim.y) { + for(int q=0; q<128; q++) { + value_type sum_local = data_o[k + j*z + i*z*y]; + for(idx_t di = i-d; di < i+d+1; di++) { + for(idx_t dj = j-d; dj < j+d+1; dj++) { + for(idx_t dk = k-d; dk < k+d+1; dk++) { + sum_local += data[dk + dj*z + di*z*y]; + }}} + data_o[k + j*z + i*z*y] = sum_local; + } + } + } + } + } + }; + run_kernel_timed(x,y,z,lambda); + + for (auto _ : state) { + auto timed = run_kernel_timed(x,y,z,lambda); + // units of cuda timer is milliseconds, units of iteration timer is seconds + state.SetIterationTime(timed * 1e-3); + } + size_t num_inner_elements = (x-d) * (y-d) * (z-d); + size_t stencil_num = (2*d+1) * (2*d+1) * (2*d+1); + state.SetBytesProcessed( num_inner_elements * stencil_num * sizeof(value_type) * state.iterations() * repeats); + state.counters["repeats"] = repeats; + + CUDA_SAFE_CALL(cudaDeviceSynchronize()); + CUDA_SAFE_CALL(cudaFree(data)); +} +BENCHMARK_CAPTURE(BM_Raw_Cuda_Stencil_3D_right, size_80_80_80, int(), 80, 80, 80); +BENCHMARK_CAPTURE(BM_Raw_Cuda_Stencil_3D_right, size_400_400_400, int(), 400, 400, 400); + +//================================================================================ + +template +void BM_Raw_Cuda_Stencil_3D_left(benchmark::State& state, T, SizeX x_, SizeY y_, SizeZ z_) { + + idx_t d = static_cast(global_delta); + idx_t x = static_cast(x_); + idx_t y = static_cast(y_); + idx_t z = static_cast(z_); 
+ + using value_type = T; + value_type* data = nullptr; + value_type* data_o = nullptr; + { + // just for setup... + auto wrapped = Kokkos::mdspan>{}; + auto s = fill_device_mdspan(wrapped, x*y*z); + data = s.data_handle(); + auto o = fill_device_mdspan(wrapped, x*y*z); + data_o = o.data_handle(); + } + + int repeats = global_repeat==0? (x*y*z > (100*100*100) ? 50 : 1000) : global_repeat; + auto lambda = + [=] __device__ { + for(int r = 0; r < repeats; ++r) { + for(idx_t i = blockIdx.x+d; i < x-d; i += gridDim.x) { + for(idx_t j = threadIdx.z+d; j < y-d; j += blockDim.z) { + for(idx_t k = threadIdx.y+d; k < z-d; k += blockDim.y) { + for(int q=0; q<128; q++) { + value_type sum_local = data_o[k*x*y + j*x + i]; + for(idx_t di = i-d; di < i+d+1; di++) { + for(idx_t dj = j-d; dj < j+d+1; dj++) { + for(idx_t dk = k-d; dk < k+d+1; dk++) { + sum_local += data[dk*x*y + dj*x + di]; + }}} + data_o[k*x*y + j*x + i] = sum_local; + } + } + } + } + } + }; + + run_kernel_timed(x,y,z,lambda); + + for (auto _ : state) { + auto timed = run_kernel_timed(x,y,z,lambda); + // units of cuda timer is milliseconds, units of iteration timer is seconds + state.SetIterationTime(timed * 1e-3); + } + size_t num_inner_elements = (x-d) * (y-d) * (z-d); + size_t stencil_num = (2*d+1) * (2*d+1) * (2*d+1); + state.SetBytesProcessed( num_inner_elements * stencil_num * sizeof(value_type) * state.iterations() * repeats); + state.counters["repeats"] = repeats; + + CUDA_SAFE_CALL(cudaDeviceSynchronize()); + CUDA_SAFE_CALL(cudaFree(data)); +} +BENCHMARK_CAPTURE(BM_Raw_Cuda_Stencil_3D_left, size_80_80_80, int(), 80, 80, 80); +//BENCHMARK_CAPTURE(BM_Raw_Cuda_Stencil_3D_left, size_400_400_400, int(), 400, 400, 400); + +//================================================================================ + +BENCHMARK_MAIN(); diff --git a/benchmarks/submdspan/submdspan.cpp b/benchmarks/submdspan/submdspan.cpp new file mode 100644 index 00000000..ffde2e3e --- /dev/null +++ b/benchmarks/submdspan/submdspan.cpp @@ 
-0,0 +1,97 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#include "fill.hpp" + +#include + +#include + +#include +#include +#include +#include +#include +#include + +// This benchmark measures the overhead of submdspan slice +// canonicalization as proposed by P3663R2. +// +// Slice canonicalization happens in the submdspan function, +// before slices reach the layout mapping's submdspan_mapping +// customization. Thus, we need to call submdspan itself, +// but the layout mapping type does not matter. +// We do want to exercise a Standard layout mapping, though. +// +// The mdspan's value type doesn't matter either, +// so we can use char to minimize storage. + +template +using nonconst_test_mdspan = Kokkos::mdspan>; + +template +using const_test_mdspan = Kokkos::mdspan>; + +template +size_t submdspan_benchmark(benchmark::State& state, const_test_mdspan x) { + size_t count_not_same = 0; + for (auto _ : state) { + const auto p = std::pair{IndexType(0), IndexType(1)}; + auto x_sub = Kokkos::submdspan(x, ((void) Exts, p)...); + if (x_sub[((void) Exts, 0)...] 
!= x[((void) Exts, p.first)...]) { + ++count_not_same; + } + benchmark::DoNotOptimize(count_not_same); + } + return count_not_same; +} + +template +class benchmark_buffer { +public: + benchmark_buffer(Kokkos::extents exts) : + mapping_{exts}, + buffer_{std::make_unique(mapping_.required_span_size())} + {} + + nonconst_test_mdspan get_mdspan() { + return {buffer_.get(), mapping_}; + } + + const_test_mdspan get_mdspan() const { + return {static_cast(buffer_.get()), mapping_}; + } + +private: + Kokkos::layout_right::template mapping> mapping_; + std::unique_ptr buffer_; +}; + +template +void submdspan_run_benchmark(benchmark::State& state, Kokkos::extents exts) { + auto buffer = benchmark_buffer{exts}; + mdspan_benchmark::fill_random(buffer.get_mdspan()); + size_t count_not_same = submdspan_benchmark(state, std::as_const(buffer).get_mdspan()); + if (count_not_same != 0) { + std::cerr << "submdspan_benchmark failed: count not same = " << count_not_same << std::endl; + } +} + +BENCHMARK_CAPTURE(submdspan_run_benchmark, int_6d, (Kokkos::extents{})); +BENCHMARK_CAPTURE(submdspan_run_benchmark, int_6d, (Kokkos::dextents{2, 2, 2, 2, 2, 2})); +BENCHMARK_CAPTURE(submdspan_run_benchmark, size_t_6d, (Kokkos::extents{})); +BENCHMARK_CAPTURE(submdspan_run_benchmark, size_t_6d, (Kokkos::dextents{2, 2, 2, 2, 2, 2})); + +BENCHMARK_MAIN(); From a09cdd3336b49056b0d5753e661ee7b62d45fbf1 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 5 Jun 2025 16:22:53 -0600 Subject: [PATCH 033/103] Benchmark improvements --- benchmarks/submdspan/submdspan.cpp | 48 +++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/benchmarks/submdspan/submdspan.cpp b/benchmarks/submdspan/submdspan.cpp index ffde2e3e..955eeac0 100644 --- a/benchmarks/submdspan/submdspan.cpp +++ b/benchmarks/submdspan/submdspan.cpp @@ -36,23 +36,33 @@ // We do want to exercise a Standard layout mapping, though. 
// // The mdspan's value type doesn't matter either, -// so we can use char to minimize storage. +// so we can use a char-sized type to minimize storage. +// Using unsigned char makes overflow defined behavior. template -using nonconst_test_mdspan = Kokkos::mdspan>; +using nonconst_test_mdspan = + Kokkos::mdspan>; template -using const_test_mdspan = Kokkos::mdspan>; +using const_test_mdspan = + Kokkos::mdspan>; template -size_t submdspan_benchmark(benchmark::State& state, const_test_mdspan x) { +size_t submdspan_benchmark(benchmark::State& state, + std::ostream& output, + nonconst_test_mdspan out) +{ + output << "buf_0s_before = " << static_cast(out[((void) Exts, 0)...]) << '\n'; + size_t count_not_same = 0; for (auto _ : state) { - const auto p = std::pair{IndexType(0), IndexType(1)}; - auto x_sub = Kokkos::submdspan(x, ((void) Exts, p)...); - if (x_sub[((void) Exts, 0)...] != x[((void) Exts, p.first)...]) { + const auto p = std::pair{IndexType(0), IndexType(1)}; + auto out_sub = Kokkos::submdspan(out, ((void) Exts, p)...); + if (out_sub[((void) Exts, 0)...] != out[((void) Exts, p.first)...]) { ++count_not_same; } + out_sub[((void) Exts, 0)...] 
+= static_cast(1u); + benchmark::DoNotOptimize(count_not_same); } return count_not_same; @@ -61,9 +71,11 @@ size_t submdspan_benchmark(benchmark::State& state, const_test_mdspan class benchmark_buffer { public: + using value_type = unsigned char; + benchmark_buffer(Kokkos::extents exts) : mapping_{exts}, - buffer_{std::make_unique(mapping_.required_span_size())} + buffer_{std::make_unique(mapping_.required_span_size())} {} nonconst_test_mdspan get_mdspan() { @@ -71,22 +83,30 @@ class benchmark_buffer { } const_test_mdspan get_mdspan() const { - return {static_cast(buffer_.get()), mapping_}; + return {static_cast(buffer_.get()), mapping_}; } private: Kokkos::layout_right::template mapping> mapping_; - std::unique_ptr buffer_; + std::unique_ptr buffer_; }; template -void submdspan_run_benchmark(benchmark::State& state, Kokkos::extents exts) { - auto buffer = benchmark_buffer{exts}; - mdspan_benchmark::fill_random(buffer.get_mdspan()); - size_t count_not_same = submdspan_benchmark(state, std::as_const(buffer).get_mdspan()); +void submdspan_run_benchmark(benchmark::State& state, + Kokkos::extents exts) +{ + auto buf = benchmark_buffer{exts}; + mdspan_benchmark::fill_random(buf.get_mdspan()); + + size_t count_not_same = submdspan_benchmark(state, std::cerr, buf.get_mdspan()); if (count_not_same != 0) { std::cerr << "submdspan_benchmark failed: count not same = " << count_not_same << std::endl; + std::terminate(); } + + auto get_0th_element = [] (auto x) { return x[((void) Exts, 0)...]; }; + const auto buf_0s_after = get_0th_element(buf.get_mdspan()); + std::cerr << "buf_0s_after = " << static_cast(buf_0s_after) << '\n'; } BENCHMARK_CAPTURE(submdspan_run_benchmark, int_6d, (Kokkos::extents{})); From d77b1a74b20efdf5cf06765438968d87cb3d15d4 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Fri, 6 Jun 2025 14:33:00 -0600 Subject: [PATCH 034/103] Fix convertible-to-index-type bug The current (non-P3663) mdspan implementation incorrectly cannot handle index or slice types 
that are convertible to index_type, but are not integral-non-bool. This commit fixes that, and adds unit tests for both layout mapping indexing and submdspan slicing. --- .../__p2630_bits/submdspan_extents.hpp | 46 +++- .../__p2630_bits/submdspan_mapping.hpp | 30 +++ tests/CMakeLists.txt | 2 + tests/test_convertible_to_index_type.cpp | 205 ++++++++++++++++++ 4 files changed, 276 insertions(+), 7 deletions(-) create mode 100644 tests/test_convertible_to_index_type.cpp diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 05299cd2..060d94de 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -167,9 +167,33 @@ constexpr Integral first_of(Integral i) { #else +// NOTE (mfh 2025/06/06) The original "return i;" was not conforming, +// in particular for index types that were not integral-not-bool +// but were convertible to index_type. + MDSPAN_TEMPLATE_REQUIRES( class Integral, - /* requires */(std::is_convertible_v) + /* requires */( + ! std::is_signed_v && + ! std::is_unsigned_v && + ( + std::is_convertible_v || + std::is_convertible_v + ) + ) +) +MDSPAN_INLINE_FUNCTION +constexpr Integral first_of(const Integral &i) { + // FIXME (mfh 2025/06/06) This is broken, but it's better than it was. 
+ return size_t(i); +} + +MDSPAN_TEMPLATE_REQUIRES( + class Integral, + /* requires */( + std::is_signed_v || + std::is_unsigned_v + ) ) MDSPAN_INLINE_FUNCTION constexpr Integral first_of(const Integral &i) { @@ -197,21 +221,29 @@ first_of(const std::integral_constant&) { } #endif + + +#if defined(MDSPAN_ENABLE_P3663) + MDSPAN_INLINE_FUNCTION constexpr -#if defined(MDSPAN_ENABLE_P3663) auto -#else -integral_constant -#endif first_of(const ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t &) { -#if defined(MDSPAN_ENABLE_P3663) return std::cw; +} + #else + +MDSPAN_INLINE_FUNCTION +constexpr +integral_constant +first_of(const ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t &) { return {}; -#endif } +#endif // MDSPAN_ENABLE_P3663 + + // P3663 doesn't need any of these overloads, // because its version of first_of will never see pair-like types. // (The only "contiguous range of indices" slice types it sees are diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index d40921af..5094c427 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -91,10 +91,40 @@ concept mapping_sliceable_with_full_extents = template MDSPAN_INLINE_FUNCTION constexpr bool one_slice_out_of_bounds(const IndexType &ext, const Slice &slice) { +#if defined(MDSPAN_ENABLE_P3663) using common_t = std::common_type_t; return static_cast(first_of(slice)) == static_cast(ext); +#else + // NOTE (mfh 2025/06/06) The original implementation was not conforming. + // For index types that are not integral but are nevertheless convertible + // to integral, it would result in build errors when attempting to find + // a common type between first_of(slice) and IndexType. This is because + // first_of(slice) in that case would return the original slice type, + // which might not necessarily be convertible to IndexType. 
The problem + // is really in first_of: the analogous function in the Standard, + // _`first`_`_`, is aware of IndexType and casts slices whose types + // are "integral not bool" to IndexType (even before P3663). However, + // first_of doesn't know IndexType and so it can only return the original + // slice in the case where it's convertible to integral-not-bool. + // + // The easy fix is P3663. However, for fair benchmarking between the + // P3663 and no-P3663 cases, we don't want to copy the slice if not needed. + // Thus, we introduce a special case. + if constexpr (std::is_convertible_v && + ! std::is_signed_v> && + ! std::is_unsigned_v>) + { + return first_of(static_cast(slice)) == ext; + } + else { + using common_t = + std::common_type_t; + return static_cast(first_of(slice)) == + static_cast(ext); + } +#endif // MDSPAN_ENABLE_P3663 } template +#include +#include + + +namespace test { + +// Index or slice type that's convertible to IndexType, +// but neither integral nor integral-constant-like. 
+MDSPAN_TEMPLATE_REQUIRES( + class IndexType, + /* requires */ ( + std::is_signed_v || std::is_unsigned_v + ) +) +class index_holder { +public: + index_holder(IndexType i) : i_{i} {} + constexpr operator IndexType() const noexcept { return i_; } + constexpr index_holder& operator++() noexcept { + ++i_; + return *this; + } +#if defined(__cpp_impl_three_way_comparison) + constexpr auto operator<=>(const index_holder&) const noexcept = default; +#else + friend constexpr bool operator<(const index_holder& x, const index_holder& y) noexcept { + return x.i_ < y.i_; + } + friend constexpr bool operator==(const index_holder& x, const index_holder& y) noexcept { + return x.i_ == y.i_; + } +#endif + +private: + IndexType i_; +}; +static_assert(std::is_convertible_v, int>); +static_assert(std::is_convertible_v, size_t>); +static_assert(std::is_nothrow_constructible_v>); +static_assert(std::is_nothrow_constructible_v>); + +// Slice type that's convertible to full_extent_t, but is not full_extent_t. 
+struct full_extent_wrapper_t { + constexpr operator Kokkos::full_extent_t() const noexcept{ + return Kokkos::full_extent; + } +}; + + +template +void test_mapping_call_operator(Layout, Kokkos::extents exts) { + using extents_type = Kokkos::extents; + using mapping_type = typename Layout::template mapping; + mapping_type mapping(exts); + + const index_holder wrapped_zero(0); + const IndexType zero(0); + + for (size_t i = 0; i < exts.rank(); ++i) { + auto result = mapping(((void) Exts, wrapped_zero)...); + auto expected_result = mapping(((void) Exts, zero)...); + EXPECT_EQ(result, expected_result); + } +} + +template +void test_submdspan1(Layout, Kokkos::extents exts) { + using extents_type = Kokkos::extents; + using mapping_type = typename Layout::template mapping; + mapping_type mapping(exts); + + auto buffer = std::make_unique(mapping.required_span_size()); + auto view = Kokkos::mdspan(buffer.get(), mapping); + + const index_holder wrapped_zero(0); + const IndexType zero(0); + + auto result = Kokkos::submdspan(view, ((void) Exts, wrapped_zero)...); + auto expected_result = Kokkos::submdspan(view, ((void) Exts, zero)...); + static_assert(std::is_same_v); + EXPECT_EQ(result.mapping(), expected_result.mapping()); +} + +template +void test_submdspan2_inner(const Mdspan& view, std::index_sequence) { + using index_type = typename Mdspan::index_type; + + const index_holder wrapped_zero(0); + const index_type zero(0); + + auto result = Kokkos::submdspan(view, wrapped_zero, ((void) Inds, Kokkos::full_extent)...); + auto expected_result = Kokkos::submdspan(view, zero, ((void) Inds, Kokkos::full_extent)...); + static_assert(std::is_same_v); + EXPECT_EQ(result.mapping(), expected_result.mapping()); +} + +template +void test_submdspan2(Layout, Kokkos::extents exts) { + using extents_type = Kokkos::extents; + using mapping_type = typename Layout::template mapping; + mapping_type mapping(exts); + + auto buffer = std::make_unique(mapping.required_span_size()); + auto view = 
Kokkos::mdspan(buffer.get(), mapping); + + static_assert(sizeof...(Exts) != 0); + test_submdspan2_inner(view, std::make_index_sequence{}); +} + +template +void test_submdspan3_inner(const Mdspan& view, std::index_sequence) { + using index_type = typename Mdspan::index_type; + + const index_holder wrapped_zero(0); + const index_type zero(0); + + auto result = Kokkos::submdspan(view, wrapped_zero, ((void) Inds, full_extent_wrapper_t{})...); + auto expected_result = Kokkos::submdspan(view, zero, ((void) Inds, full_extent_wrapper_t{})...); + static_assert(std::is_same_v); + EXPECT_EQ(result.mapping(), expected_result.mapping()); +} + +template +void test_submdspan3(Layout, Kokkos::extents exts) { + using extents_type = Kokkos::extents; + using mapping_type = typename Layout::template mapping; + mapping_type mapping(exts); + + auto buffer = std::make_unique(mapping.required_span_size()); + auto view = Kokkos::mdspan(buffer.get(), mapping); + + static_assert(sizeof...(Exts) != 0); + test_submdspan3_inner(view, std::make_index_sequence{}); +} + +} // namespace test + +TEST(ConvertibleToIndexType, CallOperatorLayoutLeft) +{ + test::test_mapping_call_operator(Kokkos::layout_left{}, Kokkos::extents{}); + test::test_mapping_call_operator(Kokkos::layout_left{}, Kokkos::dextents{2, 2, 2, 2, 2, 2}); +} + +TEST(ConvertibleToIndexType, CallOperatorLayoutRight) +{ + test::test_mapping_call_operator(Kokkos::layout_right{}, Kokkos::extents{}); + test::test_mapping_call_operator(Kokkos::layout_right{}, Kokkos::dextents{2, 2, 2, 2, 2, 2}); +} + +TEST(ConvertibleToIndexType, Submdspan1_LayoutLeft) +{ + test::test_submdspan1(Kokkos::layout_left{}, Kokkos::extents{}); + test::test_submdspan1(Kokkos::layout_left{}, Kokkos::dextents{2, 2, 2, 2, 2, 2}); +} + +TEST(ConvertibleToIndexType, Submdspan1_LayoutRight) +{ + test::test_submdspan1(Kokkos::layout_right{}, Kokkos::extents{}); + test::test_submdspan1(Kokkos::layout_right{}, Kokkos::dextents{2, 2, 2, 2, 2, 2}); +} + 
+TEST(ConvertibleToIndexType, Submdspan2_LayoutLeft) +{ + test::test_submdspan2(Kokkos::layout_left{}, Kokkos::extents{}); + test::test_submdspan2(Kokkos::layout_left{}, Kokkos::dextents{2, 2, 2, 2, 2, 2}); +} + +TEST(ConvertibleToIndexType, Submdspan2_LayoutRight) +{ + test::test_submdspan2(Kokkos::layout_right{}, Kokkos::extents{}); + test::test_submdspan2(Kokkos::layout_right{}, Kokkos::dextents{2, 2, 2, 2, 2, 2}); +} + +TEST(ConvertibleToIndexType, Submdspan3_LayoutLeft) +{ + test::test_submdspan3(Kokkos::layout_left{}, Kokkos::extents{}); + test::test_submdspan3(Kokkos::layout_left{}, Kokkos::dextents{2, 2, 2, 2, 2, 2}); +} + +TEST(ConvertibleToIndexType, Submdspan3_LayoutRight) +{ + test::test_submdspan3(Kokkos::layout_right{}, Kokkos::extents{}); + test::test_submdspan3(Kokkos::layout_right{}, Kokkos::dextents{2, 2, 2, 2, 2, 2}); +} From d8a43502c9699719187019a447cf3f50b4a8564e Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Fri, 6 Jun 2025 14:40:27 -0600 Subject: [PATCH 035/103] Fix P3663 benchmark and expand Make sure that the benchmark gets the MDSPAN_ENABLE_P3663 definition (it wasn't before). Add new benchmarks that make it harder for the compiler to optimize away code. (They actually change all the values in the mdspan and check that the changes are mathematically correct.) In benchmark, for the slice type that's convertible to full_extent_t, make the conversion operator noexcept. 
--- benchmarks/CMakeLists.txt | 9 ++ benchmarks/submdspan/submdspan.cpp | 177 +++++++++++++++++++++++++++-- 2 files changed, 178 insertions(+), 8 deletions(-) diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 921c762a..a3329a32 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -7,6 +7,9 @@ function(mdspan_add_benchmark EXENAME) ) # Set flag to build with parenthesis enabled target_compile_definitions(${EXENAME} PRIVATE MDSPAN_USE_PAREN_OPERATOR=1) + if(MDSPAN_ENABLE_P3663) + target_compile_definitions(${EXENAME} PUBLIC MDSPAN_ENABLE_P3663=1) + endif() endfunction() if(MDSPAN_USE_SYSTEM_BENCHMARK) @@ -66,6 +69,9 @@ function(mdspan_add_cuda_benchmark EXENAME) if(_benchmark_libs_old MATCHES "-pthread") target_compile_options(${EXENAME} PUBLIC "-Xcompiler=-pthread") endif() + if(MDSPAN_ENABLE_P3663) + target_compile_definitions(${EXENAME} PUBLIC MDSPAN_ENABLE_P3663=1) + endif() endfunction() if(MDSPAN_ENABLE_OPENMP) @@ -81,6 +87,9 @@ function(mdspan_add_openmp_benchmark EXENAME) $ ) target_compile_definitions(${EXENAME} PRIVATE MDSPAN_USE_PAREN_OPERATOR=1) + if(MDSPAN_ENABLE_P3663) + target_compile_definitions(${EXENAME} PUBLIC MDSPAN_ENABLE_P3663=1) + endif() else() message(WARNING "Not adding target ${EXENAME} because OpenMP was not found") endif() diff --git a/benchmarks/submdspan/submdspan.cpp b/benchmarks/submdspan/submdspan.cpp index 955eeac0..fd8673d7 100644 --- a/benchmarks/submdspan/submdspan.cpp +++ b/benchmarks/submdspan/submdspan.cpp @@ -19,12 +19,13 @@ #include +#include +#include +#include #include #include #include #include -#include -#include // This benchmark measures the overhead of submdspan slice // canonicalization as proposed by P3663R2. 
@@ -49,11 +50,8 @@ using const_test_mdspan = template size_t submdspan_benchmark(benchmark::State& state, - std::ostream& output, nonconst_test_mdspan out) { - output << "buf_0s_before = " << static_cast(out[((void) Exts, 0)...]) << '\n'; - size_t count_not_same = 0; for (auto _ : state) { const auto p = std::pair{IndexType(0), IndexType(1)}; @@ -78,6 +76,10 @@ class benchmark_buffer { buffer_{std::make_unique(mapping_.required_span_size())} {} + size_t size() const { + return mapping_.required_span_size(); + } + nonconst_test_mdspan get_mdspan() { return {buffer_.get(), mapping_}; } @@ -98,15 +100,15 @@ void submdspan_run_benchmark(benchmark::State& state, auto buf = benchmark_buffer{exts}; mdspan_benchmark::fill_random(buf.get_mdspan()); - size_t count_not_same = submdspan_benchmark(state, std::cerr, buf.get_mdspan()); + size_t count_not_same = submdspan_benchmark(state, buf.get_mdspan()); if (count_not_same != 0) { std::cerr << "submdspan_benchmark failed: count not same = " << count_not_same << std::endl; std::terminate(); } auto get_0th_element = [] (auto x) { return x[((void) Exts, 0)...]; }; - const auto buf_0s_after = get_0th_element(buf.get_mdspan()); - std::cerr << "buf_0s_after = " << static_cast(buf_0s_after) << '\n'; + auto buf_0s_after = get_0th_element(buf.get_mdspan()); + benchmark::DoNotOptimize(buf_0s_after); } BENCHMARK_CAPTURE(submdspan_run_benchmark, int_6d, (Kokkos::extents{})); @@ -114,4 +116,163 @@ BENCHMARK_CAPTURE(submdspan_run_benchmark, int_6d, (Kokkos::dextents{2, BENCHMARK_CAPTURE(submdspan_run_benchmark, size_t_6d, (Kokkos::extents{})); BENCHMARK_CAPTURE(submdspan_run_benchmark, size_t_6d, (Kokkos::dextents{2, 2, 2, 2, 2, 2})); +// Make the compiler work harder by using a slice type +// that's convertible to index_type, but neither integral +// nor integral-constant-like. 
+template + requires (std::is_signed_v || std::is_unsigned_v) +class index_holder { +public: + index_holder(IndexType i) : i_{i} {} + constexpr operator IndexType() const noexcept { return i_; } + constexpr index_holder& operator++() noexcept { + ++i_; + return *this; + } + constexpr auto operator<=>(const index_holder&) const noexcept = default; + +private: + IndexType i_; +}; +static_assert(std::is_convertible_v, int>); +static_assert(std::is_convertible_v, size_t>); +static_assert(std::is_nothrow_constructible_v>); +static_assert(std::is_nothrow_constructible_v>); + +// Make the compiler work harder by using a slice type +// that's convertible to full_extent_t, but not full_extent_t. +struct same_as_full_extent_t { + constexpr operator Kokkos::full_extent_t() const noexcept { + return Kokkos::full_extent; + } +}; + +template +constexpr auto slice_one_extent( + Kokkos::mdspan, Layout, Accessor> x, Slice slice) +{ + if constexpr (sizeof...(Exts) == 0) { + static_assert(false, "slice_one_extent called with no extents"); + } + else if constexpr (sizeof...(Exts) == 1) { + return Kokkos::submdspan(x, slice); + } + else { + return [&] (std::index_sequence) { + return Kokkos::submdspan(x, slice, ((void) Inds, same_as_full_extent_t{})...); + } (std::make_index_sequence()); + } +} + +// Multiply elements by 3, using 1-D slices. 
+template +void submdspan_benchmark2_loop(const OutMdspan& out) { + using index_type = typename OutMdspan::index_type; + + if constexpr (OutMdspan::rank() == 0) { + return; + } + else if constexpr (OutMdspan::rank() == 1) { + const auto ext0 = out.extent(0); + for (index_type k = 0; k < ext0; ++k) { + out[k] *= 3u; + } + } + else { + const auto ext0 = index_holder{index_type(out.extent(0))}; + for (auto k = index_holder{index_type(0)}; k < ext0; ++k) { + submdspan_benchmark2_loop(slice_one_extent(out, k)); + } + } +} + +template +size_t submdspan_benchmark2(benchmark::State& state, + nonconst_test_mdspan out) +{ + size_t count = 0; + for (auto _ : state) { + submdspan_benchmark2_loop(out); + ++count; + } + benchmark::DoNotOptimize(count); + return count; +} + +// Elements of x are uint8_t, so computations happen modulo 256. +// For each element x_e of x, on output, result is +// +// (x_e * 3^count) mod 256 +// = ((x_e mod 256) * (3^count mod 256)) mod 256. +// +// If count is a power of two, we can compute (3^count) mod 256 +// by divide and conquer. +// +// (3^count) mod 256 +// = ((3^(count/2)) mod 256) * ((3^(count/2)) mod 256) mod 256. 
+ +inline constexpr size_t +base_to_the_exponent_mod_modulus(size_t base, size_t exponent, size_t modulus) +{ + if (modulus == 1u) { + return 0u; + } + // modulus - 1u) * (modulus - 1u) must not overflow base + size_t result = 1u; + base = base % modulus; + while (exponent > 0u) { + if (exponent % 2u == 1u) { + result = (result * base) % modulus; + } + exponent = exponent >> 1u; + base = (base * base) % modulus; + } + return result; +} + +inline constexpr size_t +expected_element(size_t original_element, size_t count) { + constexpr size_t base = 3u; + constexpr size_t modulus = 256u; + return ((original_element % modulus)* base_to_the_exponent_mod_modulus(base, count, modulus)) % modulus; +}; + +template +void submdspan_run_benchmark2(benchmark::State& state, + Kokkos::extents exts) +{ + auto in_buf = benchmark_buffer{exts}; + auto out_buf = benchmark_buffer{exts}; + mdspan_benchmark::fill_random(in_buf.get_mdspan()); + + // We're using layout_right, so we don't need the layout mapping to iterate over the elements. 
+ const size_t num_elements = out_buf.size(); + { + auto in = in_buf.get_mdspan().data_handle(); + auto out = out_buf.get_mdspan().data_handle(); + for (size_t i = 0; i < num_elements; ++i) { + out[i] = in[i]; + } + } + const size_t count = submdspan_benchmark2(state,out_buf.get_mdspan()); + { + auto in = in_buf.get_mdspan().data_handle(); + auto out = out_buf.get_mdspan().data_handle(); + for (size_t i = 0; i < num_elements; ++i) { + const auto original = in[i]; + const auto expected = expected_element(original, count); + if (out[i] != expected) { + std::cerr << "submdspan_benchmark2 failed: out[" << i << "] = " + << out[i] << " != " << expected << std::endl; + std::terminate(); + } + } + } +} + +BENCHMARK_CAPTURE(submdspan_run_benchmark2, int_6d, (Kokkos::extents{})); +BENCHMARK_CAPTURE(submdspan_run_benchmark2, int_6d, (Kokkos::dextents{2, 2, 2, 2, 2, 2})); +BENCHMARK_CAPTURE(submdspan_run_benchmark2, size_t_6d, (Kokkos::extents{})); +BENCHMARK_CAPTURE(submdspan_run_benchmark2, size_t_6d, (Kokkos::dextents{2, 2, 2, 2, 2, 2})); + BENCHMARK_MAIN(); From a020448a7704f364d330494134e0d3bb3f709638 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Mon, 23 Jun 2025 14:31:02 -0600 Subject: [PATCH 036/103] Improve benchmark CUDA version is as yet untested. It probably won't compile, at least because the P3663 implementation currently relies on C++23 or 26 language features that may not yet be implemented in NVCC. 
--- benchmarks/submdspan/cuda/submdspan_cuda.cu | 259 +++++++++++++++----- benchmarks/submdspan/submdspan.cpp | 228 +++++------------ benchmarks/submdspan_generic.hpp | 210 ++++++++++++++++ tests/test_convertible_to_index_type.cpp | 3 +- 4 files changed, 466 insertions(+), 234 deletions(-) create mode 100644 benchmarks/submdspan_generic.hpp diff --git a/benchmarks/submdspan/cuda/submdspan_cuda.cu b/benchmarks/submdspan/cuda/submdspan_cuda.cu index fc0b30ad..d6046014 100644 --- a/benchmarks/submdspan/cuda/submdspan_cuda.cu +++ b/benchmarks/submdspan/cuda/submdspan_cuda.cu @@ -13,64 +13,212 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER -#include -#include -#include -#include -#include - -// Whether to let mapping convert index calculation to the type used -// to index into the mdspan -//#define MDSPAN_IMPL_USE_MAPPING_ARG_CAST -// Overwrite what extents.extent() returns and what the actual storage type is -//#define MDSPAN_IMPL_OVERWRITE_EXTENTS_SIZE_TYPE int -// Choose the index type used by the code -using idx_t = size_t; - -#include "fill.hpp" -#include -//================================================================================ -static constexpr int global_delta = 1; -static constexpr int global_repeat = 16; +#include "submdspan_generic.hpp" -//================================================================================ +// This benchmark measures the overhead of submdspan slice +// canonicalization as proposed by P3663R2. +// +// Slice canonicalization happens in the submdspan function, +// before slices reach the layout mapping's submdspan_mapping +// customization. Thus, we need to call submdspan itself, +// but the layout mapping type does not matter. +// We do want to exercise a Standard layout mapping, though. +// +// The mdspan's value type doesn't matter either, +// so we can use a char-sized type to minimize storage. +// Using unsigned char makes overflow defined behavior. 
-template -using lmdspan = Kokkos::mdspan, Kokkos::layout_left>; -template -using rmdspan = Kokkos::mdspan, Kokkos::layout_right>; +#define CUDA_SAFE_CALL(call) \ + cuda_internal_safe_call(call, #call, __FILE__, __LINE__) +namespace submdspan_benchmark { -void throw_runtime_exception(const std::string &msg) { - std::ostringstream o; - o << msg; - throw std::runtime_error(o.str()); +inline void +cuda_internal_safe_call(cudaError e, const char* name, + const char* file, int line_number) +{ + if (cudaSuccess != e) { + std::ostringstream out; + out << name << " error( " << cudaGetErrorName(e) + << "): " << cudaGetErrorString(e); + if (file) { + out << " " << file << ":" << line_number; + } + throw std::runtime_error(out.str()); + } } -void cuda_internal_error_throw(cudaError e, const char* name, - const char* file = NULL, const int line = 0) { - std::ostringstream out; - out << name << " error( " << cudaGetErrorName(e) - << "): " << cudaGetErrorString(e); - if (file) { - out << " " << file << ":" << line; +struct cuda_execution_space {}; + +template +struct cuda_array_deleter { + void operator() (ValueType* ptr) const { + CUDA_SAFE_CALL(cudaFree(ptr)); } - throw_runtime_exception(out.str()); +}; + +template +struct array_deleter { + using type = cuda_array_deleter; +}; + +template +std::unique_ptr> +allocate_buffer(cuda_execution_space, size_t num_elements) { + ValueType* buf = nullptr; + CUDA_SAFE_CALL(cudaMalloc(&buf, num_elements * sizeof(ValueType))); + return std::unique_ptr>{buf, {}}; } -inline void cuda_internal_safe_call(cudaError e, const char* name, - const char* file = NULL, - const int line = 0) { - if (cudaSuccess != e) { - cuda_internal_error_throw(e, name, file, line); +template +void fill_with_random_values( + cuda_execution_space, + random_state_t& state, + nonconst_test_mdspan x_dev) +{ + benchmark_buffer buf_host{host_execution_space{}, x_dev.extents()}; + auto x_host = buf_host.get_mdspan(); + fill_with_random_values(host_execution_space{}, state, 
x_host); + + const size_t num_bytes = x_host.required_span_size() * sizeof(value_type); + CUDA_SAFE_CALL(cudaMemcpy( + x_dev.get(), x_host.get(), num_bytes, cudaMemcpyHostToDevice + )); +} + +} // namespace submdspan_benchmark + +// FIXME this should launch a device kernel +template +size_t submdspan_benchmark(ExecutionSpace&& /* exec_space */, + benchmark::State& state, + nonconst_test_mdspan out) +{ + size_t count_not_same = 0; + for (auto _ : state) { + const auto p = std::pair{IndexType(0), IndexType(1)}; + auto out_sub = Kokkos::submdspan(out, ((void) Exts, p)...); + if (out_sub[((void) Exts, 0)...] != out[((void) Exts, p.first)...]) { + ++count_not_same; + } + out_sub[((void) Exts, 0)...] += static_cast(1u); + + benchmark::DoNotOptimize(count_not_same); } + return count_not_same; } -#define CUDA_SAFE_CALL(call) \ - cuda_internal_safe_call(call, #call, __FILE__, __LINE__) +template +void submdspan_run_benchmark(ExecutionSpace exec_space, + benchmark::State& state, + Kokkos::extents exts) +{ + random_state_t random_state{}; + auto buf = benchmark_buffer{exec_space, exts}; + fill_with_random_values(exec_space, random_state, buf.get_mdspan()); + + size_t count_not_same = submdspan_benchmark(state, buf.get_mdspan()); + if (count_not_same != 0) { + std::cerr << "submdspan_benchmark failed: count not same = " << count_not_same << std::endl; + std::terminate(); + } -//================================================================================ + auto get_0th_element = [] (auto x) { return x[((void) Exts, 0)...]; }; + auto buf_0s_after = get_0th_element(buf.get_mdspan()); + benchmark::DoNotOptimize(buf_0s_after); +} + +BENCHMARK_CAPTURE(submdspan_run_benchmark, int_6d, (cuda_execution_space{}, Kokkos::extents{})); +BENCHMARK_CAPTURE(submdspan_run_benchmark, int_6d, (cuda_execution_space{}, Kokkos::dextents{2, 2, 2, 2, 2, 2})); +BENCHMARK_CAPTURE(submdspan_run_benchmark, size_t_6d, (cuda_execution_space{}, Kokkos::extents{})); 
+BENCHMARK_CAPTURE(submdspan_run_benchmark, size_t_6d, (cuda_execution_space{}, Kokkos::dextents{2, 2, 2, 2, 2, 2})); + +// Multiply elements by 3, using 1-D slices. +template +MDSPAN_FUNCTION void +submdspan_benchmark2_loop(ExecutionSpace exec_space, const OutMdspan& out) +{ + using index_type = typename OutMdspan::index_type; + + if constexpr (OutMdspan::rank() == 0) { + return; + } + else if constexpr (OutMdspan::rank() == 1) { + const auto ext0 = out.extent(0); + for (index_type k = 0; k < ext0; ++k) { + out[k] *= 3u; + } + } + else { + const auto ext0 = index_holder{index_type(out.extent(0))}; + for (auto k = index_holder{index_type(0)}; k < ext0; ++k) { + submdspan_benchmark2_loop(exec_space, slice_one_extent(out, k)); + } + } +} + +// FIXME this should launch a device kernel, perhaps +template +size_t submdspan_benchmark2(cuda_execution_space exec_space, + benchmark::State& state, + nonconst_test_mdspan out) +{ + size_t count = 0; + for (auto _ : state) { + submdspan_benchmark2_loop(exec_space, out); + ++count; + } + benchmark::DoNotOptimize(count); + return count; +} + +template +void submdspan_run_benchmark2(cuda_execution_space exec_space, + benchmark::State& state, + Kokkos::extents exts) +{ + auto in_buf = benchmark_buffer{exec_space, exts}; + auto out_buf = benchmark_buffer{exec_space, exts}; + random_state_t random_state{}; + fill_with_random_values(exec_space, random_state, in_buf.get_mdspan()); + + // We're using layout_right, so we don't need the layout mapping to iterate over the elements. 
+ const size_t num_elements = out_buf.size(); + { + auto in = in_buf.get_mdspan().data_handle(); + auto out = out_buf.get_mdspan().data_handle(); + for (size_t i = 0; i < num_elements; ++i) { + out[i] = in[i]; + } + } + const size_t count = submdspan_benchmark2(exec_space, state, out_buf.get_mdspan()); + { + auto in = in_buf.get_mdspan().data_handle(); + auto out = out_buf.get_mdspan().data_handle(); + for (size_t i = 0; i < num_elements; ++i) { + const auto original = in[i]; + const auto expected = expected_element(original, count); + if (out[i] != expected) { + std::cerr << "submdspan_benchmark2 failed: out[" << i << "] = " + << out[i] << " != " << expected << std::endl; + std::terminate(); + } + } + } +} + +BENCHMARK_CAPTURE(submdspan_run_benchmark2, int_6d, (Kokkos::extents{})); +BENCHMARK_CAPTURE(submdspan_run_benchmark2, int_6d, (Kokkos::dextents{2, 2, 2, 2, 2, 2})); +BENCHMARK_CAPTURE(submdspan_run_benchmark2, size_t_6d, (Kokkos::extents{})); +BENCHMARK_CAPTURE(submdspan_run_benchmark2, size_t_6d, (Kokkos::dextents{2, 2, 2, 2, 2, 2})); + +BENCHMARK_MAIN(); + + + + +namespace test { dim3 get_bench_thread_block(size_t y,size_t z) { cudaDeviceProp cudaProp; @@ -106,25 +254,6 @@ float run_kernel_timed(size_t N, size_t M, size_t K, F&& f, Args&&... args) { return milliseconds; } -template -MDSpan fill_device_mdspan(MDSpan, DynSizes... 
dyn) { - - using value_type = typename MDSpan::value_type; - auto buffer_size = MDSpan{nullptr, dyn...}.mapping().required_span_size(); - auto host_buffer = std::make_unique( - MDSpan{nullptr, dyn...}.mapping().required_span_size() - ); - auto host_mdspan = MDSpan{host_buffer.get(), dyn...}; - mdspan_benchmark::fill_random(host_mdspan); - - value_type* device_buffer = nullptr; - CUDA_SAFE_CALL(cudaMalloc(&device_buffer, buffer_size * sizeof(value_type))); - CUDA_SAFE_CALL(cudaMemcpy( - device_buffer, host_buffer.get(), buffer_size * sizeof(value_type), cudaMemcpyHostToDevice - )); - return MDSpan{device_buffer, dyn...}; -} - //================================================================================ template @@ -300,7 +429,3 @@ void BM_Raw_Cuda_Stencil_3D_left(benchmark::State& state, T, SizeX x_, SizeY y_, } BENCHMARK_CAPTURE(BM_Raw_Cuda_Stencil_3D_left, size_80_80_80, int(), 80, 80, 80); //BENCHMARK_CAPTURE(BM_Raw_Cuda_Stencil_3D_left, size_400_400_400, int(), 400, 400, 400); - -//================================================================================ - -BENCHMARK_MAIN(); diff --git a/benchmarks/submdspan/submdspan.cpp b/benchmarks/submdspan/submdspan.cpp index fd8673d7..5b807f50 100644 --- a/benchmarks/submdspan/submdspan.cpp +++ b/benchmarks/submdspan/submdspan.cpp @@ -13,19 +13,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER -#include "fill.hpp" -#include - -#include - -#include -#include -#include -#include -#include -#include -#include +#include "submdspan_generic.hpp" // This benchmark measures the overhead of submdspan slice // canonicalization as proposed by P3663R2. @@ -38,18 +27,13 @@ // // The mdspan's value type doesn't matter either, // so we can use a char-sized type to minimize storage. -// Using unsigned char makes overflow defined behavior. +// An unsigned integer type makes overflow defined behavior. 
-template -using nonconst_test_mdspan = - Kokkos::mdspan>; +namespace submdspan_benchmark { -template -using const_test_mdspan = - Kokkos::mdspan>; - -template -size_t submdspan_benchmark(benchmark::State& state, +template +size_t benchmark1_impl(ExecutionSpace&& /* exec_space */, + benchmark::State& state, nonconst_test_mdspan out) { size_t count_not_same = 0; @@ -59,50 +43,25 @@ size_t submdspan_benchmark(benchmark::State& state, if (out_sub[((void) Exts, 0)...] != out[((void) Exts, p.first)...]) { ++count_not_same; } - out_sub[((void) Exts, 0)...] += static_cast(1u); + out_sub[((void) Exts, 0)...] += static_cast(1u); benchmark::DoNotOptimize(count_not_same); } return count_not_same; } -template -class benchmark_buffer { -public: - using value_type = unsigned char; - - benchmark_buffer(Kokkos::extents exts) : - mapping_{exts}, - buffer_{std::make_unique(mapping_.required_span_size())} - {} - - size_t size() const { - return mapping_.required_span_size(); - } - - nonconst_test_mdspan get_mdspan() { - return {buffer_.get(), mapping_}; - } - - const_test_mdspan get_mdspan() const { - return {static_cast(buffer_.get()), mapping_}; - } - -private: - Kokkos::layout_right::template mapping> mapping_; - std::unique_ptr buffer_; -}; - -template -void submdspan_run_benchmark(benchmark::State& state, +template +void benchmark1(ExecutionSpace exec_space, + benchmark::State& state, Kokkos::extents exts) { - auto buf = benchmark_buffer{exts}; - mdspan_benchmark::fill_random(buf.get_mdspan()); + random_state_t random_state{}; + auto buf = benchmark_buffer{exec_space, exts}; + fill_with_random_values(exec_space, random_state, buf.get_mdspan()); - size_t count_not_same = submdspan_benchmark(state, buf.get_mdspan()); + size_t count_not_same = benchmark1_impl(exec_space, state, buf.get_mdspan()); if (count_not_same != 0) { - std::cerr << "submdspan_benchmark failed: count not same = " << count_not_same << std::endl; + std::cerr << "benchmark1 failed: count not same = " << 
count_not_same << std::endl; std::terminate(); } @@ -111,139 +70,69 @@ void submdspan_run_benchmark(benchmark::State& state, benchmark::DoNotOptimize(buf_0s_after); } -BENCHMARK_CAPTURE(submdspan_run_benchmark, int_6d, (Kokkos::extents{})); -BENCHMARK_CAPTURE(submdspan_run_benchmark, int_6d, (Kokkos::dextents{2, 2, 2, 2, 2, 2})); -BENCHMARK_CAPTURE(submdspan_run_benchmark, size_t_6d, (Kokkos::extents{})); -BENCHMARK_CAPTURE(submdspan_run_benchmark, size_t_6d, (Kokkos::dextents{2, 2, 2, 2, 2, 2})); - -// Make the compiler work harder by using a slice type -// that's convertible to index_type, but neither integral -// nor integral-constant-like. -template - requires (std::is_signed_v || std::is_unsigned_v) -class index_holder { -public: - index_holder(IndexType i) : i_{i} {} - constexpr operator IndexType() const noexcept { return i_; } - constexpr index_holder& operator++() noexcept { - ++i_; - return *this; - } - constexpr auto operator<=>(const index_holder&) const noexcept = default; - -private: - IndexType i_; -}; -static_assert(std::is_convertible_v, int>); -static_assert(std::is_convertible_v, size_t>); -static_assert(std::is_nothrow_constructible_v>); -static_assert(std::is_nothrow_constructible_v>); - -// Make the compiler work harder by using a slice type -// that's convertible to full_extent_t, but not full_extent_t. 
-struct same_as_full_extent_t { - constexpr operator Kokkos::full_extent_t() const noexcept { - return Kokkos::full_extent; - } -}; +} // namespace submdspan_benchmark -template -constexpr auto slice_one_extent( - Kokkos::mdspan, Layout, Accessor> x, Slice slice) +template +void host_benchmark1(benchmark::State& state, + Kokkos::extents exts) { - if constexpr (sizeof...(Exts) == 0) { - static_assert(false, "slice_one_extent called with no extents"); - } - else if constexpr (sizeof...(Exts) == 1) { - return Kokkos::submdspan(x, slice); - } - else { - return [&] (std::index_sequence) { - return Kokkos::submdspan(x, slice, ((void) Inds, same_as_full_extent_t{})...); - } (std::make_index_sequence()); - } + return submdspan_benchmark::benchmark1(submdspan_benchmark::host_execution_space{}, state, exts); } +BENCHMARK_CAPTURE(host_benchmark1, int_6d, (Kokkos::extents{})); +BENCHMARK_CAPTURE(host_benchmark1, int_6d, (Kokkos::dextents{2, 2, 2, 2, 2, 2})); +BENCHMARK_CAPTURE(host_benchmark1, size_t_6d, (Kokkos::extents{})); +BENCHMARK_CAPTURE(host_benchmark1, size_t_6d, (Kokkos::dextents{2, 2, 2, 2, 2, 2})); + +namespace submdspan_benchmark { + // Multiply elements by 3, using 1-D slices. 
-template -void submdspan_benchmark2_loop(const OutMdspan& out) { - using index_type = typename OutMdspan::index_type; +template +void benchmark2_loop(ExecutionSpace exec_space, + nonconst_test_mdspan out) +{ + using mdspan_type = nonconst_test_mdspan; - if constexpr (OutMdspan::rank() == 0) { + if constexpr (mdspan_type::rank() == 0) { return; } - else if constexpr (OutMdspan::rank() == 1) { - const auto ext0 = out.extent(0); - for (index_type k = 0; k < ext0; ++k) { + else if constexpr (mdspan_type::rank() == 1) { + const IndexType ext0 = out.extent(0); + for (IndexType k = 0; k < ext0; ++k) { out[k] *= 3u; } } else { - const auto ext0 = index_holder{index_type(out.extent(0))}; - for (auto k = index_holder{index_type(0)}; k < ext0; ++k) { - submdspan_benchmark2_loop(slice_one_extent(out, k)); + const auto ext0 = index_holder{out.extent(0)}; + for (auto k = index_holder{IndexType(0)}; k < ext0; ++k) { + benchmark2_loop(exec_space, slice_one_extent(out, k)); } } } template -size_t submdspan_benchmark2(benchmark::State& state, +size_t benchmark2_impl(host_execution_space exec_space, + benchmark::State& state, nonconst_test_mdspan out) { size_t count = 0; for (auto _ : state) { - submdspan_benchmark2_loop(out); + benchmark2_loop(exec_space, out); ++count; } benchmark::DoNotOptimize(count); return count; } -// Elements of x are uint8_t, so computations happen modulo 256. -// For each element x_e of x, on output, result is -// -// (x_e * 3^count) mod 256 -// = ((x_e mod 256) * (3^count mod 256)) mod 256. -// -// If count is a power of two, we can compute (3^count) mod 256 -// by divide and conquer. -// -// (3^count) mod 256 -// = ((3^(count/2)) mod 256) * ((3^(count/2)) mod 256) mod 256. 
- -inline constexpr size_t -base_to_the_exponent_mod_modulus(size_t base, size_t exponent, size_t modulus) -{ - if (modulus == 1u) { - return 0u; - } - // modulus - 1u) * (modulus - 1u) must not overflow base - size_t result = 1u; - base = base % modulus; - while (exponent > 0u) { - if (exponent % 2u == 1u) { - result = (result * base) % modulus; - } - exponent = exponent >> 1u; - base = (base * base) % modulus; - } - return result; -} - -inline constexpr size_t -expected_element(size_t original_element, size_t count) { - constexpr size_t base = 3u; - constexpr size_t modulus = 256u; - return ((original_element % modulus)* base_to_the_exponent_mod_modulus(base, count, modulus)) % modulus; -}; - template -void submdspan_run_benchmark2(benchmark::State& state, +void benchmark2(host_execution_space exec_space, + benchmark::State& state, Kokkos::extents exts) { - auto in_buf = benchmark_buffer{exts}; - auto out_buf = benchmark_buffer{exts}; - mdspan_benchmark::fill_random(in_buf.get_mdspan()); + auto in_buf = benchmark_buffer{exec_space, exts}; + auto out_buf = benchmark_buffer{exec_space, exts}; + random_state_t random_state{}; + fill_with_random_values(exec_space, random_state, in_buf.get_mdspan()); // We're using layout_right, so we don't need the layout mapping to iterate over the elements. 
const size_t num_elements = out_buf.size(); @@ -254,7 +143,7 @@ void submdspan_run_benchmark2(benchmark::State& state, out[i] = in[i]; } } - const size_t count = submdspan_benchmark2(state,out_buf.get_mdspan()); + const size_t count = benchmark2_impl(exec_space, state, out_buf.get_mdspan()); { auto in = in_buf.get_mdspan().data_handle(); auto out = out_buf.get_mdspan().data_handle(); @@ -262,7 +151,7 @@ void submdspan_run_benchmark2(benchmark::State& state, const auto original = in[i]; const auto expected = expected_element(original, count); if (out[i] != expected) { - std::cerr << "submdspan_benchmark2 failed: out[" << i << "] = " + std::cerr << "benchmark2 failed: out[" << i << "] = " << out[i] << " != " << expected << std::endl; std::terminate(); } @@ -270,9 +159,18 @@ void submdspan_run_benchmark2(benchmark::State& state, } } -BENCHMARK_CAPTURE(submdspan_run_benchmark2, int_6d, (Kokkos::extents{})); -BENCHMARK_CAPTURE(submdspan_run_benchmark2, int_6d, (Kokkos::dextents{2, 2, 2, 2, 2, 2})); -BENCHMARK_CAPTURE(submdspan_run_benchmark2, size_t_6d, (Kokkos::extents{})); -BENCHMARK_CAPTURE(submdspan_run_benchmark2, size_t_6d, (Kokkos::dextents{2, 2, 2, 2, 2, 2})); +} // namespace submdspan_benchmark + +template +void host_benchmark2(benchmark::State& state, + Kokkos::extents exts) +{ + return submdspan_benchmark::benchmark2(submdspan_benchmark::host_execution_space{}, state, exts); +} + +BENCHMARK_CAPTURE(host_benchmark2, int_6d, (Kokkos::extents{})); +BENCHMARK_CAPTURE(host_benchmark2, int_6d, (Kokkos::dextents{2, 2, 2, 2, 2, 2})); +BENCHMARK_CAPTURE(host_benchmark2, size_t_6d, (Kokkos::extents{})); +BENCHMARK_CAPTURE(host_benchmark2, size_t_6d, (Kokkos::dextents{2, 2, 2, 2, 2, 2})); BENCHMARK_MAIN(); diff --git a/benchmarks/submdspan_generic.hpp b/benchmarks/submdspan_generic.hpp new file mode 100644 index 00000000..c66ab457 --- /dev/null +++ b/benchmarks/submdspan_generic.hpp @@ -0,0 +1,210 @@ +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace submdspan_benchmark { + +template +using nonconst_test_mdspan = + Kokkos::mdspan>; + +template +using const_test_mdspan = + Kokkos::mdspan>; + +class random_state_t { +public: + using seed_type = std::mt19937::result_type; + + constexpr random_state_t() : gen_(default_seed) {} + constexpr random_state_t(seed_type seed) : gen_(seed) {} + + std::mt19937& generator() noexcept { return gen_; } + +private: + static constexpr seed_type default_seed = 1234u; + std::mt19937 gen_; +}; + +template +struct array_deleter {}; + +template +using array_deleter_t = typename array_deleter::type; + +struct host_execution_space {}; + +template +struct array_deleter { + using type = std::default_delete; +}; + +template +std::unique_ptr> +allocate_buffer(host_execution_space, size_t num_elements) { + return std::make_unique(num_elements); +} + +template +void fill_with_random_values( + host_execution_space, + random_state_t& state, + nonconst_test_mdspan s) +{ + auto val_dist = std::uniform_int_distribution(0u, 255u); + auto next = [&] () { + return val_dist(state.generator()); + }; + std::generate(s.data_handle(), s.data_handle() + s.size(), next); +} + +template +class benchmark_buffer { +public: + using value_type = std::uint8_t; + + benchmark_buffer(ExecutionSpace exec_space, Kokkos::extents exts) : + mapping_{exts}, + 
buffer_{allocate_buffer(exec_space, mapping_.required_span_size())} + {} + + size_t size() const { + return mapping_.required_span_size(); + } + + nonconst_test_mdspan get_mdspan() { + return {buffer_.get(), mapping_}; + } + + const_test_mdspan get_mdspan() const { + return {static_cast(buffer_.get()), mapping_}; + } + +private: + Kokkos::layout_right::template mapping> mapping_; + std::unique_ptr> buffer_; +}; + +// Index or slice type that's convertible to IndexType, +// but neither integral nor integral-constant-like. +MDSPAN_TEMPLATE_REQUIRES( + class IndexType, + /* requires */ ( + std::is_signed_v || std::is_unsigned_v + ) +) +class index_holder { +public: + constexpr MDSPAN_FUNCTION index_holder(IndexType i) : i_{i} {} + constexpr MDSPAN_FUNCTION operator IndexType() const noexcept { return i_; } + constexpr MDSPAN_FUNCTION index_holder& operator++() noexcept { + ++i_; + return *this; + } +#if defined(__cpp_impl_three_way_comparison) + constexpr MDSPAN_FUNCTION auto operator<=>(const index_holder&) const noexcept = default; +#else + friend constexpr MDSPAN_FUNCTION bool operator<(const index_holder& x, const index_holder& y) noexcept { + return x.i_ < y.i_; + } + friend constexpr MDSPAN_FUNCTION bool operator==(const index_holder& x, const index_holder& y) noexcept { + return x.i_ == y.i_; + } +#endif + +private: + IndexType i_; +}; +static_assert(std::is_convertible_v, int>); +static_assert(std::is_convertible_v, size_t>); +static_assert(std::is_nothrow_constructible_v>); +static_assert(std::is_nothrow_constructible_v>); + +// Slice type that's convertible to full_extent_t, but is not full_extent_t. 
+struct full_extent_wrapper_t { + constexpr operator Kokkos::full_extent_t() const noexcept{ + return Kokkos::full_extent; + } +}; + +template +constexpr MDSPAN_FUNCTION auto slice_one_extent( + Kokkos::mdspan, Layout, Accessor> x, Slice slice) +{ + if constexpr (sizeof...(Exts) == 0) { + static_assert(false, "slice_one_extent called with no extents"); + } + else if constexpr (sizeof...(Exts) == 1) { + return Kokkos::submdspan(x, slice); + } + else { + return [&] (std::index_sequence) { + return Kokkos::submdspan(x, slice, ((void) Inds, full_extent_wrapper_t{})...); + } (std::make_index_sequence()); + } +} + +// Elements of x are uint8_t, so computations happen modulo 256. +// For each element x_e of x, on output, result is +// +// (x_e * 3^count) mod 256 +// = ((x_e mod 256) * (3^count mod 256)) mod 256. +// +// If count is a power of two, we can compute (3^count) mod 256 +// by divide and conquer. +// +// (3^count) mod 256 +// = ((3^(count/2)) mod 256) * ((3^(count/2)) mod 256) mod 256. 
+ +constexpr MDSPAN_INLINE_FUNCTION size_t +base_to_the_exponent_mod_modulus(size_t base, size_t exponent, size_t modulus) +{ + if (modulus == 1u) { + return 0u; + } + // modulus - 1u) * (modulus - 1u) must not overflow base + size_t result = 1u; + base = base % modulus; + while (exponent > 0u) { + if (exponent % 2u == 1u) { + result = (result * base) % modulus; + } + exponent = exponent >> 1u; + base = (base * base) % modulus; + } + return result; +} + +constexpr MDSPAN_INLINE_FUNCTION size_t +expected_element(size_t original_element, size_t count) { + constexpr size_t base = 3u; + constexpr size_t modulus = 256u; + return ((original_element % modulus) * base_to_the_exponent_mod_modulus(base, count, modulus)) % modulus; +} + +} // namespace submdspan_benchmark diff --git a/tests/test_convertible_to_index_type.cpp b/tests/test_convertible_to_index_type.cpp index bde02f3a..33bb3028 100644 --- a/tests/test_convertible_to_index_type.cpp +++ b/tests/test_convertible_to_index_type.cpp @@ -35,7 +35,7 @@ MDSPAN_TEMPLATE_REQUIRES( ) class index_holder { public: - index_holder(IndexType i) : i_{i} {} + constexpr index_holder(IndexType i) : i_{i} {} constexpr operator IndexType() const noexcept { return i_; } constexpr index_holder& operator++() noexcept { ++i_; @@ -67,7 +67,6 @@ struct full_extent_wrapper_t { } }; - template void test_mapping_call_operator(Layout, Kokkos::extents exts) { using extents_type = Kokkos::extents; From b14253f8a2f5e321cfee608ae0ed731a894304b5 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Mon, 23 Jun 2025 14:49:31 -0600 Subject: [PATCH 037/103] Refine submdspan benchmark --- benchmarks/submdspan/cuda/submdspan_cuda.cu | 86 ++++++++++----------- benchmarks/submdspan/submdspan.cpp | 24 +----- benchmarks/submdspan_generic.hpp | 26 +++++++ 3 files changed, 70 insertions(+), 66 deletions(-) diff --git a/benchmarks/submdspan/cuda/submdspan_cuda.cu b/benchmarks/submdspan/cuda/submdspan_cuda.cu index d6046014..d3e32042 100644 --- 
a/benchmarks/submdspan/cuda/submdspan_cuda.cu +++ b/benchmarks/submdspan/cuda/submdspan_cuda.cu @@ -87,11 +87,9 @@ void fill_with_random_values( )); } -} // namespace submdspan_benchmark - // FIXME this should launch a device kernel -template -size_t submdspan_benchmark(ExecutionSpace&& /* exec_space */, +template +size_t benchmark1_impl(cuda_execution_space /* exec_space */, benchmark::State& state, nonconst_test_mdspan out) { @@ -109,64 +107,55 @@ size_t submdspan_benchmark(ExecutionSpace&& /* exec_space */, return count_not_same; } -template -void submdspan_run_benchmark(ExecutionSpace exec_space, - benchmark::State& state, +} // namespace submdspan_benchmark + +template +void cuda_benchmark1(benchmark::State& state, Kokkos::extents exts) { - random_state_t random_state{}; - auto buf = benchmark_buffer{exec_space, exts}; - fill_with_random_values(exec_space, random_state, buf.get_mdspan()); - - size_t count_not_same = submdspan_benchmark(state, buf.get_mdspan()); - if (count_not_same != 0) { - std::cerr << "submdspan_benchmark failed: count not same = " << count_not_same << std::endl; - std::terminate(); - } - - auto get_0th_element = [] (auto x) { return x[((void) Exts, 0)...]; }; - auto buf_0s_after = get_0th_element(buf.get_mdspan()); - benchmark::DoNotOptimize(buf_0s_after); + return submdspan_benchmark::benchmark1(submdspan_benchmark::cuda_execution_space{}, state, exts); } -BENCHMARK_CAPTURE(submdspan_run_benchmark, int_6d, (cuda_execution_space{}, Kokkos::extents{})); -BENCHMARK_CAPTURE(submdspan_run_benchmark, int_6d, (cuda_execution_space{}, Kokkos::dextents{2, 2, 2, 2, 2, 2})); -BENCHMARK_CAPTURE(submdspan_run_benchmark, size_t_6d, (cuda_execution_space{}, Kokkos::extents{})); -BENCHMARK_CAPTURE(submdspan_run_benchmark, size_t_6d, (cuda_execution_space{}, Kokkos::dextents{2, 2, 2, 2, 2, 2})); +BENCHMARK_CAPTURE(cuda_benchmark1, int_6d, (Kokkos::extents{})); +BENCHMARK_CAPTURE(cuda_benchmark1, int_6d, (Kokkos::dextents{2, 2, 2, 2, 2, 2})); 
+BENCHMARK_CAPTURE(cuda_benchmark1, size_t_6d, (Kokkos::extents{})); +BENCHMARK_CAPTURE(cuda_benchmark1, size_t_6d, (Kokkos::dextents{2, 2, 2, 2, 2, 2})); + +namespace submdspan_benchmark { // Multiply elements by 3, using 1-D slices. -template -MDSPAN_FUNCTION void -submdspan_benchmark2_loop(ExecutionSpace exec_space, const OutMdspan& out) +template +void benchmark2_loop(ExecutionSpace exec_space, + nonconst_test_mdspan out) { - using index_type = typename OutMdspan::index_type; + using mdspan_type = nonconst_test_mdspan; - if constexpr (OutMdspan::rank() == 0) { + if constexpr (mdspan_type::rank() == 0) { return; } - else if constexpr (OutMdspan::rank() == 1) { - const auto ext0 = out.extent(0); - for (index_type k = 0; k < ext0; ++k) { + else if constexpr (mdspan_type::rank() == 1) { + const IndexType ext0 = out.extent(0); + for (IndexType k = 0; k < ext0; ++k) { out[k] *= 3u; } } else { - const auto ext0 = index_holder{index_type(out.extent(0))}; - for (auto k = index_holder{index_type(0)}; k < ext0; ++k) { - submdspan_benchmark2_loop(exec_space, slice_one_extent(out, k)); + const auto ext0 = index_holder{out.extent(0)}; + for (auto k = index_holder{IndexType(0)}; k < ext0; ++k) { + benchmark2_loop(exec_space, slice_one_extent(out, k)); } } } // FIXME this should launch a device kernel, perhaps template -size_t submdspan_benchmark2(cuda_execution_space exec_space, +size_t benchmark2_impl(host_execution_space exec_space, benchmark::State& state, nonconst_test_mdspan out) { size_t count = 0; for (auto _ : state) { - submdspan_benchmark2_loop(exec_space, out); + benchmark2_loop(exec_space, out); ++count; } benchmark::DoNotOptimize(count); @@ -174,7 +163,7 @@ size_t submdspan_benchmark2(cuda_execution_space exec_space, } template -void submdspan_run_benchmark2(cuda_execution_space exec_space, +void benchmark2(host_execution_space exec_space, benchmark::State& state, Kokkos::extents exts) { @@ -192,7 +181,7 @@ void submdspan_run_benchmark2(cuda_execution_space 
exec_space, out[i] = in[i]; } } - const size_t count = submdspan_benchmark2(exec_space, state, out_buf.get_mdspan()); + const size_t count = benchmark2_impl(exec_space, state, out_buf.get_mdspan()); { auto in = in_buf.get_mdspan().data_handle(); auto out = out_buf.get_mdspan().data_handle(); @@ -200,7 +189,7 @@ void submdspan_run_benchmark2(cuda_execution_space exec_space, const auto original = in[i]; const auto expected = expected_element(original, count); if (out[i] != expected) { - std::cerr << "submdspan_benchmark2 failed: out[" << i << "] = " + std::cerr << "benchmark2 failed: out[" << i << "] = " << out[i] << " != " << expected << std::endl; std::terminate(); } @@ -208,10 +197,19 @@ void submdspan_run_benchmark2(cuda_execution_space exec_space, } } -BENCHMARK_CAPTURE(submdspan_run_benchmark2, int_6d, (Kokkos::extents{})); -BENCHMARK_CAPTURE(submdspan_run_benchmark2, int_6d, (Kokkos::dextents{2, 2, 2, 2, 2, 2})); -BENCHMARK_CAPTURE(submdspan_run_benchmark2, size_t_6d, (Kokkos::extents{})); -BENCHMARK_CAPTURE(submdspan_run_benchmark2, size_t_6d, (Kokkos::dextents{2, 2, 2, 2, 2, 2})); +} // namespace submdspan_benchmark + +template +void cuda_benchmark2(benchmark::State& state, + Kokkos::extents exts) +{ + return submdspan_benchmark::benchmark2(submdspan_benchmark::cuda_execution_space{}, state, exts); +} + +BENCHMARK_CAPTURE(cuda_benchmark2, int_6d, (Kokkos::extents{})); +BENCHMARK_CAPTURE(cuda_benchmark2, int_6d, (Kokkos::dextents{2, 2, 2, 2, 2, 2})); +BENCHMARK_CAPTURE(cuda_benchmark2, size_t_6d, (Kokkos::extents{})); +BENCHMARK_CAPTURE(cuda_benchmark2, size_t_6d, (Kokkos::dextents{2, 2, 2, 2, 2, 2})); BENCHMARK_MAIN(); diff --git a/benchmarks/submdspan/submdspan.cpp b/benchmarks/submdspan/submdspan.cpp index 5b807f50..2451f39c 100644 --- a/benchmarks/submdspan/submdspan.cpp +++ b/benchmarks/submdspan/submdspan.cpp @@ -31,8 +31,8 @@ namespace submdspan_benchmark { -template -size_t benchmark1_impl(ExecutionSpace&& /* exec_space */, +template +size_t 
benchmark1_impl(host_execution_space /* exec_space */, benchmark::State& state, nonconst_test_mdspan out) { @@ -50,26 +50,6 @@ size_t benchmark1_impl(ExecutionSpace&& /* exec_space */, return count_not_same; } -template -void benchmark1(ExecutionSpace exec_space, - benchmark::State& state, - Kokkos::extents exts) -{ - random_state_t random_state{}; - auto buf = benchmark_buffer{exec_space, exts}; - fill_with_random_values(exec_space, random_state, buf.get_mdspan()); - - size_t count_not_same = benchmark1_impl(exec_space, state, buf.get_mdspan()); - if (count_not_same != 0) { - std::cerr << "benchmark1 failed: count not same = " << count_not_same << std::endl; - std::terminate(); - } - - auto get_0th_element = [] (auto x) { return x[((void) Exts, 0)...]; }; - auto buf_0s_after = get_0th_element(buf.get_mdspan()); - benchmark::DoNotOptimize(buf_0s_after); -} - } // namespace submdspan_benchmark template diff --git a/benchmarks/submdspan_generic.hpp b/benchmarks/submdspan_generic.hpp index c66ab457..bfbafcd6 100644 --- a/benchmarks/submdspan_generic.hpp +++ b/benchmarks/submdspan_generic.hpp @@ -110,6 +110,32 @@ class benchmark_buffer { std::unique_ptr> buffer_; }; +template +size_t benchmark1_impl(ExecutionSpace /* exec_space */, + benchmark::State& state, + nonconst_test_mdspan out); + +// This works for host_execution_space and cuda_execution_space. 
+template +void benchmark1(ExecutionSpace exec_space, + benchmark::State& state, + Kokkos::extents exts) +{ + random_state_t random_state{}; + auto buf = benchmark_buffer{exec_space, exts}; + fill_with_random_values(exec_space, random_state, buf.get_mdspan()); + + size_t count_not_same = benchmark1_impl(exec_space, state, buf.get_mdspan()); + if (count_not_same != 0) { + std::cerr << "benchmark1 failed: count not same = " << count_not_same << std::endl; + std::terminate(); + } + + auto get_0th_element = [] (auto x) { return x[((void) Exts, 0)...]; }; + auto buf_0s_after = get_0th_element(buf.get_mdspan()); + benchmark::DoNotOptimize(buf_0s_after); +} + // Index or slice type that's convertible to IndexType, // but neither integral nor integral-constant-like. MDSPAN_TEMPLATE_REQUIRES( From 62c91587575b209ad48ddd726941417be87e393d Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 24 Jun 2025 10:33:03 -0600 Subject: [PATCH 038/103] Attempt to work around GCC 11.4.0 C++20 ICE ... in constant_wrapper. The issue is with the default argument of the second template argument of constant_wrapper. typename unspecified = typename decltype(exposition_only::cw_fixed_value(X))::type Replacing that expression with use of the following alias doesn't help. namespace exposition_only { template using unspecified_t = typename decltype(cw_fixed_value(X))::type; } Moving the definitions of the various specializations of cw_fixed_value above this point doesn't help either. This generally works fine even with Clang 21. 
--- .../__p2630_bits/constant_wrapper.hpp | 18 +++++++++++++++--- tests/CMakeLists.txt | 4 ++-- tests/test_constant_wrapper.cpp | 1 - 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/include/experimental/__p2630_bits/constant_wrapper.hpp b/include/experimental/__p2630_bits/constant_wrapper.hpp index 58dc7043..d0679680 100644 --- a/include/experimental/__p2630_bits/constant_wrapper.hpp +++ b/include/experimental/__p2630_bits/constant_wrapper.hpp @@ -28,9 +28,21 @@ namespace exposition_only { // template // using unspecified_t = typename decltype(cw_fixed_value(X))::type; // } - -template // exposition only +// +// Moving the definitions of the various specializations of cw_fixed_value +// above this point doesn't help either. + +// This generally works fine even with Clang 21. +#define MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND 1 + +template< + exposition_only::cw_fixed_value X, +#if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) + typename unspecified = typename decltype(X)::type // exposition only +#else + typename unspecified = typename decltype(exposition_only::cw_fixed_value(X))::type // exposition only +#endif +> struct constant_wrapper; template diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 8aaa87d8..222cb9a8 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -111,9 +111,9 @@ if((CMAKE_CXX_COMPILER_ID STREQUAL Clang) OR ((CMAKE_CXX_COMPILER_ID STREQUAL GN endif() endif() -if(MDSPAN_ENABLE_P3663 AND (CMAKE_CXX_STANDARD GREATER_EQUAL 20)) +#if(MDSPAN_ENABLE_P3663 AND (CMAKE_CXX_STANDARD GREATER_EQUAL 20)) mdspan_add_test(test_constant_wrapper) -endif() +#endif() if(MDSPAN_ENABLE_P3663 AND (CMAKE_CXX_STANDARD GREATER_EQUAL 17)) mdspan_add_test(test_strided_slice) endif() diff --git a/tests/test_constant_wrapper.cpp b/tests/test_constant_wrapper.cpp index e91f8fda..1476e82d 100644 --- a/tests/test_constant_wrapper.cpp +++ b/tests/test_constant_wrapper.cpp @@ -13,7 +13,6 @@ // SPDX-License-Identifier: Apache-2.0 WITH 
LLVM-exception // //@HEADER -#include #include #include From 930998a823a78806215e149972cf655276ac7d2c Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 24 Jun 2025 10:58:08 -0600 Subject: [PATCH 039/103] Get GCC 11.4.0 (C++20) to a meaningful build error Work around lack of parameter pack indexing and fix constant_wrapper's assignment operator. After some adjustments to the constant_wrapper implementation, I found out that its overloaded arithmetic operators -- specifically, binary operator+ -- do not work. .../submdspan_extents.hpp:96:22: error: static assertion failed 96 | static_assert(std::is_same_v< | ~~~~~^~~~~~~~~~ 97 | decltype(counter + std::cw), | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 98 | std::constant_wrapper | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 99 | >); Binary operator+ doesn't kick in; the first branch of the `is_same_v` is `unsigned long`, i.e., `size_t`. --- .../__p2630_bits/constant_wrapper.hpp | 14 ++- .../experimental/__p2630_bits/submdspan.hpp | 16 ++++ .../__p2630_bits/submdspan_extents.hpp | 86 +++++++++++++++---- 3 files changed, 98 insertions(+), 18 deletions(-) diff --git a/include/experimental/__p2630_bits/constant_wrapper.hpp b/include/experimental/__p2630_bits/constant_wrapper.hpp index d0679680..d236df84 100644 --- a/include/experimental/__p2630_bits/constant_wrapper.hpp +++ b/include/experimental/__p2630_bits/constant_wrapper.hpp @@ -202,8 +202,18 @@ struct constant_wrapper: exposition_only::cw_operators { using value_type = typename decltype(X)::type; template - constexpr auto operator=(R) const noexcept requires requires(value_type x) { x = R::value; } - { return constant_wrapper<[] { auto v = value; return v = R::value; }()>{}; } +#if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) + requires(std::is_assignable_v) +#endif + constexpr auto operator=(R) const noexcept +#if ! 
defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) + requires requires(value_type x) { x = R::value; } +#endif + { + return constant_wrapper< + [] { auto v = value; return v = R::value; }() + >{}; + } constexpr operator decltype(auto)() const noexcept { return value; } constexpr decltype(auto) operator()() const noexcept requires (!std::invocable) { return value; } diff --git a/include/experimental/__p2630_bits/submdspan.hpp b/include/experimental/__p2630_bits/submdspan.hpp index aab70a5e..c84004c7 100644 --- a/include/experimental/__p2630_bits/submdspan.hpp +++ b/include/experimental/__p2630_bits/submdspan.hpp @@ -28,17 +28,33 @@ submdspan(const mdspan &src, SliceSpecifiers... slices) { #if defined(MDSPAN_ENABLE_P3663) + +# if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) + + auto canonical_slices_tuple = + submdspan_canonicalize_slices(src.extents(), slices...); + auto sub_map_result = std::apply( + [&] (TheSlices&&... the_slices) { + return submdspan_mapping(src.mapping(), std::forward(the_slices)...); + }, canonical_slices_tuple); + +# else + auto [...canonical_slices] = submdspan_canonicalize_slices(src.extents(), slices...); // NOTE Added to P3663R2: [canonical_]slices (incorrect formatting). auto sub_map_result = submdspan_mapping(src.mapping(), canonical_slices...); + +# endif + // NOTE Added to P3663R2: It's src.data_handle(), not src.data(). // NOTE Added to P3663R2: Missing "typename" before AccessorPolicy::offset_policy. 
return mdspan( src.accessor().offset(src.data_handle(), sub_map_result.offset), sub_map_result.mapping, typename AccessorPolicy::offset_policy(src.accessor())); + #else const auto sub_submdspan_mapping_result = submdspan_mapping(src.mapping(), slices...); // NVCC has a problem with the deduction so lets figure out the type diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 060d94de..b0f86e28 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -857,6 +857,17 @@ template constexpr check_static_bounds_result check_static_bounds( const extents&) { +#if defined(__cpp_pack_indexing) + constexpr size_t Exts_k = Exts...[k]; +#else + constexpr size_t Exts_k = [] () { + size_t result = 0; + size_t i = 0; + (void) ((i++ == k ? (result = Exts, true) : false) || ...); + return result; + } (); +#endif + if constexpr (std::is_convertible_v) { return check_static_bounds_result::in_bounds; } @@ -869,10 +880,10 @@ template if constexpr (de_ice(S_k{}) < 0) { return check_static_bounds_result::out_of_bounds; // 14.3.1 } - else if constexpr (Exts...[k] != dynamic_extent && Exts...[k] <= de_ice(S_k{})) { + else if constexpr (Exts_k != dynamic_extent && Exts_k <= de_ice(S_k{})) { return check_static_bounds_result::out_of_bounds; } - else if constexpr (Exts...[k] != dynamic_extent && de_ice(S_k{}) < Exts...[k]) { + else if constexpr (Exts_k != dynamic_extent && de_ice(S_k{}) < Exts_k) { return check_static_bounds_result::in_bounds; } else { @@ -891,7 +902,7 @@ template return check_static_bounds_result::out_of_bounds; // 14.3.1 } else if constexpr ( - Exts...[k] != dynamic_extent && Exts...[k] < de_ice(offset_type{})) + Exts_k != dynamic_extent && Exts_k < de_ice(offset_type{})) { return check_static_bounds_result::out_of_bounds; // 14.3.2 } @@ -902,16 +913,16 @@ template return check_static_bounds_result::out_of_bounds; // 
14.3.3 } else if constexpr ( - Exts...[k] != dynamic_extent && - Exts...[k] < de_ice(offset_type{}) + de_ice(extent_type{})) + Exts_k != dynamic_extent && + Exts_k < de_ice(offset_type{}) + de_ice(extent_type{})) { return check_static_bounds_result::out_of_bounds; // 14.3.4 } else if constexpr ( - Exts...[k] != dynamic_extent && + Exts_k != dynamic_extent && 0 <= de_ice(offset_type{}) && de_ice(offset_type{}) <= de_ice(offset_type{}) + de_ice(extent_type{}) && - de_ice(offset_type{}) + de_ice(extent_type{}) <= Exts...[k]) + de_ice(offset_type{}) + de_ice(extent_type{}) <= Exts_k) { return check_static_bounds_result::in_bounds; // 14.3.5 } @@ -954,8 +965,8 @@ template return check_static_bounds_result::out_of_bounds; // 14.4.1 } else if constexpr ( - Exts...[k] != dynamic_extent && - Exts...[k] < de_ice(S_k0{})) + Exts_k != dynamic_extent && + Exts_k < de_ice(S_k0{})) { return check_static_bounds_result::out_of_bounds; // 14.4.2 } @@ -966,16 +977,16 @@ template return check_static_bounds_result::out_of_bounds; // 14.4.3 } else if constexpr ( - Exts...[k] != dynamic_extent && - Exts...[k] < de_ice(S_k1{})) + Exts_k != dynamic_extent && + Exts_k < de_ice(S_k1{})) { return check_static_bounds_result::out_of_bounds; // 14.4.4 } else if constexpr ( - Exts...[k] != dynamic_extent && + Exts_k != dynamic_extent && 0 <= de_ice(S_k0{}) && de_ice(S_k0{}) <= de_ice(S_k1{}) && - de_ice(S_k1{}) <= Exts...[k]) + de_ice(S_k1{}) <= Exts_k) { return check_static_bounds_result::in_bounds; // 14.4.5 } @@ -1050,21 +1061,50 @@ constexpr void check_canonical_kth_submdspan_slice_type(const extents& exts, Slice slice) { if constexpr (! is_canonical_slice_type()) { +#if ! 
defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) static_assert(false); +#endif } else { // 3.2 +#if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) + static_assert(check_static_bounds(extents{}) != check_static_bounds_result::out_of_bounds); +#else static_assert(check_static_bounds(exts) != check_static_bounds_result::out_of_bounds); +#endif } } +#if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) +template +constexpr decltype(auto) get_kth_in_pack(First&& first, Rest&&... rest) { + static_assert(k <= sizeof...(Rest)); + if constexpr (k == 0) { + return std::forward(first); + } + else { + return get_kth_in_pack(std::forward(rest)...); + } +} +#endif + template MDSPAN_INLINE_FUNCTION constexpr void check_canonical_kth_subdmspan_slice_types( const extents& exts, Slices... slices) -{ +{ [&] (std::index_sequence) { - (check_canonical_kth_submdspan_slice_type(exts, slices...[Inds]), ...); + (check_canonical_kth_submdspan_slice_type( + exts, +#if ! defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) +# if ! defined(__cpp_pack_indexing) +# error "This branch requires the C++26 'parameter pack indexing' language feature." +# endif + slices...[Inds] +#else + get_kth_in_pack(slices...) +#endif + ), ...); } (std::make_index_sequence{}); } @@ -1075,7 +1115,11 @@ constexpr auto submdspan_canonicalize_one_slice(const extents& exts, Slice s) { // Part of [mdspan.sub.slices] 9. // This could be combined with the if constexpr branches below. +#if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) + static_assert(check_static_bounds(extents{}) != check_static_bounds_result::out_of_bounds); +#else static_assert(check_static_bounds(exts) != check_static_bounds_result::out_of_bounds); +#endif // TODO Check Precondition that s is a valid k-th submdspan slice for exts. @@ -1127,7 +1171,17 @@ submdspan_canonicalize_slices(const extents& exts, Slices return std::tuple{ // This is ill-formed if slices...[Inds] is not a valid slice type. 
// That implements the Mandates clause of [mdspan.sub.slices] 9. - detail::submdspan_canonicalize_one_slice(exts, slices...[Inds])... + detail::submdspan_canonicalize_one_slice( + exts, +#if ! defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) +# if ! defined(__cpp_pack_indexing) +# error "This branch requires the C++26 'parameter pack indexing' language feature." +# endif + slices...[Inds] +#else + detail::get_kth_in_pack(slices...) +#endif + )... }; } (std::make_index_sequence{}); } From 202af612e055c0f8f414e7c84e0fbd3797a1943a Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 24 Jun 2025 12:30:55 -0600 Subject: [PATCH 040/103] Fix GCC 11.4.0 C++20 build Define MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND to 1 to get the work-around for GCC 11.4.0 C++20. The key is not to rely on constant_wrapper's binary operator-. I've tried a few designs for overloaded operators on constant_wrapper, but none of them work with GCC 11.4.0. --- benchmarks/submdspan/submdspan.cpp | 4 +- benchmarks/submdspan_generic.hpp | 49 +++++- .../__p2630_bits/constant_wrapper.hpp | 165 ++++++++++++++++-- .../__p2630_bits/submdspan_extents.hpp | 104 ++++------- tests/test_canonicalize_slices.cpp | 17 ++ tests/test_constant_wrapper.cpp | 35 ++++ tests/test_strided_slice.cpp | 2 + 7 files changed, 287 insertions(+), 89 deletions(-) diff --git a/benchmarks/submdspan/submdspan.cpp b/benchmarks/submdspan/submdspan.cpp index 2451f39c..2c72217b 100644 --- a/benchmarks/submdspan/submdspan.cpp +++ b/benchmarks/submdspan/submdspan.cpp @@ -40,10 +40,10 @@ size_t benchmark1_impl(host_execution_space /* exec_space */, for (auto _ : state) { const auto p = std::pair{IndexType(0), IndexType(1)}; auto out_sub = Kokkos::submdspan(out, ((void) Exts, p)...); - if (out_sub[((void) Exts, 0)...] != out[((void) Exts, p.first)...]) { + if (get_broadcast_element(out_sub, 0) != get_broadcast_element(out, p.first)) { ++count_not_same; } - out_sub[((void) Exts, 0)...] 
+= static_cast(1u); + get_broadcast_element(out_sub, 0) += static_cast(1u); benchmark::DoNotOptimize(count_not_same); } diff --git a/benchmarks/submdspan_generic.hpp b/benchmarks/submdspan_generic.hpp index bfbafcd6..4008cb3c 100644 --- a/benchmarks/submdspan_generic.hpp +++ b/benchmarks/submdspan_generic.hpp @@ -29,6 +29,44 @@ namespace submdspan_benchmark { +#if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) + +template +constexpr typename Kokkos::mdspan::reference +get_broadcast_element_impl( + const Kokkos::mdspan& x, + typename Extents::index_type broadcast_index, + std::index_sequence) +{ +#if defined(MDSPAN_USE_BRACKET_OPERATOR) && (MDSPAN_USE_BRACKET_OPERATOR != 0) + return x[((void) Indices, 0)...]; +#else + return x(((void) Indices, 0)...); +#endif +} + +template +constexpr typename Kokkos::mdspan::reference +get_broadcast_element( + const Kokkos::mdspan& x, + typename Extents::index_type broadcast_index) +{ + return get_broadcast_element_impl(x, broadcast_index, std::make_index_sequence()); +} + +#else + +template +constexpr typename Kokkos::mdspan, Layout, Accessor>::reference +get_broadcast_element( + const Kokkos::mdspan, Layout, Accessor>& x, + typename Kokkos::extents::index_type broadcast_index) +{ + return x[((void) Exts, broadcast_index)...]; +} + +#endif + template using nonconst_test_mdspan = Kokkos::mdspan>; @@ -41,8 +79,8 @@ class random_state_t { public: using seed_type = std::mt19937::result_type; - constexpr random_state_t() : gen_(default_seed) {} - constexpr random_state_t(seed_type seed) : gen_(seed) {} + random_state_t() : gen_(default_seed) {} + random_state_t(seed_type seed) : gen_(seed) {} std::mt19937& generator() noexcept { return gen_; } @@ -131,8 +169,7 @@ void benchmark1(ExecutionSpace exec_space, std::terminate(); } - auto get_0th_element = [] (auto x) { return x[((void) Exts, 0)...]; }; - auto buf_0s_after = get_0th_element(buf.get_mdspan()); + auto buf_0s_after = get_broadcast_element(buf.get_mdspan(), 0); 
benchmark::DoNotOptimize(buf_0s_after); } @@ -183,7 +220,11 @@ constexpr MDSPAN_FUNCTION auto slice_one_extent( Kokkos::mdspan, Layout, Accessor> x, Slice slice) { if constexpr (sizeof...(Exts) == 0) { +#if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) + static_assert(sizeof...(Exts) != 0, "slice_one_extent called with no extents"); +#else static_assert(false, "slice_one_extent called with no extents"); +#endif } else if constexpr (sizeof...(Exts) == 1) { return Kokkos::submdspan(x, slice); diff --git a/include/experimental/__p2630_bits/constant_wrapper.hpp b/include/experimental/__p2630_bits/constant_wrapper.hpp index d236df84..f877387b 100644 --- a/include/experimental/__p2630_bits/constant_wrapper.hpp +++ b/include/experimental/__p2630_bits/constant_wrapper.hpp @@ -1,15 +1,18 @@ #pragma once +#include +#include + +// This generally works fine even with Clang 21. +#define MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND 1 + // Implementation borrowed from // https://github.com/tzlaine/constexpr/blob/master/include/constant_wrapper.hpp // to which P2781 links. Provisionally assume that the feature test // macro will be called __cpp_lib_constant_wrapper and that the // features in P2781 will go in . - -#include -#include - #if ! defined(__cpp_lib_constant_wrapper) +#if ! defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) namespace std { @@ -31,17 +34,17 @@ namespace exposition_only { // // Moving the definitions of the various specializations of cw_fixed_value // above this point doesn't help either. - -// This generally works fine even with Clang 21. -#define MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND 1 +// +// Replacing the offending expression with +// +// typename unspecified = typename decltype(X)::type // exposition only +// +// gets us only part of the way there. 
template< exposition_only::cw_fixed_value X, -#if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) - typename unspecified = typename decltype(X)::type // exposition only -#else - typename unspecified = typename decltype(exposition_only::cw_fixed_value(X))::type // exposition only -#endif + typename unspecified = + typename decltype(exposition_only::cw_fixed_value(X))::type // exposition only > struct constant_wrapper; @@ -201,6 +204,7 @@ struct constant_wrapper: exposition_only::cw_operators { using type = constant_wrapper; using value_type = typename decltype(X)::type; + // Leaving the work-around path in place, in case we need it later. template #if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) requires(std::is_assignable_v) @@ -218,7 +222,7 @@ struct constant_wrapper: exposition_only::cw_operators { constexpr operator decltype(auto)() const noexcept { return value; } constexpr decltype(auto) operator()() const noexcept requires (!std::invocable) { return value; } -#if defined(__cpp_explicit_this_parameter) +#if defined(__cpp_explicit_this_parameter) using exposition_only::cw_operators::operator(); #endif }; @@ -228,4 +232,139 @@ template } // namespace std +#else + +namespace std { + +namespace exposition_only { + template + struct cw_fixed_value; // exposition only + + template + constexpr bool is_cw_fixed_value_v = false; + + template + constexpr bool is_cw_fixed_value_v> = true; +} + +// GCC 11.4 has ICE with +// "typename unspecified = typename decltype(exposition_only::cw_fixed_value(X))::type" +// as second template parameter of constant_wrapper below. +// +// Replacing that expression with use of the following alias doesn't help. +// +// namespace exposition_only { +// template +// using unspecified_t = typename decltype(cw_fixed_value(X))::type; +// } +// +// Moving the definitions of the various specializations of cw_fixed_value +// above this point doesn't help either. 
+// +// Replacing the offending expression with +// +// typename unspecified = typename decltype(X)::type // exposition only +// +// gets us only part of the way there. + +template< + exposition_only::cw_fixed_value X, + typename unspecified = + // typename decltype(exposition_only::cw_fixed_value(X))::type // exposition only + typename decltype(X)::type // exposition only +> +struct constant_wrapper; + +template +concept constexpr_param = requires { typename constant_wrapper; }; // exposition only + +namespace exposition_only { + template + struct cw_fixed_value { // exposition only + + static_assert(! std::is_array_v, "Not implemented for array types"); + static_assert(! is_cw_fixed_value_v, "cw_fixed_value recursion is forbidden"); + + using type = T; + constexpr cw_fixed_value(type v) noexcept: data(v) { } + T data; + }; + + template + cw_fixed_value(T) -> cw_fixed_value; // exposition only + + namespace cw_operators { // exposition only + template + struct adl { +#if ! defined(MDSPAN_CONSTANT_WRAPPER_OPERATOR_TEMPLATE_PARAM) +//#define MDSPAN_CONSTANT_WRAPPER_OPERATOR_TEMPLATE_PARAM constexpr_param +#define MDSPAN_CONSTANT_WRAPPER_OPERATOR_TEMPLATE_PARAM class +#endif + + // unary operators + template + friend constexpr auto operator+(T) noexcept -> constant_wrapper<(+T::value)> { return {}; } + template + friend constexpr auto operator-(T) noexcept -> constant_wrapper<(-T::value)> { return {}; } + + // binary operators + template + friend constexpr auto operator+(L, R) noexcept -> constant_wrapper<(L::value + R::value)> { return {}; } + template + friend constexpr auto operator-(L, R) noexcept -> constant_wrapper<(L::value - R::value)> { return {}; } + template + friend constexpr auto operator*(L, R) noexcept -> constant_wrapper<(L::value * R::value)> { return {}; } + template + friend constexpr auto operator/(L, R) noexcept -> constant_wrapper<(L::value / R::value)> { return {}; } + template + friend constexpr auto operator%(L, R) noexcept -> 
constant_wrapper<(L::value % R::value)> { return {}; } + + // comparisons + +#if defined(__cpp_impl_three_way_comparison) + template + friend constexpr auto operator<=>(L, R) noexcept -> constant_wrapper<(L::value <=> R::value)> { return {}; } +#endif + template + friend constexpr auto operator<(L, R) noexcept -> constant_wrapper<(L::value < R::value)> { return {}; } + template + friend constexpr auto operator<=(L, R) noexcept -> constant_wrapper<(L::value <= R::value)> { return {}; } + template + friend constexpr auto operator==(L, R) noexcept -> constant_wrapper<(L::value == R::value)> { return {}; } + template + friend constexpr auto operator!=(L, R) noexcept -> constant_wrapper<(L::value != R::value)> { return {}; } + template + friend constexpr auto operator>(L, R) noexcept -> constant_wrapper<(L::value > R::value)> { return {}; } + template + friend constexpr auto operator>=(L, R) noexcept -> constant_wrapper<(L::value >= R::value)> { return {}; } + }; + } // namespace cw_operators +} // namespace exposition_only + +template +struct constant_wrapper : exposition_only::cw_operators::adl<> { + static constexpr const auto & value = X.data; + using type = constant_wrapper; + using value_type = typename decltype(X)::type; + + // Leaving the work-around path in place, in case we need it later. + template + requires(std::is_assignable_v) + constexpr auto operator=(R) const noexcept + { + return constant_wrapper< + [] { auto v = value; return v = R::value; }() + >{}; + } + + constexpr operator decltype(auto)() const noexcept { return value; } + constexpr decltype(auto) operator()() const noexcept requires (!std::invocable) { return value; } +}; + +template + constinit auto cw = constant_wrapper{}; + +} // namespace std + +#endif // ! defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) #endif // ! 
defined(__cpp_lib_constant_wrapper) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index b0f86e28..00851c4e 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -38,13 +38,14 @@ constexpr bool is_constant_wrapper> = true; // to contain the mapped indices. // end of recursion specialization containing the final index_sequence -template < +template< #if defined(MDSPAN_ENABLE_P3663) - auto + auto Counter, #else - size_t + size_t Counter, #endif - Counter, size_t... MapIdxs> + size_t... MapIdxs +> MDSPAN_INLINE_FUNCTION constexpr auto inv_map_rank( #if defined(MDSPAN_ENABLE_P3663) @@ -60,11 +61,10 @@ constexpr auto inv_map_rank( // specialization reducing rank by one (i.e., integral slice specifier) template< #if defined(MDSPAN_ENABLE_P3663) - auto + auto Counter, #else - size_t + size_t Counter, #endif - Counter, class Slice, class... SliceSpecifiers, size_t... MapIdxs> @@ -92,7 +92,7 @@ constexpr auto inv_map_rank( std::index_sequence >; -#if defined(MDSPAN_ENABLE_P3663) +#if defined(MDSPAN_ENABLE_P3663) && ! defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) static_assert(std::is_same_v< decltype(counter + std::cw), std::constant_wrapper @@ -205,9 +205,9 @@ constexpr Integral first_of(const Integral &i) { #if defined(MDSPAN_ENABLE_P3663) template MDSPAN_INLINE_FUNCTION -constexpr auto -first_of(std::constant_wrapper i) { - return i; +constexpr std::constant_wrapper +first_of(std::constant_wrapper) { + return {}; } #else // NOTE This is technically not conforming. @@ -297,11 +297,10 @@ first_of(const strided_slice &r) { // This is needed in the case of slice being full_extent_t. 
MDSPAN_TEMPLATE_REQUIRES( #if defined(MDSPAN_ENABLE_P3663) - auto + auto k, #else - size_t -#endif - k, + size_t k, +#endif class Extents, class Integral, /* requires */(std::is_convertible_v) @@ -324,94 +323,52 @@ constexpr Integral last_of( // P3663 does not need these index_pair_like overloads, // because last_of should never see a pair-like type. MDSPAN_TEMPLATE_REQUIRES( -#if defined(MDSPAN_ENABLE_P3663) - auto -#else - size_t -#endif - k, + size_t k, class Extents, class Slice, /* requires */(index_pair_like::value) ) MDSPAN_INLINE_FUNCTION constexpr auto last_of( -#if defined(MDSPAN_ENABLE_P3663) - std::constant_wrapper, -#else std::integral_constant, -#endif const Extents &, const Slice &i) { -#if defined(MDSPAN_ENABLE_P3663) using std::get; -#endif return get<1>(i); } MDSPAN_TEMPLATE_REQUIRES( -#if defined(MDSPAN_ENABLE_P3663) - auto -#else - size_t -#endif - k, + size_t k, class Extents, class IdxT1, class IdxT2, /* requires */ (index_pair_like, size_t>::value) ) constexpr auto last_of( -#if defined(MDSPAN_ENABLE_P3663) - std::constant_wrapper, -#else std::integral_constant, -#endif const Extents &, const std::tuple& i) { -#if defined(MDSPAN_ENABLE_P3663) using std::get; -#endif return get<1>(i); } MDSPAN_TEMPLATE_REQUIRES( -#if defined(MDSPAN_ENABLE_P3663) - auto -#else - size_t -#endif - k, + size_t k, class Extents, class IdxT1, class IdxT2, /* requires */ (index_pair_like, size_t>::value) ) MDSPAN_INLINE_FUNCTION constexpr auto last_of( -#if defined(MDSPAN_ENABLE_P3663) - std::constant_wrapper, -#else std::integral_constant, -#endif const Extents &, const std::pair& i) { return i.second; } -template< -#if defined(MDSPAN_ENABLE_P3663) - auto -#else - size_t -#endif - k, - class Extents, class T> +template MDSPAN_INLINE_FUNCTION constexpr auto last_of( -#if defined(MDSPAN_ENABLE_P3663) - std::constant_wrapper, -#else std::integral_constant, -#endif const Extents &, const std::complex &i) { @@ -797,7 +754,20 @@ constexpr auto canonical_ice(S s) { 
template constexpr auto subtract_ice(X x, Y y) { +#if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) + // Key to the work-around is acknowledging that GCC 11.4.0 can't find + // constant_wrapper's overloaded arithmetic operators. + if constexpr (__mdspan_integral_constant_like> && + __mdspan_integral_constant_like>) + { + return std::cw(Y::value) - canonical_ice(X::value))>; + } + else { + return canonical_ice(y) - canonical_ice(x); + } +#else return canonical_ice(y) - canonical_ice(x); +#endif } template @@ -1074,7 +1044,7 @@ check_canonical_kth_submdspan_slice_type(const extents& e } } -#if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) +#if ! defined(__cpp_pack_indexing) template constexpr decltype(auto) get_kth_in_pack(First&& first, Rest&&... rest) { static_assert(k <= sizeof...(Rest)); @@ -1096,10 +1066,7 @@ check_canonical_kth_subdmspan_slice_types( [&] (std::index_sequence) { (check_canonical_kth_submdspan_slice_type( exts, -#if ! defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) -# if ! defined(__cpp_pack_indexing) -# error "This branch requires the C++26 'parameter pack indexing' language feature." -# endif +#if defined(__cpp_pack_indexing) slices...[Inds] #else get_kth_in_pack(slices...) @@ -1173,10 +1140,7 @@ submdspan_canonicalize_slices(const extents& exts, Slices // That implements the Mandates clause of [mdspan.sub.slices] 9. detail::submdspan_canonicalize_one_slice( exts, -#if ! defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) -# if ! defined(__cpp_pack_indexing) -# error "This branch requires the C++26 'parameter pack indexing' language feature." -# endif +#if defined(__cpp_pack_indexing) slices...[Inds] #else detail::get_kth_in_pack(slices...) 
diff --git a/tests/test_canonicalize_slices.cpp b/tests/test_canonicalize_slices.cpp index 3d622578..553871ed 100644 --- a/tests/test_canonicalize_slices.cpp +++ b/tests/test_canonicalize_slices.cpp @@ -38,6 +38,18 @@ class my_nonaggregate_pair { : first_(first), second_(second) {} +#if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) + template + constexpr auto get() -> std::conditional_t { + if constexpr (Index == 0) { + return first_; + } + else { + static_assert(Index == 1); + return second_; + } + } +#else template constexpr decltype(auto) get(this Self&& self) { if constexpr (Index == 0) { @@ -50,6 +62,7 @@ class my_nonaggregate_pair { static_assert(false, "Invalid index"); } } +#endif private: First first_; @@ -64,7 +77,11 @@ struct std::tuple_size> template struct std::tuple_element> { +#if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) + static_assert(Index == 0 || Index == 1, "Invalid index"); +#else static_assert(false, "Invalid index"); +#endif }; template diff --git a/tests/test_constant_wrapper.cpp b/tests/test_constant_wrapper.cpp index 1476e82d..fa703b13 100644 --- a/tests/test_constant_wrapper.cpp +++ b/tests/test_constant_wrapper.cpp @@ -24,6 +24,8 @@ namespace { // (anonymous) +#if ! defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) + template using IC = std::integral_constant; @@ -67,5 +69,38 @@ TEST(TestConstantWrapper, Construction) { test_integral_constant_wrapper(IC{}); test_integral_constant_wrapper(IC{}); } +#endif + +TEST(TestConstantWrapper, IntegerPlus) { + std::constant_wrapper cw_11; + constexpr size_t value = cw_11; + constexpr size_t value2 = cw_11(); + static_assert(value == value2); + constexpr size_t value3 = decltype(cw_11)(); + static_assert(value == value3); + + static_assert(std::is_same_v< + decltype(cw_11), + decltype(std::cw)>); + + [[maybe_unused]] auto expected_result = std::cw; + using expected_type = std::constant_wrapper; + static_assert(std::is_same_v); + +#if ! 
defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) + [[maybe_unused]] auto cw_11_plus_one = cw_11 + std::cw; + [[maybe_unused]] auto one_plus_cw_11 = std::cw + cw_11; + + static_assert(! std::is_same_v< + decltype(cw_11 + std::cw), + size_t>); + static_assert(std::is_same_v< + decltype(cw_11 + std::cw), + std::constant_wrapper>); + static_assert(std::is_same_v< + decltype(std::cw + cw_11), + std::constant_wrapper>); +#endif +} } // namespace (anonymous) diff --git a/tests/test_strided_slice.cpp b/tests/test_strided_slice.cpp index 392a98bf..3d18f7d9 100644 --- a/tests/test_strided_slice.cpp +++ b/tests/test_strided_slice.cpp @@ -46,7 +46,9 @@ template struct my_integral_constant { static constexpr T value = Value; constexpr operator T () const { return value; } +#if ! defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) static constexpr T operator() () { return value; } +#endif }; template From aa698101129ee4dbf58917dbaeff780f2665b191 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 25 Jun 2025 12:52:39 -0600 Subject: [PATCH 041/103] Simplify GCC 11.4.0 C++20 work-around --- .../experimental/__p2630_bits/submdspan_extents.hpp | 12 ++++++------ tests/test_strided_slice.cpp | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 00851c4e..608590a0 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -1028,19 +1028,19 @@ constexpr bool is_canonical_slice_type() { template MDSPAN_INLINE_FUNCTION constexpr void -check_canonical_kth_submdspan_slice_type(const extents& exts, Slice slice) +check_canonical_kth_submdspan_slice_type( + const extents&, + [[maybe_unused]] Slice slice) { if constexpr (! is_canonical_slice_type()) { -#if ! 
defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) +#if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) + static_assert(is_canonical_slice_type()); +#else static_assert(false); #endif } else { // 3.2 -#if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) static_assert(check_static_bounds(extents{}) != check_static_bounds_result::out_of_bounds); -#else - static_assert(check_static_bounds(exts) != check_static_bounds_result::out_of_bounds); -#endif } } diff --git a/tests/test_strided_slice.cpp b/tests/test_strided_slice.cpp index 3d18f7d9..d2fbe797 100644 --- a/tests/test_strided_slice.cpp +++ b/tests/test_strided_slice.cpp @@ -46,7 +46,7 @@ template struct my_integral_constant { static constexpr T value = Value; constexpr operator T () const { return value; } -#if ! defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) +#if defined(__cpp_static_call_operator) static constexpr T operator() () { return value; } #endif }; From 0c8baeac1b2aa07579a43d9da97c3c2c07c1597a Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 25 Jun 2025 13:22:16 -0600 Subject: [PATCH 042/103] Clean up GCC 11.4.0 C++20 work-around Remove constant_wrapper's overloaded arithmetic operators, since the work-around doesn't need them. --- .../__p2630_bits/constant_wrapper.hpp | 102 ++---------------- 1 file changed, 10 insertions(+), 92 deletions(-) diff --git a/include/experimental/__p2630_bits/constant_wrapper.hpp b/include/experimental/__p2630_bits/constant_wrapper.hpp index f877387b..ee00aff1 100644 --- a/include/experimental/__p2630_bits/constant_wrapper.hpp +++ b/include/experimental/__p2630_bits/constant_wrapper.hpp @@ -21,26 +21,6 @@ namespace exposition_only { struct cw_fixed_value; // exposition only } -// GCC 11.4 has ICE with -// "typename unspecified = typename decltype(exposition_only::cw_fixed_value(X))::type" -// as second template parameter of constant_wrapper below. -// -// Replacing that expression with use of the following alias doesn't help. 
-// -// namespace exposition_only { -// template -// using unspecified_t = typename decltype(cw_fixed_value(X))::type; -// } -// -// Moving the definitions of the various specializations of cw_fixed_value -// above this point doesn't help either. -// -// Replacing the offending expression with -// -// typename unspecified = typename decltype(X)::type // exposition only -// -// gets us only part of the way there. - template< exposition_only::cw_fixed_value X, typename unspecified = @@ -199,20 +179,14 @@ namespace exposition_only { } template -struct constant_wrapper: exposition_only::cw_operators { +struct constant_wrapper : exposition_only::cw_operators { static constexpr const auto & value = X.data; using type = constant_wrapper; using value_type = typename decltype(X)::type; - // Leaving the work-around path in place, in case we need it later. template -#if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) - requires(std::is_assignable_v) -#endif - constexpr auto operator=(R) const noexcept -#if ! defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) - requires requires(value_type x) { x = R::value; } -#endif + constexpr auto operator=(R) const noexcept + requires requires(value_type x) { x = R::value; } { return constant_wrapper< [] { auto v = value; return v = R::value; }() @@ -247,7 +221,7 @@ namespace exposition_only { constexpr bool is_cw_fixed_value_v> = true; } -// GCC 11.4 has ICE with +// GCC 11.4.0 (C++20) has an internal compiler error (ICE) with // "typename unspecified = typename decltype(exposition_only::cw_fixed_value(X))::type" // as second template parameter of constant_wrapper below. // @@ -265,13 +239,14 @@ namespace exposition_only { // // typename unspecified = typename decltype(X)::type // exposition only // -// gets us only part of the way there. +// gets us only part of the way there. The problem ultimately relates to +// GCC 11.4.0 being unable to find the overloaded arithmetic operators. 
+// Our P3663 implementation doesn't depend on them, so we don't need +// the operators at all. template< exposition_only::cw_fixed_value X, - typename unspecified = - // typename decltype(exposition_only::cw_fixed_value(X))::type // exposition only - typename decltype(X)::type // exposition only + typename unspecified = typename decltype(X)::type // exposition only > struct constant_wrapper; @@ -292,71 +267,14 @@ namespace exposition_only { template cw_fixed_value(T) -> cw_fixed_value; // exposition only - - namespace cw_operators { // exposition only - template - struct adl { -#if ! defined(MDSPAN_CONSTANT_WRAPPER_OPERATOR_TEMPLATE_PARAM) -//#define MDSPAN_CONSTANT_WRAPPER_OPERATOR_TEMPLATE_PARAM constexpr_param -#define MDSPAN_CONSTANT_WRAPPER_OPERATOR_TEMPLATE_PARAM class -#endif - - // unary operators - template - friend constexpr auto operator+(T) noexcept -> constant_wrapper<(+T::value)> { return {}; } - template - friend constexpr auto operator-(T) noexcept -> constant_wrapper<(-T::value)> { return {}; } - - // binary operators - template - friend constexpr auto operator+(L, R) noexcept -> constant_wrapper<(L::value + R::value)> { return {}; } - template - friend constexpr auto operator-(L, R) noexcept -> constant_wrapper<(L::value - R::value)> { return {}; } - template - friend constexpr auto operator*(L, R) noexcept -> constant_wrapper<(L::value * R::value)> { return {}; } - template - friend constexpr auto operator/(L, R) noexcept -> constant_wrapper<(L::value / R::value)> { return {}; } - template - friend constexpr auto operator%(L, R) noexcept -> constant_wrapper<(L::value % R::value)> { return {}; } - - // comparisons - -#if defined(__cpp_impl_three_way_comparison) - template - friend constexpr auto operator<=>(L, R) noexcept -> constant_wrapper<(L::value <=> R::value)> { return {}; } -#endif - template - friend constexpr auto operator<(L, R) noexcept -> constant_wrapper<(L::value < R::value)> { return {}; } - template - friend constexpr auto 
operator<=(L, R) noexcept -> constant_wrapper<(L::value <= R::value)> { return {}; } - template - friend constexpr auto operator==(L, R) noexcept -> constant_wrapper<(L::value == R::value)> { return {}; } - template - friend constexpr auto operator!=(L, R) noexcept -> constant_wrapper<(L::value != R::value)> { return {}; } - template - friend constexpr auto operator>(L, R) noexcept -> constant_wrapper<(L::value > R::value)> { return {}; } - template - friend constexpr auto operator>=(L, R) noexcept -> constant_wrapper<(L::value >= R::value)> { return {}; } - }; - } // namespace cw_operators } // namespace exposition_only template -struct constant_wrapper : exposition_only::cw_operators::adl<> { +struct constant_wrapper { static constexpr const auto & value = X.data; using type = constant_wrapper; using value_type = typename decltype(X)::type; - // Leaving the work-around path in place, in case we need it later. - template - requires(std::is_assignable_v) - constexpr auto operator=(R) const noexcept - { - return constant_wrapper< - [] { auto v = value; return v = R::value; }() - >{}; - } - constexpr operator decltype(auto)() const noexcept { return value; } constexpr decltype(auto) operator()() const noexcept requires (!std::invocable) { return value; } }; From ea51c88d66a2f8ca2dfd8516852cd932ada5ef25 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 25 Jun 2025 14:09:56 -0600 Subject: [PATCH 043/103] Fix Clang 14 C++20 build --- .../__p2630_bits/constant_wrapper.hpp | 16 ++-- .../__p2630_bits/submdspan_extents.hpp | 42 +++++++++ tests/test_canonicalize_slices.cpp | 79 +++++++++++++++-- tests/test_strided_slice.cpp | 5 ++ tests/test_submdspan_check_static_bounds.cpp | 88 ++++++++++++------- 5 files changed, 179 insertions(+), 51 deletions(-) diff --git a/include/experimental/__p2630_bits/constant_wrapper.hpp b/include/experimental/__p2630_bits/constant_wrapper.hpp index ee00aff1..763b2029 100644 --- a/include/experimental/__p2630_bits/constant_wrapper.hpp +++ 
b/include/experimental/__p2630_bits/constant_wrapper.hpp @@ -244,14 +244,9 @@ namespace exposition_only { // Our P3663 implementation doesn't depend on them, so we don't need // the operators at all. -template< - exposition_only::cw_fixed_value X, - typename unspecified = typename decltype(X)::type // exposition only -> -struct constant_wrapper; - -template -concept constexpr_param = requires { typename constant_wrapper; }; // exposition only +// Clang 14 doesn't like the forward declaration of constant_wrapper, +// because it claims that the non-type template parameter X has a different +// type in the definition versus in the declaration. namespace exposition_only { template @@ -269,7 +264,10 @@ namespace exposition_only { cw_fixed_value(T) -> cw_fixed_value; // exposition only } // namespace exposition_only -template +template< + exposition_only::cw_fixed_value X, + typename unspecified = typename decltype(X)::type // exposition only +> struct constant_wrapper { static constexpr const auto & value = X.data; using type = constant_wrapper; diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 608590a0..c1b9860b 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -1097,19 +1097,47 @@ submdspan_canonicalize_one_slice(const extents& exts, Sli return canonical_ice(s); // 11.2 } else if constexpr (is_strided_slice::value) { // 11.3 +#if ! defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) + // GCC 11.4.0 (C++20) accepts this code, but Clang 14 does not. return strided_slice{ .offset = canonical_ice(s.offset), .extent = canonical_ice(s.extent), .stride = canonical_ice(s.stride) }; +#else + auto offset = canonical_ice(s.offset); + auto extent = canonical_ice(s.extent); + auto stride = canonical_ice(s.stride); + return strided_slice { + .offset = offset, + .extent = extent, + .stride = stride + }; +#endif } #if ! 
defined(__cpp_lib_tuple_like) || (__cpp_lib_tuple_like < 202311L) else if constexpr (detail::is_std_complex) { +#if ! defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) + // GCC 11.4.0 (C++20) accepts this code, but Clang 14 does not. return strided_slice{ .offset = canonical_ice(s.real()), .extent = canonical_ice(s.imag() - s.real()), .stride = std::cw }; +#else + auto offset = canonical_ice(s.real()); + auto extent = canonical_ice(s.imag() - s.real()); + auto stride = std::cw; + return strided_slice { + .offset = offset, + .extent = extent, + .stride = stride + }; +#endif } #endif else { // 11.4 @@ -1118,11 +1146,25 @@ submdspan_canonicalize_one_slice(const extents& exts, Sli using S_k1 = decltype(s_k1); static_assert(std::is_convertible_v); static_assert(std::is_convertible_v); +#if ! defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) + // GCC 11.4.0 (C++20) accepts this code, but Clang 14 does not. return strided_slice{ .offset = canonical_ice(s_k0), .extent = subtract_ice(s_k0, s_k1), .stride = std::cw }; +#else + auto offset = canonical_ice(s_k0); + auto extent = subtract_ice(s_k0, s_k1); + auto stride = std::cw; + return strided_slice { + .offset = offset, + .extent = extent, + .stride = stride + }; +#endif } } diff --git a/tests/test_canonicalize_slices.cpp b/tests/test_canonicalize_slices.cpp index 553871ed..33dcbf71 100644 --- a/tests/test_canonicalize_slices.cpp +++ b/tests/test_canonicalize_slices.cpp @@ -116,10 +116,17 @@ constexpr bool slice_equal(const Left&, Kokkos::full_extent_t) { return std::is_convertible_v; } +#if defined(__clang__) && (__clang_major__ < 15) +template +constexpr bool slice_equal( + const Kokkos::strided_slice& left, + const Kokkos::strided_slice& right) +#else template constexpr bool slice_equal( const Kokkos::strided_slice& left, const Kokkos::strided_slice& right) +#endif { return left.offset == right.offset && left.extent == right.extent && left.stride == right.stride; } @@ -174,33 +181,87 @@ TEST(CanonicalizeSlices, 
Rank1_integer_static) { TEST(CanonicalizeSlices, Rank1_pair) { constexpr auto slice0 = std::pair{std::integral_constant{}, 11}; + + constexpr auto offset = std::cw; + constexpr auto extent = size_t(4u); // 11 - 7 + constexpr auto stride = std::cw; +#if defined(__clang__) && (__clang_major__ < 15) + const auto expected_slices = std::tuple{ + Kokkos::strided_slice< + decltype(offset), + decltype(extent), + decltype(stride) + > { + offset, + extent, + stride + } + }; +#else constexpr auto expected_slices = std::tuple{Kokkos::strided_slice{ - .offset = std::cw, - .extent = size_t(4u), // 11 - 7 - .stride = std::cw + .offset = offset, + .extent = extent, + .stride = stride }}; +#endif constexpr auto exts = Kokkos::extents{}; test_canonicalize_slices(expected_slices, exts, slice0); } TEST(CanonicalizeSlices, Rank1_aggregate_pair) { constexpr auto slice0 = my_test::my_aggregate_pair{7, 11}; + + constexpr auto offset = size_t(7u); + constexpr auto extent = (size_t(11u) - size_t(7u)); + constexpr auto stride = std::cw; +#if defined(__clang__) && (__clang_major__ < 15) + const auto expected_slices = std::tuple{ + Kokkos::strided_slice< + decltype(offset), + decltype(extent), + decltype(stride) + > { + offset, + extent, + stride + } + }; +#else constexpr auto expected_slices = std::tuple{Kokkos::strided_slice{ - .offset = size_t(7u), - .extent = (size_t(11u) - size_t(7u)), - .stride = std::cw + .offset = offset, + .extent = extent, + .stride = stride }}; +#endif constexpr auto exts = Kokkos::extents{}; test_canonicalize_slices(expected_slices, exts, slice0); } TEST(CanonicalizeSlices, Rank1_nonaggregate_pair) { constexpr auto slice0 = my_test::my_nonaggregate_pair(7, 11); + + constexpr auto offset = size_t(7u); + constexpr auto extent = (size_t(11u) - size_t(7u)); + constexpr auto stride = std::cw; +#if defined(__clang__) && (__clang_major__ < 15) + const auto expected_slices = std::tuple{ + Kokkos::strided_slice< + decltype(offset), + decltype(extent), + decltype(stride) + > 
{ + offset, + extent, + stride + } + }; +#else constexpr auto expected_slices = std::tuple{Kokkos::strided_slice{ - .offset = size_t(7u), - .extent = (size_t(11u) - size_t(7u)), - .stride = std::cw + .offset = offset, + .extent = extent, + .stride = stride }}; +#endif constexpr auto exts = Kokkos::extents{}; test_canonicalize_slices(expected_slices, exts, slice0); } diff --git a/tests/test_strided_slice.cpp b/tests/test_strided_slice.cpp index d2fbe797..21f6eae6 100644 --- a/tests/test_strided_slice.cpp +++ b/tests/test_strided_slice.cpp @@ -23,7 +23,12 @@ namespace { template void test_strided_slice(OffsetType offset, ExtentType extent, StrideType stride) { + // Clang 14 is bad at CTAD for aggregates. +#if defined(__clang__) && (__clang_major__ < 15) + Kokkos::strided_slice s{offset, extent, stride}; +#else Kokkos::strided_slice s{offset, extent, stride}; +#endif static_assert(std::is_same_v>); auto offset2 = s.offset; static_assert(std::is_same_v); diff --git a/tests/test_submdspan_check_static_bounds.cpp b/tests/test_submdspan_check_static_bounds.cpp index aa2160f3..60f07870 100644 --- a/tests/test_submdspan_check_static_bounds.cpp +++ b/tests/test_submdspan_check_static_bounds.cpp @@ -16,12 +16,16 @@ #include #include #include -#include #include #include #include +#include #include +#if defined(__cpp_lib_source_location) +# include +#endif + namespace test { template @@ -111,18 +115,36 @@ static_assert(! std::is_convertible_v, std: static_assert(! std::is_convertible_v, std::tuple>); static_assert(! test::has_get_like_pair, std::pair>); +// Clang 14 is bad at CTAD for aggregates. 
+template +constexpr aggregate_pair +make_aggregate_pair(const First& first, const Second& second) { + return aggregate_pair{first, second}; +} + template void test_check_static_bounds( Kokkos::extents extents, Kokkos::detail::check_static_bounds_result expected_result, - const std::source_location location = std::source_location::current()) +#if defined(__cpp_lib_source_location) + const std::source_location location = std::source_location::current() +#else + const int line = __LINE__ +#endif + ) { using Kokkos::detail::check_static_bounds; using Kokkos::detail::check_static_bounds_result; auto result = check_static_bounds(extents); static_assert(std::is_same_v); - EXPECT_EQ(result, expected_result) << "on line " << location.line(); + EXPECT_EQ(result, expected_result) << "on line " << +#if defined(__cpp_lib_source_location) + location.line() +#else + line +#endif + ; } template @@ -317,67 +339,67 @@ TEST(Submdspan, CheckStaticBounds) { // 14.4.1.1 { - using slice_type = decltype(aggregate_pair{IC<-1>{}, IC<0>{}}); + using slice_type = decltype(make_aggregate_pair(IC<-1>{}, IC<0>{})); test_check_static_bounds<0, slice_type>(exts, OOB); test_check_static_bounds<1, slice_type>(exts, OOB); test_check_static_bounds<2, slice_type>(exts, OOB); } { - using slice_type = decltype(aggregate_pair{IC<-1>{}, int{0}}); + using slice_type = decltype(make_aggregate_pair(IC<-1>{}, int{0})); test_check_static_bounds<0, slice_type>(exts, OOB); test_check_static_bounds<1, slice_type>(exts, OOB); test_check_static_bounds<2, slice_type>(exts, OOB); } // 14.4.1.2 { - using slice_type = decltype(aggregate_pair{IC<13>{}, IC<0>{}}); + using slice_type = decltype(make_aggregate_pair(IC<13>{}, IC<0>{})); test_check_static_bounds<0, slice_type>(exts, OOB); test_check_static_bounds<1, slice_type>(exts, OOB); test_check_static_bounds<2, slice_type>(exts, OOB); } { - using slice_type = decltype(aggregate_pair{IC<13>{}, int{0}}); + using slice_type = decltype(make_aggregate_pair(IC<13>{}, 
int{0})); test_check_static_bounds<0, slice_type>(exts, OOB); test_check_static_bounds<1, slice_type>(exts, OOB); test_check_static_bounds<2, slice_type>(exts, OOB); } // 14.4.1.3 { - using slice_type = decltype(aggregate_pair{IC<1>{}, IC<0>{}}); + using slice_type = decltype(make_aggregate_pair(IC<1>{}, IC<0>{})); test_check_static_bounds<0, slice_type>(exts, OOB); test_check_static_bounds<1, slice_type>(exts, OOB); test_check_static_bounds<2, slice_type>(exts, OOB); } // 14.4.1.4 { - using slice_type = decltype(aggregate_pair{IC<0>{}, IC<13>{}}); + using slice_type = decltype(make_aggregate_pair(IC<0>{}, IC<13>{})); test_check_static_bounds<0, slice_type>(exts, OOB); test_check_static_bounds<1, slice_type>(exts, OOB); test_check_static_bounds<2, slice_type>(exts, OOB); } // 14.4.1.5 { - using slice_type = decltype(aggregate_pair{IC<1>{}, IC<3>{}}); + using slice_type = decltype(make_aggregate_pair(IC<1>{}, IC<3>{})); test_check_static_bounds<0, slice_type>(exts, INB); test_check_static_bounds<1, slice_type>(exts, INB); test_check_static_bounds<2, slice_type>(exts, INB); } // 14.4.1.6 { - using slice_type = decltype(aggregate_pair{IC<1>{}, int{3}}); + using slice_type = decltype(make_aggregate_pair(IC<1>{}, int{3})); test_check_static_bounds<0, slice_type>(exts, UNK); test_check_static_bounds<1, slice_type>(exts, UNK); test_check_static_bounds<2, slice_type>(exts, UNK); } // 14.4.2 { - using slice_type = decltype(aggregate_pair{int{1}, IC<3>{}}); + using slice_type = decltype(make_aggregate_pair(int{1}, IC<3>{})); test_check_static_bounds<0, slice_type>(exts, UNK); test_check_static_bounds<1, slice_type>(exts, UNK); test_check_static_bounds<2, slice_type>(exts, UNK); } { - using slice_type = decltype(aggregate_pair{int{1}, int{3}}); + using slice_type = decltype(make_aggregate_pair(int{1}, int{3})); test_check_static_bounds<0, slice_type>(exts, UNK); test_check_static_bounds<1, slice_type>(exts, UNK); test_check_static_bounds<2, slice_type>(exts, UNK); @@ -549,67 
+571,67 @@ TEST(Submdspan, CheckStaticBounds) { // 14.4.1.1 { - using slice_type = decltype(aggregate_pair{IC<-1>{}, IC<0>{}}); + using slice_type = decltype(make_aggregate_pair(IC<-1>{}, IC<0>{})); test_check_static_bounds<0, slice_type>(exts, OOB); test_check_static_bounds<1, slice_type>(exts, OOB); test_check_static_bounds<2, slice_type>(exts, OOB); } { - using slice_type = decltype(aggregate_pair{IC<-1>{}, int{0}}); + using slice_type = decltype(make_aggregate_pair(IC<-1>{}, int{0})); test_check_static_bounds<0, slice_type>(exts, OOB); test_check_static_bounds<1, slice_type>(exts, OOB); test_check_static_bounds<2, slice_type>(exts, OOB); } // 14.4.1.2 (actually 14.4.1.6) { - using slice_type = decltype(aggregate_pair{IC<13>{}, IC<14>{}}); + using slice_type = decltype(make_aggregate_pair(IC<13>{}, IC<14>{})); test_check_static_bounds<0, slice_type>(exts, UNK); test_check_static_bounds<1, slice_type>(exts, UNK); test_check_static_bounds<2, slice_type>(exts, UNK); } { - using slice_type = decltype(aggregate_pair{IC<13>{}, int{14}}); + using slice_type = decltype(make_aggregate_pair(IC<13>{}, int{14})); test_check_static_bounds<0, slice_type>(exts, UNK); test_check_static_bounds<1, slice_type>(exts, UNK); test_check_static_bounds<2, slice_type>(exts, UNK); } // 14.4.1.3 { - using slice_type = decltype(aggregate_pair{IC<1>{}, IC<0>{}}); + using slice_type = decltype(make_aggregate_pair(IC<1>{}, IC<0>{})); test_check_static_bounds<0, slice_type>(exts, OOB); test_check_static_bounds<1, slice_type>(exts, OOB); test_check_static_bounds<2, slice_type>(exts, OOB); } // 14.4.1.4 (actually 14.4.1.6) { - using slice_type = decltype(aggregate_pair{IC<0>{}, IC<13>{}}); + using slice_type = decltype(make_aggregate_pair(IC<0>{}, IC<13>{})); test_check_static_bounds<0, slice_type>(exts, UNK); test_check_static_bounds<1, slice_type>(exts, UNK); test_check_static_bounds<2, slice_type>(exts, UNK); } // 14.4.1.5 (actually 14.4.1.6) { - using slice_type = 
decltype(aggregate_pair{IC<1>{}, IC<3>{}}); + using slice_type = decltype(make_aggregate_pair(IC<1>{}, IC<3>{})); test_check_static_bounds<0, slice_type>(exts, UNK); test_check_static_bounds<1, slice_type>(exts, UNK); test_check_static_bounds<2, slice_type>(exts, UNK); } // 14.4.1.6 { - using slice_type = decltype(aggregate_pair{IC<1>{}, int{3}}); + using slice_type = decltype(make_aggregate_pair(IC<1>{}, int{3})); test_check_static_bounds<0, slice_type>(exts, UNK); test_check_static_bounds<1, slice_type>(exts, UNK); test_check_static_bounds<2, slice_type>(exts, UNK); } // 14.4.2 { - using slice_type = decltype(aggregate_pair{int{1}, IC<3>{}}); + using slice_type = decltype(make_aggregate_pair(int{1}, IC<3>{})); test_check_static_bounds<0, slice_type>(exts, UNK); test_check_static_bounds<1, slice_type>(exts, UNK); test_check_static_bounds<2, slice_type>(exts, UNK); } { - using slice_type = decltype(aggregate_pair{int{1}, int{3}}); + using slice_type = decltype(make_aggregate_pair(int{1}, int{3})); test_check_static_bounds<0, slice_type>(exts, UNK); test_check_static_bounds<1, slice_type>(exts, UNK); test_check_static_bounds<2, slice_type>(exts, UNK); @@ -781,67 +803,67 @@ TEST(Submdspan, CheckStaticBounds) { // 14.4.1.1 { - using slice_type = decltype(aggregate_pair{IC<-1>{}, IC<0>{}}); + using slice_type = decltype(make_aggregate_pair(IC<-1>{}, IC<0>{})); test_check_static_bounds<0, slice_type>(exts, OOB); test_check_static_bounds<1, slice_type>(exts, OOB); test_check_static_bounds<2, slice_type>(exts, OOB); } { - using slice_type = decltype(aggregate_pair{IC<-1>{}, int{0}}); + using slice_type = decltype(make_aggregate_pair(IC<-1>{}, int{0})); test_check_static_bounds<0, slice_type>(exts, OOB); test_check_static_bounds<1, slice_type>(exts, OOB); test_check_static_bounds<2, slice_type>(exts, OOB); } // 14.4.1.2 (and 14.4.1.6) { - using slice_type = decltype(aggregate_pair{IC<13>{}, IC<14>{}}); + using slice_type = decltype(make_aggregate_pair(IC<13>{}, IC<14>{})); 
test_check_static_bounds<0, slice_type>(exts, OOB); test_check_static_bounds<1, slice_type>(exts, UNK); // 14.4.1.6 test_check_static_bounds<2, slice_type>(exts, OOB); } { - using slice_type = decltype(aggregate_pair{IC<13>{}, int{14}}); + using slice_type = decltype(make_aggregate_pair(IC<13>{}, int{14})); test_check_static_bounds<0, slice_type>(exts, OOB); test_check_static_bounds<1, slice_type>(exts, UNK); // 14.4.1.6 test_check_static_bounds<2, slice_type>(exts, OOB); } // 14.4.1.3 { - using slice_type = decltype(aggregate_pair{IC<1>{}, IC<0>{}}); + using slice_type = decltype(make_aggregate_pair(IC<1>{}, IC<0>{})); test_check_static_bounds<0, slice_type>(exts, OOB); test_check_static_bounds<1, slice_type>(exts, OOB); test_check_static_bounds<2, slice_type>(exts, OOB); } // 14.4.1.4 (and 14.4.1.6) { - using slice_type = decltype(aggregate_pair{IC<0>{}, IC<13>{}}); + using slice_type = decltype(make_aggregate_pair(IC<0>{}, IC<13>{})); test_check_static_bounds<0, slice_type>(exts, OOB); test_check_static_bounds<1, slice_type>(exts, UNK); // 14.4.1.6 test_check_static_bounds<2, slice_type>(exts, OOB); } // 14.4.1.5 (and 14.4.1.6) { - using slice_type = decltype(aggregate_pair{IC<1>{}, IC<3>{}}); + using slice_type = decltype(make_aggregate_pair(IC<1>{}, IC<3>{})); test_check_static_bounds<0, slice_type>(exts, INB); test_check_static_bounds<1, slice_type>(exts, UNK); // 14.4.1.6 test_check_static_bounds<2, slice_type>(exts, INB); } // 14.4.1.6 { - using slice_type = decltype(aggregate_pair{IC<1>{}, int{3}}); + using slice_type = decltype(make_aggregate_pair(IC<1>{}, int{3})); test_check_static_bounds<0, slice_type>(exts, UNK); test_check_static_bounds<1, slice_type>(exts, UNK); test_check_static_bounds<2, slice_type>(exts, UNK); } // 14.4.2 { - using slice_type = decltype(aggregate_pair{int{1}, IC<3>{}}); + using slice_type = decltype(make_aggregate_pair(int{1}, IC<3>{})); test_check_static_bounds<0, slice_type>(exts, UNK); test_check_static_bounds<1, 
slice_type>(exts, UNK); test_check_static_bounds<2, slice_type>(exts, UNK); } { - using slice_type = decltype(aggregate_pair{int{1}, int{3}}); + using slice_type = decltype(make_aggregate_pair(int{1}, int{3})); test_check_static_bounds<0, slice_type>(exts, UNK); test_check_static_bounds<1, slice_type>(exts, UNK); test_check_static_bounds<2, slice_type>(exts, UNK); From dc90e90e498dcca041e8e8c674f14062707ab633 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 26 Jun 2025 10:47:37 -0600 Subject: [PATCH 044/103] Fix submdspan benchmark when P3663 is disabled --- benchmarks/submdspan/submdspan.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/benchmarks/submdspan/submdspan.cpp b/benchmarks/submdspan/submdspan.cpp index 2c72217b..c91c27e6 100644 --- a/benchmarks/submdspan/submdspan.cpp +++ b/benchmarks/submdspan/submdspan.cpp @@ -67,11 +67,14 @@ BENCHMARK_CAPTURE(host_benchmark1, size_t_6d, (Kokkos::dextents{2, 2, namespace submdspan_benchmark { // Multiply elements by 3, using 1-D slices. -template +template void benchmark2_loop(ExecutionSpace exec_space, - nonconst_test_mdspan out) + Kokkos::mdspan, Layout> out) { - using mdspan_type = nonconst_test_mdspan; + using mdspan_type = Kokkos::mdspan, Layout>; if constexpr (mdspan_type::rank() == 0) { return; From edc4387a89b458f473209a1758df78bb7d4b8b15 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 26 Jun 2025 10:48:06 -0600 Subject: [PATCH 045/103] Make MDSPAN_CONSTANT_WRAPPER_WORKAROUND a CMake option * Rename option for working around constant_wrapper compiler issues from MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND to MDSPAN_CONSTANT_WRAPPER_WORKAROUND. * Make MDSPAN_CONSTANT_WRAPPER_WORKAROUND a CMake option. It's ON by default. It only has an effect if the CMake option MDSPAN_ENABLE_P3663 is ON.
--- CMakeLists.txt | 3 +++ benchmarks/CMakeLists.txt | 18 ++++++++++++++++++ benchmarks/submdspan_generic.hpp | 4 ++-- .../__p2630_bits/constant_wrapper.hpp | 7 ++----- .../experimental/__p2630_bits/submdspan.hpp | 2 +- .../__p2630_bits/submdspan_extents.hpp | 14 +++++++------- tests/CMakeLists.txt | 14 ++++++++------ tests/test_canonicalize_slices.cpp | 4 ++-- tests/test_constant_wrapper.cpp | 4 ++-- 9 files changed, 45 insertions(+), 25 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ad471d8c..c069e55a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,6 +33,9 @@ option(MDSPAN_ENABLE_CONCEPTS "Try to enable concepts support by giving extra fl option(MDSPAN_ENABLE_P3663 "Enable implementation of P3663 (Future-proof submdspan_mapping)." Off) +# Defaults to ON, because this has only been tested with Clang 21 (development Clang). +option(MDSPAN_CONSTANT_WRAPPER_WORKAROUND "If MDSPAN_ENABLE_P3663 is enabled, work around some compilers' inability to build constant_wrapper." 
ON) + ################################################################################ # Decide on the standard to use diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index a3329a32..918c8172 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -10,6 +10,12 @@ function(mdspan_add_benchmark EXENAME) if(MDSPAN_ENABLE_P3663) target_compile_definitions(${EXENAME} PUBLIC MDSPAN_ENABLE_P3663=1) endif() + if(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) + target_compile_definitions(${EXENAME} + PUBLIC + MDSPAN_CONSTANT_WRAPPER_WORKAROUND=1 + ) + endif() endfunction() if(MDSPAN_USE_SYSTEM_BENCHMARK) @@ -72,6 +78,12 @@ function(mdspan_add_cuda_benchmark EXENAME) if(MDSPAN_ENABLE_P3663) target_compile_definitions(${EXENAME} PUBLIC MDSPAN_ENABLE_P3663=1) endif() + if(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) + target_compile_definitions(${EXENAME} + PUBLIC + MDSPAN_CONSTANT_WRAPPER_WORKAROUND=1 + ) + endif() endfunction() if(MDSPAN_ENABLE_OPENMP) @@ -90,6 +102,12 @@ function(mdspan_add_openmp_benchmark EXENAME) if(MDSPAN_ENABLE_P3663) target_compile_definitions(${EXENAME} PUBLIC MDSPAN_ENABLE_P3663=1) endif() + if(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) + target_compile_definitions(${EXENAME} + PUBLIC + MDSPAN_CONSTANT_WRAPPER_WORKAROUND=1 + ) + endif() else() message(WARNING "Not adding target ${EXENAME} because OpenMP was not found") endif() diff --git a/benchmarks/submdspan_generic.hpp b/benchmarks/submdspan_generic.hpp index 4008cb3c..b7a7119d 100644 --- a/benchmarks/submdspan_generic.hpp +++ b/benchmarks/submdspan_generic.hpp @@ -29,7 +29,7 @@ namespace submdspan_benchmark { -#if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) +#if defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) template constexpr typename Kokkos::mdspan::reference @@ -220,7 +220,7 @@ constexpr MDSPAN_FUNCTION auto slice_one_extent( Kokkos::mdspan, Layout, Accessor> x, Slice slice) { if constexpr (sizeof...(Exts) == 0) { -#if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) +#if 
defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) static_assert(sizeof...(Exts) != 0, "slice_one_extent called with no extents"); #else static_assert(false, "slice_one_extent called with no extents"); diff --git a/include/experimental/__p2630_bits/constant_wrapper.hpp b/include/experimental/__p2630_bits/constant_wrapper.hpp index 763b2029..40006892 100644 --- a/include/experimental/__p2630_bits/constant_wrapper.hpp +++ b/include/experimental/__p2630_bits/constant_wrapper.hpp @@ -3,16 +3,13 @@ #include #include -// This generally works fine even with Clang 21. -#define MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND 1 - // Implementation borrowed from // https://github.com/tzlaine/constexpr/blob/master/include/constant_wrapper.hpp // to which P2781 links. Provisionally assume that the feature test // macro will be called __cpp_lib_constant_wrapper and that the // features in P2781 will go in . #if ! defined(__cpp_lib_constant_wrapper) -#if ! defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) +#if ! defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) namespace std { @@ -282,5 +279,5 @@ template } // namespace std -#endif // ! defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) +#endif // ! defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) #endif // ! 
defined(__cpp_lib_constant_wrapper) diff --git a/include/experimental/__p2630_bits/submdspan.hpp b/include/experimental/__p2630_bits/submdspan.hpp index c84004c7..f29346e9 100644 --- a/include/experimental/__p2630_bits/submdspan.hpp +++ b/include/experimental/__p2630_bits/submdspan.hpp @@ -29,7 +29,7 @@ submdspan(const mdspan &src, #if defined(MDSPAN_ENABLE_P3663) -# if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) +# if defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) auto canonical_slices_tuple = submdspan_canonicalize_slices(src.extents(), slices...); diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index c1b9860b..ab87e5a2 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -92,7 +92,7 @@ constexpr auto inv_map_rank( std::index_sequence >; -#if defined(MDSPAN_ENABLE_P3663) && ! defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) +#if defined(MDSPAN_ENABLE_P3663) && ! defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) static_assert(std::is_same_v< decltype(counter + std::cw), std::constant_wrapper @@ -754,7 +754,7 @@ constexpr auto canonical_ice(S s) { template constexpr auto subtract_ice(X x, Y y) { -#if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) +#if defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) // Key to the work-around is acknowledging that GCC 11.4.0 can't find // constant_wrapper's overloaded arithmetic operators. if constexpr (__mdspan_integral_constant_like> && @@ -1033,7 +1033,7 @@ check_canonical_kth_submdspan_slice_type( [[maybe_unused]] Slice slice) { if constexpr (! is_canonical_slice_type()) { -#if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) +#if defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) static_assert(is_canonical_slice_type()); #else static_assert(false); @@ -1082,7 +1082,7 @@ constexpr auto submdspan_canonicalize_one_slice(const extents& exts, Slice s) { // Part of [mdspan.sub.slices] 9. 
// This could be combined with the if constexpr branches below. -#if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) +#if defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) static_assert(check_static_bounds(extents{}) != check_static_bounds_result::out_of_bounds); #else static_assert(check_static_bounds(exts) != check_static_bounds_result::out_of_bounds); @@ -1097,7 +1097,7 @@ submdspan_canonicalize_one_slice(const extents& exts, Sli return canonical_ice(s); // 11.2 } else if constexpr (is_strided_slice::value) { // 11.3 -#if ! defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) +#if ! defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) // GCC 11.4.0 (C++20) accepts this code, but Clang 14 does not. return strided_slice{ .offset = canonical_ice(s.offset), @@ -1119,7 +1119,7 @@ submdspan_canonicalize_one_slice(const extents& exts, Sli } #if ! defined(__cpp_lib_tuple_like) || (__cpp_lib_tuple_like < 202311L) else if constexpr (detail::is_std_complex) { -#if ! defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) +#if ! defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) // GCC 11.4.0 (C++20) accepts this code, but Clang 14 does not. return strided_slice{ .offset = canonical_ice(s.real()), @@ -1146,7 +1146,7 @@ submdspan_canonicalize_one_slice(const extents& exts, Sli using S_k1 = decltype(s_k1); static_assert(std::is_convertible_v); static_assert(std::is_convertible_v); -#if ! defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) +#if ! defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) // GCC 11.4.0 (C++20) accepts this code, but Clang 14 does not. 
return strided_slice{ .offset = canonical_ice(s_k0), diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 222cb9a8..fcc54f70 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -31,7 +31,13 @@ function(mdspan_add_test name) target_compile_definitions(${name} PUBLIC MDSPAN_ENABLE_P3663=1 - ) + ) + endif() + if(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) + target_compile_definitions(${name} + PUBLIC + MDSPAN_CONSTANT_WRAPPER_WORKAROUND=1 + ) endif() endfunction() @@ -111,13 +117,9 @@ if((CMAKE_CXX_COMPILER_ID STREQUAL Clang) OR ((CMAKE_CXX_COMPILER_ID STREQUAL GN endif() endif() -#if(MDSPAN_ENABLE_P3663 AND (CMAKE_CXX_STANDARD GREATER_EQUAL 20)) +if(MDSPAN_ENABLE_P3663) mdspan_add_test(test_constant_wrapper) -#endif() -if(MDSPAN_ENABLE_P3663 AND (CMAKE_CXX_STANDARD GREATER_EQUAL 17)) mdspan_add_test(test_strided_slice) -endif() -if(MDSPAN_ENABLE_P3663) mdspan_add_test(test_canonicalize_slices) mdspan_add_test(test_submdspan_check_static_bounds) endif() diff --git a/tests/test_canonicalize_slices.cpp b/tests/test_canonicalize_slices.cpp index 33dcbf71..c55d77eb 100644 --- a/tests/test_canonicalize_slices.cpp +++ b/tests/test_canonicalize_slices.cpp @@ -38,7 +38,7 @@ class my_nonaggregate_pair { : first_(first), second_(second) {} -#if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) +#if defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) template constexpr auto get() -> std::conditional_t { if constexpr (Index == 0) { @@ -77,7 +77,7 @@ struct std::tuple_size> template struct std::tuple_element> { -#if defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) +#if defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) static_assert(Index == 0 || Index == 1, "Invalid index"); #else static_assert(false, "Invalid index"); diff --git a/tests/test_constant_wrapper.cpp b/tests/test_constant_wrapper.cpp index fa703b13..c7654c82 100644 --- a/tests/test_constant_wrapper.cpp +++ b/tests/test_constant_wrapper.cpp @@ -24,7 +24,7 @@ namespace { // (anonymous) -#if ! 
defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) +#if ! defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) template using IC = std::integral_constant; @@ -87,7 +87,7 @@ TEST(TestConstantWrapper, IntegerPlus) { using expected_type = std::constant_wrapper; static_assert(std::is_same_v); -#if ! defined(MDSPAN_CONSTANT_WRAPPER_GCC_WORKAROUND) +#if ! defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) [[maybe_unused]] auto cw_11_plus_one = cw_11 + std::cw; [[maybe_unused]] auto one_plus_cw_11 = std::cw + cw_11; From 50f372ad7a9e8eea8f009c2b8729164a5b1adb87 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 26 Jun 2025 13:40:52 -0600 Subject: [PATCH 046/103] Fix GCC 15.1.0 build GCC 15.1.0 can now build with the following options. MDSPAN_ENABLE_P3663=ON MDSPAN_CONSTANT_WRAPPER_WORKAROUND=OFF CMAKE_CXX_FLAGS="-std=c++2c" MDSPAN_CXX_STANDARD=26 --- .../experimental/__p2630_bits/submdspan.hpp | 20 ++++++++++--------- .../__p2630_bits/submdspan_extents.hpp | 11 +++++++--- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan.hpp b/include/experimental/__p2630_bits/submdspan.hpp index f29346e9..5cc6be68 100644 --- a/include/experimental/__p2630_bits/submdspan.hpp +++ b/include/experimental/__p2630_bits/submdspan.hpp @@ -29,7 +29,17 @@ submdspan(const mdspan &src, #if defined(MDSPAN_ENABLE_P3663) -# if defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) +# if defined(__cpp_structured_bindings) && (__cpp_structured_bindings >= 202411L) + // Rely on P1061R10, "Structured bindings can introduce a pack." + // Clang 21 implements this, but GCC 15 does not. + + auto [...canonical_slices] = + submdspan_canonicalize_slices(src.extents(), slices...); + // NOTE Added to P3663R2: [canonical_]slices (incorrect formatting). 
+ auto sub_map_result = + submdspan_mapping(src.mapping(), canonical_slices...); + +# else auto canonical_slices_tuple = submdspan_canonicalize_slices(src.extents(), slices...); @@ -38,14 +48,6 @@ submdspan(const mdspan &src, return submdspan_mapping(src.mapping(), std::forward(the_slices)...); }, canonical_slices_tuple); -# else - - auto [...canonical_slices] = - submdspan_canonicalize_slices(src.extents(), slices...); - // NOTE Added to P3663R2: [canonical_]slices (incorrect formatting). - auto sub_map_result = - submdspan_mapping(src.mapping(), canonical_slices...); - # endif // NOTE Added to P3663R2: It's src.data_handle(), not src.data(). diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index ab87e5a2..888d7739 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -1044,7 +1044,10 @@ check_canonical_kth_submdspan_slice_type( } } -#if ! defined(__cpp_pack_indexing) + +#if defined(__cpp_pack_indexing) && (! (defined(__GNUC__) && (__GNUC__ < 16))) +// nothing +#else template constexpr decltype(auto) get_kth_in_pack(First&& first, Rest&&... rest) { static_assert(k <= sizeof...(Rest)); @@ -1066,7 +1069,7 @@ check_canonical_kth_subdmspan_slice_types( [&] (std::index_sequence) { (check_canonical_kth_submdspan_slice_type( exts, -#if defined(__cpp_pack_indexing) +#if defined(__cpp_pack_indexing) && (! (defined(__GNUC__) && (__GNUC__ < 16))) slices...[Inds] #else get_kth_in_pack(slices...) @@ -1182,7 +1185,9 @@ submdspan_canonicalize_slices(const extents& exts, Slices // That implements the Mandates clause of [mdspan.sub.slices] 9. detail::submdspan_canonicalize_one_slice( exts, -#if defined(__cpp_pack_indexing) + // Clang 21 accepts this code. + // GCC 15.1.0 emits an error: "cannot index an empty pack." +#if defined(__cpp_pack_indexing) && (! 
(defined(__GNUC__) && (__GNUC__ < 16))) slices...[Inds] #else detail::get_kth_in_pack(slices...) From 20e4c6bea19aae317ac24c99d76e0130bb244c8e Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 5 Feb 2026 22:28:49 -0700 Subject: [PATCH 047/103] Fix build issue from merge --- include/experimental/__p2630_bits/strided_slice.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/experimental/__p2630_bits/strided_slice.hpp b/include/experimental/__p2630_bits/strided_slice.hpp index f1efe6b5..31b0c682 100644 --- a/include/experimental/__p2630_bits/strided_slice.hpp +++ b/include/experimental/__p2630_bits/strided_slice.hpp @@ -72,9 +72,9 @@ struct strided_slice { MDSPAN_IMPL_NO_UNIQUE_ADDRESS ExtentType extent{}; MDSPAN_IMPL_NO_UNIQUE_ADDRESS StrideType stride{}; - static_assert(__mdspan_is_index_like_v); - static_assert(__mdspan_is_index_like_v); - static_assert(__mdspan_is_index_like_v); + static_assert(detail::__mdspan_is_index_like_v); + static_assert(detail::__mdspan_is_index_like_v); + static_assert(detail::__mdspan_is_index_like_v); }; } // MDSPAN_IMPL_STANDARD_NAMESPACE From d7bf6c8bb9d3f79ee8ed15e912e22a94c0787e8e Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 5 Feb 2026 22:32:16 -0700 Subject: [PATCH 048/103] Remove extra semicolon --- include/experimental/__p2630_bits/strided_slice.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/include/experimental/__p2630_bits/strided_slice.hpp b/include/experimental/__p2630_bits/strided_slice.hpp index 31b0c682..4b2e0f8e 100644 --- a/include/experimental/__p2630_bits/strided_slice.hpp +++ b/include/experimental/__p2630_bits/strided_slice.hpp @@ -58,7 +58,6 @@ namespace detail { #else mdspan_is_integral_constant::value; #endif - ; } // namespace detail // Slice Specifier allowing for strides and compile time extent From b8f90286c2abeb969e288bf10de5277d99b20027 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 5 Feb 2026 22:38:33 -0700 Subject: [PATCH 049/103] Fix some clang 
C++14 build errors --- .../__p2630_bits/submdspan_extents.hpp | 15 +++++++++++++-- .../__p2630_bits/submdspan_mapping.hpp | 6 ++++-- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 888d7739..37aa992b 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -295,16 +295,27 @@ first_of(const strided_slice &r) { // We need however not just the slice but also the extents // of the original view and which rank from the extents. // This is needed in the case of slice being full_extent_t. -MDSPAN_TEMPLATE_REQUIRES( + +// clang++ with C++14 rejects preprocessor directives (#if / #else / #endif) +// placed among a macro's arguments. In that case, it complains, "error: embedding a +// directive within macro arguments has undefined behavior +// [-Werror,-Wembedded-directive]." The fix is to duplicate the macro invocation. + #if defined(MDSPAN_ENABLE_P3663) +MDSPAN_TEMPLATE_REQUIRES( auto k, + class Extents, + class Integral, + /* requires */(std::is_convertible_v) +) #else +MDSPAN_TEMPLATE_REQUIRES( size_t k, -#endif class Extents, class Integral, /* requires */(std::is_convertible_v) ) +#endif // MDSPAN_ENABLE_P3663 MDSPAN_INLINE_FUNCTION constexpr Integral last_of( #if defined(MDSPAN_ENABLE_P3663) diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index f0ae9b31..db047f26 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -113,8 +113,10 @@ one_slice_out_of_bounds(const IndexType &ext, const Slice &slice) { // P3663 and no-P3663 cases, we don't want to copy the slice if not needed. // Thus, we introduce a special case. if constexpr (std::is_convertible_v && - ! std::is_signed_v> && - ! std::is_unsigned_v>) + ! 
std::is_signed_v< + std::remove_cv_t>> && + ! std::is_unsigned_v< + std::remove_cv_t>>) { return first_of(static_cast(slice)) == ext; } From da5eb54c29c83cbd5d7ba72a10a342d63860a076 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 5 Feb 2026 22:43:26 -0700 Subject: [PATCH 050/103] Remove C++20 requirement from benchmark --- benchmarks/submdspan_generic.hpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/benchmarks/submdspan_generic.hpp b/benchmarks/submdspan_generic.hpp index b7a7119d..a5bacc63 100644 --- a/benchmarks/submdspan_generic.hpp +++ b/benchmarks/submdspan_generic.hpp @@ -35,7 +35,7 @@ template::reference get_broadcast_element_impl( const Kokkos::mdspan& x, - typename Extents::index_type broadcast_index, + [[maybe_unused]] typename Extents::index_type broadcast_index, std::index_sequence) { #if defined(MDSPAN_USE_BRACKET_OPERATOR) && (MDSPAN_USE_BRACKET_OPERATOR != 0) @@ -215,6 +215,15 @@ struct full_extent_wrapper_t { } }; +template +constexpr MDSPAN_FUNCTION auto slice_one_extent_impl( + const Kokkos::mdspan, Layout, Accessor>& x, + Slice slice, + std::index_sequence) +{ + return Kokkos::submdspan(x, slice, ((void) Inds, full_extent_wrapper_t{})...); +} + template constexpr MDSPAN_FUNCTION auto slice_one_extent( Kokkos::mdspan, Layout, Accessor> x, Slice slice) @@ -230,9 +239,7 @@ constexpr MDSPAN_FUNCTION auto slice_one_extent( return Kokkos::submdspan(x, slice); } else { - return [&] (std::index_sequence) { - return Kokkos::submdspan(x, slice, ((void) Inds, full_extent_wrapper_t{})...); - } (std::make_index_sequence()); + return slice_one_extent_impl(x, slice, std::make_index_sequence()); } } From 7fdd1d3fbdee05e8a40796d431ceeb3a2c51fb96 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 5 Feb 2026 22:53:42 -0700 Subject: [PATCH 051/103] Fix (spurious) unused parameter warning --- benchmarks/submdspan/submdspan.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/benchmarks/submdspan/submdspan.cpp b/benchmarks/submdspan/submdspan.cpp index c91c27e6..3fd3578d 100644 --- a/benchmarks/submdspan/submdspan.cpp +++ b/benchmarks/submdspan/submdspan.cpp @@ -70,7 +70,7 @@ namespace submdspan_benchmark { template -void benchmark2_loop(ExecutionSpace exec_space, +void benchmark2_loop([[maybe_unused]] ExecutionSpace exec_space, Kokkos::mdspan, Layout> out) { using mdspan_type = Kokkos::mdspan Date: Fri, 6 Feb 2026 09:23:52 -0700 Subject: [PATCH 052/103] Try setting MDSPAN_ENABLE_P3663=ON by default I expect pre-C++20 compilers not to like this. --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5de219db..38a71986 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,7 +32,7 @@ set_property(CACHE MDSPAN_CXX_STANDARD PROPERTY STRINGS DETECT 14 17 20 23 26) option(MDSPAN_ENABLE_CONCEPTS "Try to enable concepts support by giving extra flags." On) -option(MDSPAN_ENABLE_P3663 "Enable implementation of P3663 (Future-proof submdspan_mapping)." Off) +option(MDSPAN_ENABLE_P3663 "Enable implementation of P3663 (Future-proof submdspan_mapping)." On) # Defaults to ON, because this has only been tested with Clang 21 (development Clang). option(MDSPAN_CONSTANT_WRAPPER_WORKAROUND "If MDSPAN_ENABLE_P3663 is enabled, work around some compilers' inability to build constant_wrapper." 
ON) From 61c84fdef5aa33eea5829060b9c4605ace41bc76 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Fri, 6 Feb 2026 11:50:51 -0700 Subject: [PATCH 053/103] Attempt to "back-port" constant_wrapper to C++17 --- .../__p2630_bits/constant_wrapper.hpp | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/include/experimental/__p2630_bits/constant_wrapper.hpp b/include/experimental/__p2630_bits/constant_wrapper.hpp index 40006892..a244d1dd 100644 --- a/include/experimental/__p2630_bits/constant_wrapper.hpp +++ b/include/experimental/__p2630_bits/constant_wrapper.hpp @@ -261,6 +261,10 @@ namespace exposition_only { cw_fixed_value(T) -> cw_fixed_value; // exposition only } // namespace exposition_only + +// This definition requires C++20 because it uses nontype template +// parameters of deduced class type. +#if(__cplusplus >= 202002L) template< exposition_only::cw_fixed_value X, typename unspecified = typename decltype(X)::type // exposition only @@ -271,12 +275,41 @@ struct constant_wrapper { using value_type = typename decltype(X)::type; constexpr operator decltype(auto)() const noexcept { return value; } - constexpr decltype(auto) operator()() const noexcept requires (!std::invocable) { return value; } + constexpr decltype(auto) operator()() const noexcept + requires (!std::invocable) + { + return value; + } +}; +#else + +template +struct constant_wrapper { + static constexpr exposition_only::cw_fixed_value X{}; + + static constexpr const auto & value = X.data; + using type = constant_wrapper; + using value_type = typename decltype(X)::type; + + constexpr operator decltype(auto)() const noexcept { return value; } + constexpr decltype(auto) operator()() const noexcept { return value; } }; +#endif // (__cplusplus >= 202002L) + +#if defined(__cpp_constinit) template constinit auto cw = constant_wrapper{}; +#elif(__cplusplus >= 202002L) +template + constexpr auto cw = constant_wrapper{}; + +#else +template + constexpr auto cw = 
constant_wrapper{}; +#endif + } // namespace std #endif // ! defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) From 86651ad7ffc7c7ef3337ed1d444fef1bf81a24ad Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Fri, 6 Feb 2026 15:06:37 -0700 Subject: [PATCH 054/103] Backwards-compatible integral-constant-like Add a C++17 - compatible type trait for integral-constant-like. --- .../__p2630_bits/equality_comparable.hpp | 83 +++++++++++++++++++ .../__p2630_bits/integral_constant_like.hpp | 46 ++++++++++ .../__p2630_bits/remove_cvref.hpp | 20 +++++ .../__p2630_bits/strided_slice.hpp | 18 ++-- 4 files changed, 159 insertions(+), 8 deletions(-) create mode 100644 include/experimental/__p2630_bits/equality_comparable.hpp create mode 100644 include/experimental/__p2630_bits/integral_constant_like.hpp create mode 100644 include/experimental/__p2630_bits/remove_cvref.hpp diff --git a/include/experimental/__p2630_bits/equality_comparable.hpp b/include/experimental/__p2630_bits/equality_comparable.hpp new file mode 100644 index 00000000..fa8de05d --- /dev/null +++ b/include/experimental/__p2630_bits/equality_comparable.hpp @@ -0,0 +1,83 @@ +#pragma once + +#include "../__p0009_bits/macros.hpp" +#if defined(__cpp_lib_concepts) +# include + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { + namespace detail { + template + struct is_equality_comparable : std::bool_constant> {}; + + template + struct is_equality_comparable_with : std::bool_constant> {}; + } // namespace detail +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE + +#else + +#include +#include + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +namespace detail { + + template + struct is_equality_comparable : std::false_type {}; + + template + struct is_equality_comparable< + T, + std::void_t< + decltype(std::declval() == std::declval()), + decltype(std::declval() != std::declval()) + > + > : std::bool_constant< + std::is_convertible_v< + decltype(std::declval() == std::declval()), + bool + > && + std::is_convertible_v< + 
decltype(std::declval() != std::declval()), + bool + > + > {}; + + template + struct is_equality_comparable_with : std::false_type {}; + + template + struct is_equality_comparable_with< + T, U, + std::void_t< + decltype(std::declval() == std::declval()), + decltype(std::declval() != std::declval()), + decltype(std::declval() == std::declval()), + decltype(std::declval() != std::declval()) + > + > : std::bool_constant< + std::is_equality_comparable::value && + std::is_equality_comparable::value && + std::is_convertible_v< + decltype(std::declval() == std::declval()), + bool + > && + std::is_convertible_v< + decltype(std::declval() != std::declval()), + bool + > && + std::is_convertible_v< + decltype(std::declval() == std::declval()), + bool + > && + std::is_convertible_v< + decltype(std::declval() != std::declval()), + bool + > + > {}; + +} // namespace detail +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE + +#endif // defined(__cpp_lib_concepts) + diff --git a/include/experimental/__p2630_bits/integral_constant_like.hpp b/include/experimental/__p2630_bits/integral_constant_like.hpp new file mode 100644 index 00000000..3f36c3dd --- /dev/null +++ b/include/experimental/__p2630_bits/integral_constant_like.hpp @@ -0,0 +1,46 @@ +#pragma once + +#include "equality_comparable.hpp" +#include "remove_cvref.hpp" +#if defined(__cpp_lib_concepts) +# include +#endif // __cpp_lib_concepts + +#if defined(__cpp_lib_concepts) + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { + namespace detail { + + template + concept integral_constant_like = + std::is_integral_v> && + !std::is_same_v> && + std::convertible_to && + std::equality_comparable_with && + std::bool_constant::value && + std::bool_constant(T()) == T::value>::value; + + template + constexpr bool is_integral_constant_like_v = integral_constant_like; + + } // namespace detail +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE + +#else + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { + namespace detail { + + template + constexpr bool 
is_integral_constant_like_v = + std::is_integral_v> && + ! std::is_same_v> && + std::is_convertible_to_v && + is_equality_comparable_with_v && + std::bool_constant::value && + std::bool_constant(T()) == T::value>::value; + + } // namespace detail +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE + +#endif // __cpp_lib_concepts diff --git a/include/experimental/__p2630_bits/remove_cvref.hpp b/include/experimental/__p2630_bits/remove_cvref.hpp new file mode 100644 index 00000000..f36dff76 --- /dev/null +++ b/include/experimental/__p2630_bits/remove_cvref.hpp @@ -0,0 +1,20 @@ +#pragma once + +#include + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { + namespace detail { + +#if (__cplusplus >= 202002L) + using std::remove_cvref_t; +#else + template + struct remove_cvref { + using type = typename std::remove_cv_t>; + }; + template + using remove_cvref_t = typename remove_cvref::type; +#endif // __cplusplus >= 202002L + + } // namespace detail +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/include/experimental/__p2630_bits/strided_slice.hpp b/include/experimental/__p2630_bits/strided_slice.hpp index 4b2e0f8e..c808826b 100644 --- a/include/experimental/__p2630_bits/strided_slice.hpp +++ b/include/experimental/__p2630_bits/strided_slice.hpp @@ -17,7 +17,7 @@ #pragma once -#include "../__p0009_bits/macros.hpp" +#include "integral_constant_like.hpp" #if defined(MDSPAN_ENABLE_P3663) # include "constant_wrapper.hpp" #endif @@ -27,14 +27,16 @@ namespace MDSPAN_IMPL_STANDARD_NAMESPACE { #if defined(MDSPAN_ENABLE_P3663) + +#if defined(__cpp_lib_concepts) +template +concept __mdspan_integral_constant_like = detail::integral_constant_like; +#else template -concept __mdspan_integral_constant_like = - std::is_integral_v> && - ! 
std::is_same_v> && - std::convertible_to && - std::equality_comparable_with && - std::bool_constant::value && - std::bool_constant(T()) == T::value>::value; +constexpr bool __mdspan_integral_constant_like = detail::is_integral_constant_like_v; +#endif + + #endif // MDSPAN_ENABLE_P3663 namespace detail { From 2bedcc3237d28ed625e3d281d099998504b64297 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Fri, 6 Feb 2026 15:17:32 -0700 Subject: [PATCH 055/103] Redefine mdspan_is_integral_constant correctly It was only true before for std::integral_constant, which means that the implementation wasn't conforming to the pre-P3663 status quo. --- .../__p2630_bits/strided_slice.hpp | 20 ++++--------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/include/experimental/__p2630_bits/strided_slice.hpp b/include/experimental/__p2630_bits/strided_slice.hpp index c808826b..169ca8df 100644 --- a/include/experimental/__p2630_bits/strided_slice.hpp +++ b/include/experimental/__p2630_bits/strided_slice.hpp @@ -36,30 +36,18 @@ template constexpr bool __mdspan_integral_constant_like = detail::is_integral_constant_like_v; #endif - #endif // MDSPAN_ENABLE_P3663 namespace detail { template - struct mdspan_is_integral_constant: std::false_type {}; - -#if defined(MDSPAN_ENABLE_P3663) - template<__mdspan_integral_constant_like T> - struct mdspan_is_integral_constant : std::true_type {}; -#else - // NOTE Does this mean existing code is not conforming? - template - struct mdspan_is_integral_constant>: std::true_type {}; -#endif + struct mdspan_is_integral_constant : + std::bool_constant> + {}; template constexpr bool __mdspan_is_index_like_v = (std::is_integral_v && ! 
std::is_same_v) || -#if defined(MDSPAN_ENABLE_P3663) - __mdspan_integral_constant_like; -#else - mdspan_is_integral_constant::value; -#endif + is_integral_constant_like_v; } // namespace detail // Slice Specifier allowing for strides and compile time extent From 684f978949b3008a49974d64f8fed2e0b21173c1 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Fri, 6 Feb 2026 15:19:48 -0700 Subject: [PATCH 056/103] Remove mdspan_is_integral_constant which is unused --- include/experimental/__p2630_bits/strided_slice.hpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/experimental/__p2630_bits/strided_slice.hpp b/include/experimental/__p2630_bits/strided_slice.hpp index 169ca8df..3f624b48 100644 --- a/include/experimental/__p2630_bits/strided_slice.hpp +++ b/include/experimental/__p2630_bits/strided_slice.hpp @@ -39,11 +39,6 @@ constexpr bool __mdspan_integral_constant_like = detail::is_integral_constant_li #endif // MDSPAN_ENABLE_P3663 namespace detail { - template - struct mdspan_is_integral_constant : - std::bool_constant> - {}; - template constexpr bool __mdspan_is_index_like_v = (std::is_integral_v && ! std::is_same_v) || From a5f2a451021b26662cfa92e00866dde6889f697a Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Fri, 6 Feb 2026 15:27:00 -0700 Subject: [PATCH 057/103] Convert StaticExtentFromRange so that it works for all integral-constant-like, even when MDSPAN_ENABLE_P3663 is OFF. 
--- .../__p2630_bits/submdspan_extents.hpp | 28 ++++++------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 37aa992b..4fa1aeef 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -562,28 +562,18 @@ constexpr auto multiply(const std::integral_constant &, #endif // compute new static extent from range, preserving static knowledge -template struct StaticExtentFromRange { - constexpr static size_t value = dynamic_extent; -}; - -#if defined(MDSPAN_ENABLE_P3663) -template <__mdspan_integral_constant_like A, __mdspan_integral_constant_like B> -struct StaticExtentFromRange { - constexpr static size_t value = B::value - A::value; -}; -#else -template -struct StaticExtentFromRange, - std::integral_constant> { - constexpr static size_t value = val1 - val0; +template && is_integral_constant_like_v +> +struct StaticExtentFromRange { + static constexpr ::std::size_t value = dynamic_extent; }; -template -struct StaticExtentFromRange, - integral_constant> { - constexpr static size_t value = val1 - val0; +template +struct StaticExtentFromRange { + static constexpr ::std::size_t value = B::value - A::value; }; -#endif // compute new static extent from strided_slice, preserving static // knowledge From ef95a53be253828c7a0e60200c8b59c987c90118 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Fri, 6 Feb 2026 15:43:06 -0700 Subject: [PATCH 058/103] Get rid of __mdspan_integral_constant_like --- .../__p2630_bits/strided_slice.hpp | 12 ------------ .../__p2630_bits/submdspan_extents.hpp | 18 +++++++++--------- tests/test_strided_slice.cpp | 2 +- 3 files changed, 10 insertions(+), 22 deletions(-) diff --git a/include/experimental/__p2630_bits/strided_slice.hpp b/include/experimental/__p2630_bits/strided_slice.hpp index 3f624b48..3ddc828b 100644 --- 
a/include/experimental/__p2630_bits/strided_slice.hpp +++ b/include/experimental/__p2630_bits/strided_slice.hpp @@ -26,18 +26,6 @@ namespace MDSPAN_IMPL_STANDARD_NAMESPACE { -#if defined(MDSPAN_ENABLE_P3663) - -#if defined(__cpp_lib_concepts) -template -concept __mdspan_integral_constant_like = detail::integral_constant_like; -#else -template -constexpr bool __mdspan_integral_constant_like = detail::is_integral_constant_like_v; -#endif - -#endif // MDSPAN_ENABLE_P3663 - namespace detail { template constexpr bool __mdspan_is_index_like_v = diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 4fa1aeef..471abb34 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -745,7 +745,7 @@ constexpr auto canonical_ice(S s) { // cast to IndexType before being used as the template argument // of `cw`, so we don't get a weird constant_wrapper whose value // has a different type than the second template argument. - if constexpr (__mdspan_integral_constant_like) { + if constexpr (is_integral_constant_like_v) { return std::cw(index_cast(S::value))>; } else { @@ -758,8 +758,8 @@ constexpr auto subtract_ice(X x, Y y) { #if defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) // Key to the work-around is acknowledging that GCC 11.4.0 can't find // constant_wrapper's overloaded arithmetic operators. 
- if constexpr (__mdspan_integral_constant_like> && - __mdspan_integral_constant_like>) + if constexpr (is_integral_constant_like_v> && + is_integral_constant_like_v>) { return std::cw(Y::value) - canonical_ice(X::value))>; } @@ -776,7 +776,7 @@ constexpr T de_ice(T val) { return val; } -template<__mdspan_integral_constant_like T> +template constexpr auto de_ice(T) { return T::value; } @@ -843,7 +843,7 @@ template return check_static_bounds_result::in_bounds; } else if constexpr (std::is_convertible_v) { - if constexpr (__mdspan_integral_constant_like) { + if constexpr (is_integral_constant_like_v) { // integral-constant-like types are default constructible // in constant expressions, so it's OK to use S_k{} here // instead of std::declval. Also, expressions like @@ -868,7 +868,7 @@ template else if constexpr (is_strided_slice::value) { using offset_type = typename S_k::offset_type; - if constexpr (__mdspan_integral_constant_like) { + if constexpr (is_integral_constant_like_v) { if constexpr (de_ice(offset_type{}) < 0) { return check_static_bounds_result::out_of_bounds; // 14.3.1 } @@ -877,7 +877,7 @@ template { return check_static_bounds_result::out_of_bounds; // 14.3.2 } - else if constexpr (__mdspan_integral_constant_like) { + else if constexpr (is_integral_constant_like_v) { using extent_type = typename S_k::extent_type; if constexpr (de_ice(offset_type{}) + de_ice(extent_type{}) < 0) { @@ -931,7 +931,7 @@ template }; using S_k0 = decltype(get_first(std::declval())); using S_k1 = decltype(get_second(std::declval())); - if constexpr (__mdspan_integral_constant_like) { + if constexpr (is_integral_constant_like_v) { if constexpr (de_ice(S_k0{}) < 0) { return check_static_bounds_result::out_of_bounds; // 14.4.1 } @@ -941,7 +941,7 @@ template { return check_static_bounds_result::out_of_bounds; // 14.4.2 } - else if constexpr (__mdspan_integral_constant_like) { + else if constexpr (is_integral_constant_like_v) { if constexpr ( de_ice(S_k1{}) < de_ice(S_k0{})) { diff 
--git a/tests/test_strided_slice.cpp b/tests/test_strided_slice.cpp index 21f6eae6..ebe12e6c 100644 --- a/tests/test_strided_slice.cpp +++ b/tests/test_strided_slice.cpp @@ -70,7 +70,7 @@ static_assert( decltype(my_integral_constant::value)>); static_assert( - Kokkos::__mdspan_integral_constant_like< + Kokkos::detail::is_integral_constant_like_v< my_integral_constant >); #endif // MDSPAN_ENABLE_P3663 From 00ce7f35b26bf158d06b0107027f704b0a73fa66 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Fri, 6 Feb 2026 16:46:25 -0700 Subject: [PATCH 059/103] Introduce mdspan_constant_wrapper alias --- include/experimental/__p2630_bits/strided_slice.hpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/experimental/__p2630_bits/strided_slice.hpp b/include/experimental/__p2630_bits/strided_slice.hpp index 3ddc828b..0c75cc77 100644 --- a/include/experimental/__p2630_bits/strided_slice.hpp +++ b/include/experimental/__p2630_bits/strided_slice.hpp @@ -27,6 +27,15 @@ namespace MDSPAN_IMPL_STANDARD_NAMESPACE { namespace detail { + +#if defined(MDSPAN_ENABLE_P3663) + template + using mdspan_constant_wrapper = decltype(std::cw); +#else + template + using mdspan_constant_wrapper = std::integral_constant; +#endif // MDSPAN_ENABLE_P3663 + template constexpr bool __mdspan_is_index_like_v = (std::is_integral_v && ! 
std::is_same_v) || From b4494eae1509aef69087da52fc1a993f31f6628a Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Mon, 9 Feb 2026 14:03:39 -0700 Subject: [PATCH 060/103] Some simplifications --- .../__p2630_bits/equality_comparable.hpp | 4 +- .../__p2630_bits/integral_constant_like.hpp | 4 +- .../__p2630_bits/strided_slice.hpp | 6 ++ .../__p2630_bits/submdspan_extents.hpp | 84 +++++++------------ .../__p2630_bits/submdspan_mapping.hpp | 62 +++----------- 5 files changed, 53 insertions(+), 107 deletions(-) diff --git a/include/experimental/__p2630_bits/equality_comparable.hpp b/include/experimental/__p2630_bits/equality_comparable.hpp index fa8de05d..b00cc854 100644 --- a/include/experimental/__p2630_bits/equality_comparable.hpp +++ b/include/experimental/__p2630_bits/equality_comparable.hpp @@ -56,8 +56,8 @@ namespace detail { decltype(std::declval() != std::declval()) > > : std::bool_constant< - std::is_equality_comparable::value && - std::is_equality_comparable::value && + is_equality_comparable::value && + is_equality_comparable::value && std::is_convertible_v< decltype(std::declval() == std::declval()), bool diff --git a/include/experimental/__p2630_bits/integral_constant_like.hpp b/include/experimental/__p2630_bits/integral_constant_like.hpp index 3f36c3dd..90b5b17d 100644 --- a/include/experimental/__p2630_bits/integral_constant_like.hpp +++ b/include/experimental/__p2630_bits/integral_constant_like.hpp @@ -35,8 +35,8 @@ namespace MDSPAN_IMPL_STANDARD_NAMESPACE { constexpr bool is_integral_constant_like_v = std::is_integral_v> && ! 
std::is_same_v> && - std::is_convertible_to_v && - is_equality_comparable_with_v && + std::is_convertible_v && + is_equality_comparable_with::value && std::bool_constant::value && std::bool_constant(T()) == T::value>::value; diff --git a/include/experimental/__p2630_bits/strided_slice.hpp b/include/experimental/__p2630_bits/strided_slice.hpp index 0c75cc77..ba1d4b64 100644 --- a/include/experimental/__p2630_bits/strided_slice.hpp +++ b/include/experimental/__p2630_bits/strided_slice.hpp @@ -31,6 +31,12 @@ namespace detail { #if defined(MDSPAN_ENABLE_P3663) template using mdspan_constant_wrapper = decltype(std::cw); + + template + constexpr bool is_constant_wrapper = false; + + template + constexpr bool is_constant_wrapper> = true; #else template using mdspan_constant_wrapper = std::integral_constant; diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 471abb34..3b46c7d9 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -25,34 +25,21 @@ namespace MDSPAN_IMPL_STANDARD_NAMESPACE { namespace detail { -#if defined(MDSPAN_ENABLE_P3663) -template -constexpr bool is_constant_wrapper = false; - -template -constexpr bool is_constant_wrapper> = true; -#endif - // Mapping from submapping ranks to srcmapping ranks // InvMapRank is an index_sequence, which we build recursively // to contain the mapped indices. // end of recursion specialization containing the final index_sequence +// NOTE (mfh 2026/02/06) This inexplicably only works with std::integral_constant. +// That's fine; it's not exposed to users anyway. + template< -#if defined(MDSPAN_ENABLE_P3663) - auto Counter, -#else size_t Counter, -#endif size_t... 
MapIdxs > MDSPAN_INLINE_FUNCTION -constexpr auto inv_map_rank( -#if defined(MDSPAN_ENABLE_P3663) - std::constant_wrapper, -#else +constexpr auto inv_map_rank_impl( std::integral_constant, -#endif std::index_sequence) { return std::index_sequence(); @@ -60,55 +47,44 @@ constexpr auto inv_map_rank( // specialization reducing rank by one (i.e., integral slice specifier) template< -#if defined(MDSPAN_ENABLE_P3663) - auto Counter, -#else size_t Counter, -#endif class Slice, class... SliceSpecifiers, size_t... MapIdxs> MDSPAN_INLINE_FUNCTION -constexpr auto inv_map_rank( -#if defined(MDSPAN_ENABLE_P3663) - std::constant_wrapper counter, -#else - std::integral_constant, -#endif +constexpr auto inv_map_rank_impl( + std::integral_constant counter, std::index_sequence, Slice, SliceSpecifiers... slices) { - constexpr size_t counter_value = -#if defined(MDSPAN_ENABLE_P3663) - decltype(counter){}(); -#else - Counter; -#endif - using next_idx_seq_t = std::conditional_t< std::is_convertible_v, std::index_sequence, - std::index_sequence + std::index_sequence >; -#if defined(MDSPAN_ENABLE_P3663) && ! defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) - static_assert(std::is_same_v< - decltype(counter + std::cw), - std::constant_wrapper - >); -#endif - - return inv_map_rank( -#if defined(MDSPAN_ENABLE_P3663) - std::cw, -#else + return inv_map_rank_impl( std::integral_constant(), -#endif next_idx_seq_t(), slices...); } +template< + class... SliceSpecifiers, + size_t... MapIdxs +> +MDSPAN_INLINE_FUNCTION +constexpr auto inv_map_rank( + std::index_sequence seq, + SliceSpecifiers... 
slices) +{ + return inv_map_rank_impl( + std::integral_constant(), + seq, + slices...); +} + // Helper for identifying strided_slice template struct is_strided_slice : std::false_type {}; @@ -158,8 +134,10 @@ struct index_pair_like, IndexType> { #if defined(MDSPAN_ENABLE_P3663) -template - requires (std::is_signed_v || std::is_unsigned_v) +MDSPAN_TEMPLATE_REQUIRES( + class Integral, + /* requires */(std::is_signed_v || std::is_unsigned_v) +) MDSPAN_INLINE_FUNCTION constexpr Integral first_of(Integral i) { return i; @@ -715,15 +693,15 @@ struct extents_constructor<0, Extents, NewStaticExtents...> { namespace detail { template - requires(std::is_signed_v> || - std::is_unsigned_v>) + requires(std::is_signed_v> || + std::is_unsigned_v>) constexpr auto index_cast(OtherIndexType&& i) noexcept { return i; } template - requires(! std::is_signed_v> && - !std::is_unsigned_v>) + requires(! std::is_signed_v> && + !std::is_unsigned_v>) constexpr auto index_cast(OtherIndexType&& i) noexcept { return static_cast(i); } diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index db047f26..a7d53cd4 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -332,13 +332,7 @@ layout_left::mapping::submdspan_mapping_impl( } else { // layout_stride case using dst_mapping_t = typename layout_stride::mapping; - auto inv_map = detail::inv_map_rank( -#if defined(MDSPAN_ENABLE_P3663) - std::cw, -#else - std::integral_constant(), -#endif - std::index_sequence<>(), slices...); + auto inv_map = detail::inv_map_rank(std::index_sequence<>(), slices...); return submdspan_mapping_result { dst_mapping_t(mdspan_non_standard, dst_ext, detail::construct_sub_strides( @@ -421,30 +415,15 @@ MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded::mapping{ dst_mapping_t(dst_ext, stride(1 + deduce_layout::gap_len)), offset}; } else { // layout_stride - auto inv_map = 
MDSPAN_IMPL_STANDARD_NAMESPACE::detail::inv_map_rank( -#if defined(MDSPAN_ENABLE_P3663) - std::cw, -#else - std::integral_constant(), -#endif - std::index_sequence<>(), slices...); - using dst_mapping_t = typename layout_stride::template mapping; - return submdspan_mapping_result { - dst_mapping_t(mdspan_non_standard, dst_ext, - MDSPAN_IMPL_STANDARD_NAMESPACE::detail::construct_sub_strides( - *this, inv_map, -// HIP needs deduction guides to have markups so we need to be explicit -// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have -// the issue but Clang-CUDA also doesn't accept the use of deduction guide so -// disable it for CUDA alltogether -#if defined(MDSPAN_IMPL_HAS_HIP) || defined(MDSPAN_IMPL_HAS_CUDA) - MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple{ - MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...}).values), -#else - MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple{MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...}).values), -#endif - offset - }; + auto inv_map = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::inv_map_rank( + std::index_sequence<>(), slices...); + using dst_mapping_t = typename layout_stride::template mapping; + return submdspan_mapping_result { + dst_mapping_t(mdspan_non_standard, dst_ext, + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::construct_sub_strides( + *this, inv_map, + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple{MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...}).values), + offset}; } } } @@ -586,13 +565,7 @@ layout_right::mapping::submdspan_mapping_impl( } else { // layout_stride case using dst_mapping_t = typename layout_stride::mapping; - auto inv_map = detail::inv_map_rank( -#if defined(MDSPAN_ENABLE_P3663) - std::cw, -#else - std::integral_constant(), -#endif - std::index_sequence<>(), slices...); + auto inv_map = detail::inv_map_rank(std::index_sequence<>(), slices...); return submdspan_mapping_result { dst_mapping_t(mdspan_non_standard, dst_ext, 
detail::construct_sub_strides( @@ -668,11 +641,6 @@ MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded::mapping, -#else - std::integral_constant(), -#endif std::index_sequence<>(), slices...); using dst_mapping_t = typename layout_stride::template mapping; return submdspan_mapping_result { @@ -719,13 +687,7 @@ layout_stride::mapping::submdspan_mapping_impl( auto dst_ext = submdspan_extents(extents(), slices...); using dst_ext_t = decltype(dst_ext); - auto inv_map = detail::inv_map_rank( -#if defined(MDSPAN_ENABLE_P3663) - std::cw, -#else - std::integral_constant(), -#endif - std::index_sequence<>(), slices...); + auto inv_map = detail::inv_map_rank(std::index_sequence<>(), slices...); using dst_mapping_t = typename layout_stride::template mapping; // Figure out if any slice's lower bound equals the corresponding extent. From 50c22e2dd34d7deb79bb3139e4a5eb50ca1e4002 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Mon, 9 Feb 2026 14:08:56 -0700 Subject: [PATCH 061/103] Attempt at constant_wrapper work-around --- .../__p2630_bits/constant_wrapper.hpp | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/include/experimental/__p2630_bits/constant_wrapper.hpp b/include/experimental/__p2630_bits/constant_wrapper.hpp index a244d1dd..c9463cb9 100644 --- a/include/experimental/__p2630_bits/constant_wrapper.hpp +++ b/include/experimental/__p2630_bits/constant_wrapper.hpp @@ -283,18 +283,19 @@ struct constant_wrapper { }; #else -template -struct constant_wrapper { - static constexpr exposition_only::cw_fixed_value X{}; - - static constexpr const auto & value = X.data; - using type = constant_wrapper; - using value_type = typename decltype(X)::type; - - constexpr operator decltype(auto)() const noexcept { return value; } - constexpr decltype(auto) operator()() const noexcept { return value; } +template +struct constant_wrapper_impl +{ + static constexpr T value = Value; + using value_type = T; + using type = constant_wrapper_impl; + 
constexpr operator value_type() const noexcept { return value; } + constexpr value_type operator()() const noexcept { return value; } }; +template +using constant_wrapper = constant_wrapper_impl; + #endif // (__cplusplus >= 202002L) #if defined(__cpp_constinit) From 49cf216625db58f569b600b1b1be5b7e2feb5bae Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Mon, 9 Feb 2026 14:18:16 -0700 Subject: [PATCH 062/103] Some more C++17 back-porting --- .../__p2630_bits/submdspan_extents.hpp | 93 ++++++++++++++----- 1 file changed, 68 insertions(+), 25 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 3b46c7d9..a2f5db44 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -692,22 +692,37 @@ struct extents_constructor<0, Extents, NewStaticExtents...> { namespace detail { -template - requires(std::is_signed_v> || - std::is_unsigned_v>) +MDSPAN_TEMPLATE_REQUIRES( + class IndexType, + class OtherIndexType, + /* requires */ ( + std::is_signed_v> || + std::is_unsigned_v> + ) +) constexpr auto index_cast(OtherIndexType&& i) noexcept { return i; } -template - requires(! std::is_signed_v> && - !std::is_unsigned_v>) +MDSPAN_TEMPLATE_REQUIRES( + class IndexType, + class OtherIndexType, + /* requires */ ( + ! std::is_signed_v> && + ! 
std::is_unsigned_v> + ) +) constexpr auto index_cast(OtherIndexType&& i) noexcept { return static_cast(i); } -template - requires std::convertible_to +MDSPAN_TEMPLATE_REQUIRES( + class IndexType, + class S, + /* requires */ ( + std::is_convertible_v + ) +) constexpr auto canonical_ice(S s) { static_assert(std::is_signed_v || std::is_unsigned_v); // TODO Mandates: If S models integral-constant-like and if @@ -754,7 +769,12 @@ constexpr T de_ice(T val) { return val; } -template +MDSPAN_TEMPLATE_REQUIRES( + class T, + /* requires */ ( + is_integral_constant_like_v + ) +) constexpr auto de_ice(T) { return T::value; } @@ -1152,28 +1172,51 @@ submdspan_canonicalize_one_slice(const extents& exts, Sli } // namespace detail -template - requires (sizeof...(Slices) == sizeof...(Extents)) // [mdspan.sub.slices] 8 +MDSPAN_TEMPLATE_REQUIRES( + size_t... Inds, + class IndexType, + size_t... Extents, + class... Slices, + /* requires */ ( + sizeof...(Slices) == sizeof...(Extents) + ) +) MDSPAN_INLINE_FUNCTION constexpr auto -submdspan_canonicalize_slices(const extents& exts, Slices... slices) +submdspan_canonicalize_slices_impl( + std::index_sequence, + const extents& exts, + Slices... slices) { - return [&](std::index_sequence) { - return std::tuple{ - // This is ill-formed if slices...[Inds] is not a valid slice type. - // That implements the Mandates clause of [mdspan.sub.slices] 9. - detail::submdspan_canonicalize_one_slice( - exts, - // Clang 21 accepts this code. - // GCC 15.1.0 emits an error: "cannot index an empty pack." + return std::tuple{ + // This is ill-formed if slices...[Inds] is not a valid slice type. + // That implements the Mandates clause of [mdspan.sub.slices] 9. + detail::submdspan_canonicalize_one_slice( + exts, + // Clang 21 accepts this code. + // GCC 15.1.0 emits an error: "cannot index an empty pack." #if defined(__cpp_pack_indexing) && (! 
(defined(__GNUC__) && (__GNUC__ < 16))) - slices...[Inds] + slices...[Inds] #else - detail::get_kth_in_pack(slices...) + detail::get_kth_in_pack(slices...) #endif - )... - }; - } (std::make_index_sequence{}); + )... + }; +} + +MDSPAN_TEMPLATE_REQUIRES( + class IndexType, + size_t... Extents, + class... Slices, + /* requires */ ( + sizeof...(Slices) == sizeof...(Extents) + ) +) +MDSPAN_INLINE_FUNCTION +constexpr auto +submdspan_canonicalize_slices(const extents& exts, Slices&&... slices) +{ + return submdspan_canonicalize_slices_impl(std::make_index_sequence(), exts, slices...); } #endif // MDSPAN_ENABLE_P3663 From f5fda959355a07698a0f3899ce2315a532046656 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Mon, 9 Feb 2026 14:38:15 -0700 Subject: [PATCH 063/103] Add is_layout_mapping_alike_v This is the C++17 back-port of the layout_mapping_alike concept. --- .../__p0009_bits/layout_stride.hpp | 25 +++++++++++++++++++ .../__p2630_bits/submdspan_mapping.hpp | 7 +++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/include/experimental/__p0009_bits/layout_stride.hpp b/include/experimental/__p0009_bits/layout_stride.hpp index 2227f230..e8d05d2a 100644 --- a/include/experimental/__p0009_bits/layout_stride.hpp +++ b/include/experimental/__p0009_bits/layout_stride.hpp @@ -102,6 +102,31 @@ namespace detail { std::bool_constant::value; std::bool_constant::value; }; + + template + constexpr bool is_layout_mapping_alike_v = layout_mapping_alike; + +#else + + // C++17-compatible implementation of layout_mapping_alike (used for is_layout_stride_mapping_v) + template + struct is_layout_mapping_alike_impl : std::false_type {}; + + template + struct is_layout_mapping_alike_impl::value>, + std::enable_if_t::value>, + std::enable_if_t::value>, + std::enable_if_t::value>, + std::bool_constant, + std::bool_constant, + std::bool_constant + >> : std::true_type {}; + + template + constexpr bool is_layout_mapping_alike_v = is_layout_mapping_alike_impl::value; + #endif } // 
namespace detail diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index a7d53cd4..3b998076 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -54,7 +54,12 @@ template struct submdspan_mapping_result { namespace detail { #if defined(MDSPAN_ENABLE_P3663) -template +MDSPAN_TEMPLATE_REQUIRES( + class LayoutMapping, + /* requires */ ( + is_layout_mapping_alike_v + ) +) constexpr auto submdspan_mapping_with_full_extents(const LayoutMapping& mapping) { using extents_type = typename LayoutMapping::extents_type; From efe3f0764cf95fc6a15a06990e653407f1f34359 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Mon, 9 Feb 2026 15:10:17 -0700 Subject: [PATCH 064/103] Attempt to fix is_integral_constant_like_v ...and attempt to back-port some concepts to C++17. --- .../__p2630_bits/integral_constant_like.hpp | 23 ++++++++++----- .../__p2630_bits/submdspan_mapping.hpp | 29 +++++++++++++++++++ 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/include/experimental/__p2630_bits/integral_constant_like.hpp b/include/experimental/__p2630_bits/integral_constant_like.hpp index 90b5b17d..881e7fb7 100644 --- a/include/experimental/__p2630_bits/integral_constant_like.hpp +++ b/include/experimental/__p2630_bits/integral_constant_like.hpp @@ -31,14 +31,23 @@ namespace MDSPAN_IMPL_STANDARD_NAMESPACE { namespace MDSPAN_IMPL_STANDARD_NAMESPACE { namespace detail { + template + struct is_integral_constant_like_impl : std::false_type {}; + template - constexpr bool is_integral_constant_like_v = - std::is_integral_v> && - ! std::is_same_v> && - std::is_convertible_v && - is_equality_comparable_with::value && - std::bool_constant::value && - std::bool_constant(T()) == T::value>::value; + struct is_integral_constant_like_impl> : + std::bool_constant< + std::is_integral_v> && + ! 
std::is_same_v> && + std::is_convertible_v && + is_equality_comparable_with::value && + std::bool_constant::value && + std::bool_constant(T()) == T::value>::value + > + {}; + + template + constexpr bool is_integral_constant_like_v = is_integral_constant_like_impl::value; } // namespace detail } // namespace MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index 3b998076..c49c9a9a 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -75,10 +75,13 @@ template constexpr bool is_submdspan_mapping_result< submdspan_mapping_result> = true; +#if defined(MDSPAN_IMPL_USE_CONCEPTS) && MDSPAN_HAS_CXX_20 template concept submdspan_mapping_result = is_submdspan_mapping_result; +#endif // defined(MDSPAN_IMPL_USE_CONCEPTS) && MDSPAN_HAS_CXX_20 +#if defined(MDSPAN_IMPL_USE_CONCEPTS) && MDSPAN_HAS_CXX_20 template concept mapping_sliceable_with_full_extents = requires(const LayoutMapping& mapping) { @@ -86,6 +89,32 @@ concept mapping_sliceable_with_full_extents = submdspan_mapping_with_full_extents(mapping) } -> submdspan_mapping_result; }; + +template +constexpr bool mapping_sliceable_with_full_extents_v = + mapping_sliceable_with_full_extents; + +#else +template +struct mapping_sliceable_with_full_extents_impl : std::false_type {}; + +template +struct mapping_sliceable_with_full_extents_impl< + LayoutMapping, + std::void_t< + std::enable_if_t< + is_submdspan_mapping_result< + decltype(submdspan_mapping_with_full_extents(std::declval())) + > + > + > +> : std::true_type {}; + +template +constexpr bool mapping_sliceable_with_full_extents_v = + mapping_sliceable_with_full_extents_impl::value; +#endif // defined(MDSPAN_IMPL_USE_CONCEPTS) && MDSPAN_HAS_CXX_20 + #endif // MDSPAN_ENABLE_P3663 // We use const Slice& and not Slice&& because the various From 1d6cdb4854fc46d1b4078ec584a4bc978186f2d4 Mon 
Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Mon, 9 Feb 2026 19:31:13 -0700 Subject: [PATCH 065/103] Make de_ice build with C++17 Consider how we could remove the is_integral_v constraint. --- .../experimental/__p2630_bits/submdspan_extents.hpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index a2f5db44..d5a1e713 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -764,7 +764,12 @@ constexpr auto subtract_ice(X x, Y y) { #endif } -template +MDSPAN_TEMPLATE_REQUIRES( + class T, + /* requires */ ( + std::is_integral_v> + ) +) constexpr T de_ice(T val) { return val; } @@ -772,10 +777,10 @@ constexpr T de_ice(T val) { MDSPAN_TEMPLATE_REQUIRES( class T, /* requires */ ( - is_integral_constant_like_v + is_integral_constant_like_v> ) ) -constexpr auto de_ice(T) { +constexpr decltype(T::value) de_ice(T) { return T::value; } From be1360b30a00cc232278b89eb66b076e039627e4 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Mon, 9 Feb 2026 22:24:55 -0700 Subject: [PATCH 066/103] Fix some tests --- .../experimental/__p2630_bits/strided_slice.hpp | 5 +++++ tests/test_constant_wrapper.cpp | 2 ++ tests/test_strided_slice.cpp | 15 ++++++++++----- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/include/experimental/__p2630_bits/strided_slice.hpp b/include/experimental/__p2630_bits/strided_slice.hpp index ba1d4b64..74a30498 100644 --- a/include/experimental/__p2630_bits/strided_slice.hpp +++ b/include/experimental/__p2630_bits/strided_slice.hpp @@ -64,4 +64,9 @@ struct strided_slice { static_assert(detail::__mdspan_is_index_like_v); }; +#if (__cplusplus < 202002L) +template +strided_slice(const OffsetType&, const ExtentType&, const StrideType&) -> + strided_slice; +#endif } // MDSPAN_IMPL_STANDARD_NAMESPACE diff --git 
a/tests/test_constant_wrapper.cpp b/tests/test_constant_wrapper.cpp index c7654c82..93251055 100644 --- a/tests/test_constant_wrapper.cpp +++ b/tests/test_constant_wrapper.cpp @@ -79,9 +79,11 @@ TEST(TestConstantWrapper, IntegerPlus) { constexpr size_t value3 = decltype(cw_11)(); static_assert(value == value3); +#if ! defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) && (__cplusplus >= 202002L) static_assert(std::is_same_v< decltype(cw_11), decltype(std::cw)>); +#endif [[maybe_unused]] auto expected_result = std::cw; using expected_type = std::constant_wrapper; diff --git a/tests/test_strided_slice.cpp b/tests/test_strided_slice.cpp index ebe12e6c..18c6fabe 100644 --- a/tests/test_strided_slice.cpp +++ b/tests/test_strided_slice.cpp @@ -46,8 +46,13 @@ template constexpr auto IC = std::integral_constant{}; #if defined(MDSPAN_ENABLE_P3663) -template - requires(! std::is_same_v) +MDSPAN_TEMPLATE_REQUIRES( + class T, + T Value, + /* requires */ ( + std::is_integral_v && ! std::is_same_v + ) +) struct my_integral_constant { static constexpr T value = Value; constexpr operator T () const { return value; } @@ -60,14 +65,14 @@ template constexpr auto IC2 = my_integral_constant{}; static_assert( - std::convertible_to< + std::is_convertible_v< my_integral_constant, decltype(my_integral_constant::value)>); static_assert( - std::equality_comparable_with< + Kokkos::detail::is_equality_comparable_with< my_integral_constant, - decltype(my_integral_constant::value)>); + decltype(my_integral_constant::value)>::value); static_assert( Kokkos::detail::is_integral_constant_like_v< From c1bd3adfa8847e82b5d9f61444772dfb40ba1a3b Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Mon, 9 Feb 2026 22:41:45 -0700 Subject: [PATCH 067/103] C++17 with P3663 ON now build and passes tests! 
--- tests/test_submdspan_check_static_bounds.cpp | 40 ++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/test_submdspan_check_static_bounds.cpp b/tests/test_submdspan_check_static_bounds.cpp index 60f07870..13797348 100644 --- a/tests/test_submdspan_check_static_bounds.cpp +++ b/tests/test_submdspan_check_static_bounds.cpp @@ -26,13 +26,53 @@ # include #endif +namespace adl_get_trait_detail { + template + constexpr auto get(T) = delete; + + template + struct has_get_like_pair_0 : std::bool_constant {}; + + template + struct has_get_like_pair_0(std::declval()))>> + : std::bool_constant< + std::is_convertible_v< + decltype(get<0>(std::declval())), + typename PairLike::first_type + > + > + {}; + + template + struct has_get_like_pair_1 : std::false_type {}; + + template + struct has_get_like_pair_1(std::declval()))>> + : std::bool_constant< + std::is_convertible_v< + decltype(get<1>(std::declval())), + typename PairLike::second_type + > + > + {}; +} // namespace adl_get_trait_detail + namespace test { +#if defined(MDSPAN_IMPL_USE_CONCEPTS) && MDSPAN_HAS_CXX_20 template concept has_get_like_pair = requires(T t) { { get<0>(t) } -> std::convertible_to; { get<1>(t) } -> std::convertible_to; }; +#else + +template +constexpr bool has_get_like_pair = + adl_get_trait_detail::has_get_like_pair_0::value && + adl_get_trait_detail::has_get_like_pair_1::value; + +#endif // defined(MDSPAN_IMPL_USE_CONCEPTS) && MDSPAN_HAS_CXX_20 struct foo {}; struct bar {}; From d6777fad66e0942185de044bf5a6595b4ceff6be Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Mon, 9 Feb 2026 22:58:01 -0700 Subject: [PATCH 068/103] Remove C++20 lambda from submdspan definition --- .../experimental/__p2630_bits/submdspan.hpp | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/include/experimental/__p2630_bits/submdspan.hpp b/include/experimental/__p2630_bits/submdspan.hpp index 5cc6be68..70911c2d 100644 --- a/include/experimental/__p2630_bits/submdspan.hpp +++ 
b/include/experimental/__p2630_bits/submdspan.hpp @@ -20,6 +20,24 @@ #include "submdspan_mapping.hpp" namespace MDSPAN_IMPL_STANDARD_NAMESPACE { + +#if (__cplusplus < 202002L) + +namespace detail { + + template + constexpr auto submdspan_mapping_caller( + const Mapping& src_mapping, + Slices&&... slices) + { + return submdspan_mapping(src_mapping, std::forward(slices)...); + } +}; + +} // namespace detail + +#endif // (__cplusplus < 202002L) + template MDSPAN_INLINE_FUNCTION @@ -43,11 +61,25 @@ submdspan(const mdspan &src, auto canonical_slices_tuple = submdspan_canonicalize_slices(src.extents(), slices...); + +#if (__cplusplus >= 202002L) + auto sub_map_result = std::apply( [&] (TheSlices&&... the_slices) { return submdspan_mapping(src.mapping(), std::forward(the_slices)...); }, canonical_slices_tuple); +#else + + auto sub_map_result = std::apply( + submdspan_mapping_caller( + src.mapping(), + std::forward(the_slices)... + ), + canonical_slices_tuple); + +#endif // (__cplusplus >= 202002L) + # endif // NOTE Added to P3663R2: It's src.data_handle(), not src.data(). 
From bc857781700c4d6dcb33de3f8517e4540a7edb3a Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Mon, 9 Feb 2026 22:59:29 -0700 Subject: [PATCH 069/103] Remove some C++20 designated initializers --- include/experimental/__p2630_bits/submdspan_extents.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index d5a1e713..ffd547d2 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -1140,9 +1140,9 @@ submdspan_canonicalize_one_slice(const extents& exts, Sli return strided_slice { - .offset = offset, - .extent = extent, - .stride = stride + /* .offset = */ offset, + /* .extent = */ extent, + /* .stride = */ stride }; #endif } From 90cd6237e6a245286b13510fdd8c74cd9a6d3458 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Mon, 9 Feb 2026 23:03:43 -0700 Subject: [PATCH 070/103] Remove unused parameter name --- include/experimental/__p2630_bits/submdspan_extents.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index ffd547d2..285370b6 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -53,7 +53,7 @@ template< size_t... MapIdxs> MDSPAN_INLINE_FUNCTION constexpr auto inv_map_rank_impl( - std::integral_constant counter, + std::integral_constant, std::index_sequence, Slice, SliceSpecifiers... 
slices) From b2cd032d2ce3b22c78e90849394c967f712ce25f Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 08:48:23 -0700 Subject: [PATCH 071/103] Fix more C++20 - isms --- .../experimental/__p2630_bits/submdspan.hpp | 2 +- .../__p2630_bits/submdspan_extents.hpp | 22 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/include/experimental/__p2630_bits/submdspan.hpp b/include/experimental/__p2630_bits/submdspan.hpp index 70911c2d..161a5025 100644 --- a/include/experimental/__p2630_bits/submdspan.hpp +++ b/include/experimental/__p2630_bits/submdspan.hpp @@ -32,7 +32,7 @@ namespace detail { { return submdspan_mapping(src_mapping, std::forward(slices)...); } -}; +} } // namespace detail diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 285370b6..d3a5def1 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -1064,12 +1064,33 @@ constexpr decltype(auto) get_kth_in_pack(First&& first, Rest&&... rest) { } #endif +#if (__cplusplus < 202002L) +template +MDSPAN_INLINE_FUNCTION +constexpr void +check_canonical_kth_subdmspan_slice_types_impl( + std::index_sequence, + const extents& exts, + Slices... slices) +{ + (check_canonical_kth_submdspan_slice_type( + exts, + slices...[Inds]), ...); +} +#endif // (__cplusplus < 202002L) + template MDSPAN_INLINE_FUNCTION constexpr void check_canonical_kth_subdmspan_slice_types( const extents& exts, Slices... slices) { +#if (__cplusplus < 202002L) + check_canonical_kth_subdmspan_slice_types_impl( + std::make_index_sequence{}, exts, slices...); +#else + // We really want to keep the C++20 branch here + // because it could offer compile time advantages. 
[&] (std::index_sequence) { (check_canonical_kth_submdspan_slice_type( exts, @@ -1080,6 +1101,7 @@ check_canonical_kth_subdmspan_slice_types( #endif ), ...); } (std::make_index_sequence{}); +#endif // (__cplusplus < 202002L) } // [mdspan.sub.slices] 11 From 105c2f87627a4351099154882c5a7f73b04ecfab Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 14:05:43 -0700 Subject: [PATCH 072/103] De-C++20-ize submdspan_canonicalize_slices --- .../experimental/__p2630_bits/submdspan.hpp | 19 +++++++------------ .../__p2630_bits/submdspan_extents.hpp | 6 +++--- tests/test_canonicalize_slices.cpp | 3 ++- 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan.hpp b/include/experimental/__p2630_bits/submdspan.hpp index 161a5025..73071a8a 100644 --- a/include/experimental/__p2630_bits/submdspan.hpp +++ b/include/experimental/__p2630_bits/submdspan.hpp @@ -24,7 +24,6 @@ namespace MDSPAN_IMPL_STANDARD_NAMESPACE { #if (__cplusplus < 202002L) namespace detail { - template constexpr auto submdspan_mapping_caller( const Mapping& src_mapping, @@ -32,8 +31,6 @@ namespace detail { { return submdspan_mapping(src_mapping, std::forward(slices)...); } -} - } // namespace detail #endif // (__cplusplus < 202002L) @@ -62,28 +59,26 @@ submdspan(const mdspan &src, auto canonical_slices_tuple = submdspan_canonicalize_slices(src.extents(), slices...); -#if (__cplusplus >= 202002L) +# if (__cplusplus >= 202002L) auto sub_map_result = std::apply( [&] (TheSlices&&... the_slices) { return submdspan_mapping(src.mapping(), std::forward(the_slices)...); }, canonical_slices_tuple); -#else +# else auto sub_map_result = std::apply( - submdspan_mapping_caller( + detail::submdspan_mapping_caller( src.mapping(), - std::forward(the_slices)... + std::forward(slices)... 
), canonical_slices_tuple); -#endif // (__cplusplus >= 202002L) +# endif // (__cplusplus >= 202002L) -# endif +# endif // defined(__cpp_structured_bindings) && (__cpp_structured_bindings >= 202411L) - // NOTE Added to P3663R2: It's src.data_handle(), not src.data(). - // NOTE Added to P3663R2: Missing "typename" before AccessorPolicy::offset_policy. return mdspan( src.accessor().offset(src.data_handle(), sub_map_result.offset), sub_map_result.mapping, @@ -100,6 +95,6 @@ submdspan(const mdspan &src, src.accessor().offset(src.data_handle(), sub_submdspan_mapping_result.offset), sub_submdspan_mapping_result.mapping, sub_accessor_t(src.accessor())); -#endif +#endif // MDSPAN_ENABLE_P3663 } } // namespace MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index d3a5def1..3f4b7fad 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -1075,7 +1075,7 @@ check_canonical_kth_subdmspan_slice_types_impl( { (check_canonical_kth_submdspan_slice_type( exts, - slices...[Inds]), ...); + get_kth_in_pack(slices...)), ...); } #endif // (__cplusplus < 202002L) @@ -1086,8 +1086,8 @@ check_canonical_kth_subdmspan_slice_types( const extents& exts, Slices... slices) { #if (__cplusplus < 202002L) - check_canonical_kth_subdmspan_slice_types_impl( - std::make_index_sequence{}, exts, slices...); + check_canonical_kth_subdmspan_slice_types_impl( + std::make_index_sequence(), exts, slices...); #else // We really want to keep the C++20 branch here // because it could offer compile time advantages. 
diff --git a/tests/test_canonicalize_slices.cpp b/tests/test_canonicalize_slices.cpp index c55d77eb..f695af10 100644 --- a/tests/test_canonicalize_slices.cpp +++ b/tests/test_canonicalize_slices.cpp @@ -140,7 +140,8 @@ test_canonicalize_slices( { auto result = Kokkos::submdspan_canonicalize_slices(input_extents, slices...); [&] (std::index_sequence) { - auto test_one = [&] (std::integral_constant) { + // We need maybe_unused in case the pack is empty. + [[maybe_unused]] auto test_one = [&] (std::integral_constant) { using std::get; auto left = get(result); auto right = get(expected_result); From d635a0550ce6ea7a24ecfbfa1a821b2c02bfd0c2 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 14:18:44 -0700 Subject: [PATCH 073/103] Fix submdspan itself for C++17, locally --- .../experimental/__p2630_bits/submdspan.hpp | 37 ++++++------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan.hpp b/include/experimental/__p2630_bits/submdspan.hpp index 73071a8a..ffedcb6d 100644 --- a/include/experimental/__p2630_bits/submdspan.hpp +++ b/include/experimental/__p2630_bits/submdspan.hpp @@ -21,16 +21,18 @@ namespace MDSPAN_IMPL_STANDARD_NAMESPACE { -#if (__cplusplus < 202002L) +#if defined(MDSPAN_ENABLE_P3663) && (! defined(__cpp_structured_bindings) || (__cpp_structured_bindings < 202411L)) namespace detail { - template - constexpr auto submdspan_mapping_caller( - const Mapping& src_mapping, - Slices&&... slices) - { - return submdspan_mapping(src_mapping, std::forward(slices)...); - } + template + struct submdspan_mapping_caller { + const Mapping& src_mapping; + + template + constexpr auto operator() (Slices&&... 
slices) const { + return submdspan_mapping(src_mapping, std::forward(slices)...); + } + }; } // namespace detail #endif // (__cplusplus < 202002L) @@ -50,7 +52,6 @@ submdspan(const mdspan &src, auto [...canonical_slices] = submdspan_canonicalize_slices(src.extents(), slices...); - // NOTE Added to P3663R2: [canonical_]slices (incorrect formatting). auto sub_map_result = submdspan_mapping(src.mapping(), canonical_slices...); @@ -58,25 +59,11 @@ submdspan(const mdspan &src, auto canonical_slices_tuple = submdspan_canonicalize_slices(src.extents(), slices...); - -# if (__cplusplus >= 202002L) - - auto sub_map_result = std::apply( - [&] (TheSlices&&... the_slices) { - return submdspan_mapping(src.mapping(), std::forward(the_slices)...); - }, canonical_slices_tuple); - -# else - + using src_mapping_type = decltype(src.mapping()); // CTAD doesn't seem to work auto sub_map_result = std::apply( - detail::submdspan_mapping_caller( - src.mapping(), - std::forward(slices)... - ), + detail::submdspan_mapping_caller{src.mapping()}, canonical_slices_tuple); -# endif // (__cplusplus >= 202002L) - # endif // defined(__cpp_structured_bindings) && (__cpp_structured_bindings >= 202411L) return mdspan( From 32d5788629ebef88de8afa4076be8eaa229121fa Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 14:35:00 -0700 Subject: [PATCH 074/103] Fix spurious unused parameter warning --- include/experimental/__p2630_bits/submdspan_extents.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 3f4b7fad..397e5e56 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -1108,7 +1108,7 @@ check_canonical_kth_subdmspan_slice_types( template MDSPAN_INLINE_FUNCTION constexpr auto -submdspan_canonicalize_one_slice(const extents& exts, Slice s) { 
+submdspan_canonicalize_one_slice(const extents& exts, [[maybe_unused]] Slice s) { // Part of [mdspan.sub.slices] 9. // This could be combined with the if constexpr branches below. #if defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) From a98fa87730798d9739f0086a1abe53d84dc9c320 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 14:37:33 -0700 Subject: [PATCH 075/103] Fix spurious unused parameter warning --- include/experimental/__p2630_bits/submdspan_extents.hpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 397e5e56..ec11dd60 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -1108,7 +1108,10 @@ check_canonical_kth_subdmspan_slice_types( template MDSPAN_INLINE_FUNCTION constexpr auto -submdspan_canonicalize_one_slice(const extents& exts, [[maybe_unused]] Slice s) { +submdspan_canonicalize_one_slice( + [[maybe_unused]] const extents& exts, + [[maybe_unused]] Slice s) +{ // Part of [mdspan.sub.slices] 9. // This could be combined with the if constexpr branches below. 
#if defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) From f79a387e573cddc8e8961a91a4258ca6827490be Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 14:43:03 -0700 Subject: [PATCH 076/103] Remove more C++20 - isms --- .../__p2630_bits/submdspan_extents.hpp | 6 ++--- .../__p2630_bits/submdspan_mapping.hpp | 23 ++++++++++++++++--- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index ec11dd60..49dcef86 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -1143,9 +1143,9 @@ submdspan_canonicalize_one_slice( return strided_slice { - .offset = offset, - .extent = extent, - .stride = stride + /* .offset = */ offset, + /* .extent = */ extent, + /* .stride = */ stride }; #endif } diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index c49c9a9a..664c50e3 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -54,6 +54,23 @@ template struct submdspan_mapping_result { namespace detail { #if defined(MDSPAN_ENABLE_P3663) + + +MDSPAN_TEMPLATE_REQUIRES( + class LayoutMapping, + size_t... 
Inds, + /* requires */ ( + is_layout_mapping_alike_v + ) +) +constexpr auto +submdspan_mapping_with_full_extents_impl( + const LayoutMapping& mapping, std::index_sequence) +{ + using extents_type = typename LayoutMapping::extents_type; + return submdspan_mapping(mapping, ((void) Inds, full_extent)...); +} + MDSPAN_TEMPLATE_REQUIRES( class LayoutMapping, /* requires */ ( @@ -63,9 +80,9 @@ MDSPAN_TEMPLATE_REQUIRES( constexpr auto submdspan_mapping_with_full_extents(const LayoutMapping& mapping) { using extents_type = typename LayoutMapping::extents_type; - return [&] (std::index_sequence) { - return submdspan_mapping(mapping, ((void) Inds, full_extent)...); - } (std::make_index_sequence{}); + constexpr size_t the_rank = extents_type::rank(); + return submdspan_mapping_with_full_extents_impl( + mapping, std::make_index_sequence()); } template From e8fda200a9ad0af0796f1150b8921e166fbad91a Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 15:29:56 -0700 Subject: [PATCH 077/103] Get rid of more designated initializers --- include/experimental/__p2630_bits/submdspan_extents.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 49dcef86..daefcc2b 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -1192,9 +1192,9 @@ submdspan_canonicalize_one_slice( return strided_slice { - .offset = offset, - .extent = extent, - .stride = stride + /* .offset = */ offset, + /* .extent = */ extent, + /* .stride = */ stride }; #endif } From f9c32506fc2a36c827f828134151d1970131635c Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 15:36:17 -0700 Subject: [PATCH 078/103] Remove unused type alias --- include/experimental/__p2630_bits/submdspan_mapping.hpp | 1 - 1 file changed, 1 deletion(-) diff --git 
a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index 664c50e3..4cb5af38 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -67,7 +67,6 @@ constexpr auto submdspan_mapping_with_full_extents_impl( const LayoutMapping& mapping, std::index_sequence) { - using extents_type = typename LayoutMapping::extents_type; return submdspan_mapping(mapping, ((void) Inds, full_extent)...); } From 6fa9d354f6a94509b3408504440abd142842a897 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 15:47:41 -0700 Subject: [PATCH 079/103] De-C++20-size test_canonicalize_slices --- tests/test_canonicalize_slices.cpp | 38 ++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/tests/test_canonicalize_slices.cpp b/tests/test_canonicalize_slices.cpp index f695af10..f87dd6ca 100644 --- a/tests/test_canonicalize_slices.cpp +++ b/tests/test_canonicalize_slices.cpp @@ -131,6 +131,30 @@ constexpr bool slice_equal( return left.offset == right.offset && left.extent == right.extent && left.stride == right.stride; } +template +void +test_canonicalize_slices_impl_one( + std::integral_constant, + const Result& result, + const ExpectedResult& expected_result) +{ + using std::get; + auto left = get(result); + auto right = get(expected_result); + const bool outcome = slice_equal(left, right); + ASSERT_TRUE(outcome) << " failed for k=" << Index; +} + +template +void +test_canonicalize_slices_impl( + std::index_sequence, + const Result& result, + const ExpectedResult& expected_result) +{ + (test_canonicalize_slices_impl_one(std::integral_constant{}, result, expected_result), ...); +} + template void test_canonicalize_slices( @@ -139,19 +163,7 @@ test_canonicalize_slices( Slices... 
slices) { auto result = Kokkos::submdspan_canonicalize_slices(input_extents, slices...); - [&] (std::index_sequence) { - // We need maybe_unused in case the pack is empty. - [[maybe_unused]] auto test_one = [&] (std::integral_constant) { - using std::get; - auto left = get(result); - auto right = get(expected_result); - const bool result = slice_equal(left, right); - // Below isn't well-formed for some reason -- a compiler bug? - //const bool result = slice_equal(get(result), get(expected_result)); - ASSERT_TRUE(result) << " failed for k=" << Ind; - }; - (test_one(std::integral_constant{}), ...); - } (std::make_index_sequence()); + test_canonicalize_slices_impl(std::make_index_sequence(), result, expected_result); } TEST(CanonicalizeSlices, Rank0) { From af6d1c81e6547864c84e5c7403760e36591f2b10 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 15:50:42 -0700 Subject: [PATCH 080/103] Remove C++20-isms from test_submdspan_check_static_bounds --- tests/test_submdspan_check_static_bounds.cpp | 29 ++++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/tests/test_submdspan_check_static_bounds.cpp b/tests/test_submdspan_check_static_bounds.cpp index 13797348..05ea02c1 100644 --- a/tests/test_submdspan_check_static_bounds.cpp +++ b/tests/test_submdspan_check_static_bounds.cpp @@ -187,19 +187,30 @@ void test_check_static_bounds( ; } -template -void test_full_extent( - Kokkos::extents extents) +template +void test_full_extent_impl_0( + std::index_sequence, + const Extents& extents) { using Kokkos::detail::check_static_bounds_result; + (test_check_static_bounds(extents, check_static_bounds_result::in_bounds), ...); +} - [&] (std::index_sequence) { - (test_check_static_bounds(extents, check_static_bounds_result::in_bounds), ...); - } (std::make_index_sequence()); +template +void test_full_extent_impl_1( + std::index_sequence, + const Extents& extents) +{ + using Kokkos::detail::check_static_bounds_result; + 
(test_check_static_bounds(extents, check_static_bounds_result::in_bounds), ...); +} - [&] (std::index_sequence) { - (test_check_static_bounds(extents, check_static_bounds_result::in_bounds), ...); - } (std::make_index_sequence()); +template +void test_full_extent( + Kokkos::extents extents) +{ + test_full_extent_impl_0(std::make_index_sequence(), extents); + test_full_extent_impl_1(std::make_index_sequence(), extents); } template From 16a920f9989a40f8ff6ceef6953ae98391931703 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 15:56:26 -0700 Subject: [PATCH 081/103] Remove spurious semicolon --- tests/test_submdspan_check_static_bounds.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_submdspan_check_static_bounds.cpp b/tests/test_submdspan_check_static_bounds.cpp index 05ea02c1..6348449f 100644 --- a/tests/test_submdspan_check_static_bounds.cpp +++ b/tests/test_submdspan_check_static_bounds.cpp @@ -101,7 +101,7 @@ class non_aggregate_pair { else { return p.second; } - }; + } constexpr foo get_foo() const { return foo_; } constexpr bar get_bar() const { return bar_; } From ab0e873e18bdfc726e0118a5206758c5e95854b3 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 16:04:27 -0700 Subject: [PATCH 082/103] Remove more designated initializers --- tests/test_canonicalize_slices.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/test_canonicalize_slices.cpp b/tests/test_canonicalize_slices.cpp index f87dd6ca..c21bc7ec 100644 --- a/tests/test_canonicalize_slices.cpp +++ b/tests/test_canonicalize_slices.cpp @@ -212,9 +212,9 @@ TEST(CanonicalizeSlices, Rank1_pair) { }; #else constexpr auto expected_slices = std::tuple{Kokkos::strided_slice{ - .offset = offset, - .extent = extent, - .stride = stride + /* .offset = */ offset, + /* .extent = */ extent, + /* .stride = */ stride }}; #endif constexpr auto exts = Kokkos::extents{}; @@ -241,9 +241,9 @@ TEST(CanonicalizeSlices, 
Rank1_aggregate_pair) { }; #else constexpr auto expected_slices = std::tuple{Kokkos::strided_slice{ - .offset = offset, - .extent = extent, - .stride = stride + /* .offset = */ offset, + /* .extent = */ extent, + /* .stride = */ stride }}; #endif constexpr auto exts = Kokkos::extents{}; @@ -270,9 +270,9 @@ TEST(CanonicalizeSlices, Rank1_nonaggregate_pair) { }; #else constexpr auto expected_slices = std::tuple{Kokkos::strided_slice{ - .offset = offset, - .extent = extent, - .stride = stride + /* .offset = */ offset, + /* .extent = */ extent, + /* .stride = */ stride }}; #endif constexpr auto exts = Kokkos::extents{}; From 29a68be4f4613c7c15e8e4d51f0a46aef84debc0 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 16:12:57 -0700 Subject: [PATCH 083/103] Fix some unsigned/signed comparison warnings --- .../__p2630_bits/submdspan_extents.hpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index daefcc2b..e75d249b 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -723,7 +723,7 @@ MDSPAN_TEMPLATE_REQUIRES( std::is_convertible_v ) ) -constexpr auto canonical_ice(S s) { +constexpr auto canonical_ice([[maybe_unused]] S s) { static_assert(std::is_signed_v || std::is_unsigned_v); // TODO Mandates: If S models integral-constant-like and if // decltype(S::value) is a signed or unsigned integer type, then @@ -854,10 +854,11 @@ template if constexpr (de_ice(S_k{}) < 0) { return check_static_bounds_result::out_of_bounds; // 14.3.1 } - else if constexpr (Exts_k != dynamic_extent && Exts_k <= de_ice(S_k{})) { + // We know de_ice(S_k{}) is nonnegative here, so the cast to size_t should be safe. 
+ else if constexpr (Exts_k != dynamic_extent && Exts_k <= static_cast(de_ice(S_k{}))) { return check_static_bounds_result::out_of_bounds; } - else if constexpr (Exts_k != dynamic_extent && de_ice(S_k{}) < Exts_k) { + else if constexpr (Exts_k != dynamic_extent && static_cast(de_ice(S_k{})) < Exts_k) { return check_static_bounds_result::in_bounds; } else { @@ -938,9 +939,10 @@ template if constexpr (de_ice(S_k0{}) < 0) { return check_static_bounds_result::out_of_bounds; // 14.4.1 } + // We know de_ice(S_k0{}) is nonnegative here, so the cast to size_t should be safe. else if constexpr ( Exts_k != dynamic_extent && - Exts_k < de_ice(S_k0{})) + Exts_k < static_cast(de_ice(S_k0{}))) { return check_static_bounds_result::out_of_bounds; // 14.4.2 } @@ -950,9 +952,11 @@ template { return check_static_bounds_result::out_of_bounds; // 14.4.3 } + // We know de_ice(S_k1{}) >= de_ice(S_k0{}) >= 0 here, + // so the cast to size_t should be safe. else if constexpr ( Exts_k != dynamic_extent && - Exts_k < de_ice(S_k1{})) + Exts_k < static_cast(de_ice(S_k1{}))) { return check_static_bounds_result::out_of_bounds; // 14.4.4 } From 0ef2b0ab84fd623adc08cc8bdb4b8de9c62e0177 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 16:23:39 -0700 Subject: [PATCH 084/103] Fix more things --- .../__p2630_bits/submdspan_extents.hpp | 15 +++++--- tests/test_canonicalize_slices.cpp | 37 +++---------------- 2 files changed, 15 insertions(+), 37 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index e75d249b..76fbd119 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -396,7 +396,7 @@ template < MDSPAN_INLINE_FUNCTION constexpr auto last_of( #if defined(MDSPAN_ENABLE_P3663) - std::constant_wrapper, + std::constant_wrapper k_input, #else std::integral_constant, #endif @@ -404,7 +404,7 @@ constexpr auto last_of( 
::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t) { #if defined(MDSPAN_ENABLE_P3663) - constexpr size_t k_value = std::constant_wrapper{}(); + constexpr size_t k_value = k_input(); #else constexpr size_t k_value = k; #endif @@ -876,8 +876,9 @@ template if constexpr (de_ice(offset_type{}) < 0) { return check_static_bounds_result::out_of_bounds; // 14.3.1 } + // We know de_ice(offset_type{}) >= 0, so the cast to size_t should be safe. else if constexpr ( - Exts_k != dynamic_extent && Exts_k < de_ice(offset_type{})) + Exts_k != dynamic_extent && Exts_k < static_cast(de_ice(offset_type{}))) { return check_static_bounds_result::out_of_bounds; // 14.3.2 } @@ -887,9 +888,11 @@ template if constexpr (de_ice(offset_type{}) + de_ice(extent_type{}) < 0) { return check_static_bounds_result::out_of_bounds; // 14.3.3 } + // We know de_ice(offset_type{}) + de_ice(extent_type{}) >= 0, + // so the cast to size_t should be safe. else if constexpr ( Exts_k != dynamic_extent && - Exts_k < de_ice(offset_type{}) + de_ice(extent_type{})) + Exts_k < static_cast(de_ice(offset_type{}) + de_ice(extent_type{}))) { return check_static_bounds_result::out_of_bounds; // 14.3.4 } @@ -939,7 +942,7 @@ template if constexpr (de_ice(S_k0{}) < 0) { return check_static_bounds_result::out_of_bounds; // 14.4.1 } - // We know de_ice(S_k0{}) is nonnegative here, so the cast to size_t should be safe. + // We know de_ice(S_k0{}) >= 0, so the cast to size_t should be safe. else if constexpr ( Exts_k != dynamic_extent && Exts_k < static_cast(de_ice(S_k0{}))) @@ -952,7 +955,7 @@ template { return check_static_bounds_result::out_of_bounds; // 14.4.3 } - // We know de_ice(S_k1{}) >= de_ice(S_k0{}) >= 0 here, + // We know de_ice(S_k1{}) >= de_ice(S_k0{}) >= 0, // so the cast to size_t should be safe. 
else if constexpr ( Exts_k != dynamic_extent && diff --git a/tests/test_canonicalize_slices.cpp b/tests/test_canonicalize_slices.cpp index c21bc7ec..7dcb8820 100644 --- a/tests/test_canonicalize_slices.cpp +++ b/tests/test_canonicalize_slices.cpp @@ -116,17 +116,10 @@ constexpr bool slice_equal(const Left&, Kokkos::full_extent_t) { return std::is_convertible_v; } -#if defined(__clang__) && (__clang_major__ < 15) template constexpr bool slice_equal( const Kokkos::strided_slice& left, const Kokkos::strided_slice& right) -#else -template -constexpr bool slice_equal( - const Kokkos::strided_slice& left, - const Kokkos::strided_slice& right) -#endif { return left.offset == right.offset && left.extent == right.extent && left.stride == right.stride; } @@ -198,7 +191,8 @@ TEST(CanonicalizeSlices, Rank1_pair) { constexpr auto offset = std::cw; constexpr auto extent = size_t(4u); // 11 - 7 constexpr auto stride = std::cw; -#if defined(__clang__) && (__clang_major__ < 15) + + // Some compilers aren't so good at CTAD for aggregates. const auto expected_slices = std::tuple{ Kokkos::strided_slice< decltype(offset), @@ -210,13 +204,6 @@ TEST(CanonicalizeSlices, Rank1_pair) { stride } }; -#else - constexpr auto expected_slices = std::tuple{Kokkos::strided_slice{ - /* .offset = */ offset, - /* .extent = */ extent, - /* .stride = */ stride - }}; -#endif constexpr auto exts = Kokkos::extents{}; test_canonicalize_slices(expected_slices, exts, slice0); } @@ -227,7 +214,8 @@ TEST(CanonicalizeSlices, Rank1_aggregate_pair) { constexpr auto offset = size_t(7u); constexpr auto extent = (size_t(11u) - size_t(7u)); constexpr auto stride = std::cw; -#if defined(__clang__) && (__clang_major__ < 15) + + // Some compilers aren't so good at CTAD for aggregates. 
const auto expected_slices = std::tuple{ Kokkos::strided_slice< decltype(offset), @@ -239,13 +227,6 @@ TEST(CanonicalizeSlices, Rank1_aggregate_pair) { stride } }; -#else - constexpr auto expected_slices = std::tuple{Kokkos::strided_slice{ - /* .offset = */ offset, - /* .extent = */ extent, - /* .stride = */ stride - }}; -#endif constexpr auto exts = Kokkos::extents{}; test_canonicalize_slices(expected_slices, exts, slice0); } @@ -256,7 +237,8 @@ TEST(CanonicalizeSlices, Rank1_nonaggregate_pair) { constexpr auto offset = size_t(7u); constexpr auto extent = (size_t(11u) - size_t(7u)); constexpr auto stride = std::cw; -#if defined(__clang__) && (__clang_major__ < 15) + + // Some compilers aren't so good at CTAD for aggregates. const auto expected_slices = std::tuple{ Kokkos::strided_slice< decltype(offset), @@ -268,13 +250,6 @@ TEST(CanonicalizeSlices, Rank1_nonaggregate_pair) { stride } }; -#else - constexpr auto expected_slices = std::tuple{Kokkos::strided_slice{ - /* .offset = */ offset, - /* .extent = */ extent, - /* .stride = */ stride - }}; -#endif constexpr auto exts = Kokkos::extents{}; test_canonicalize_slices(expected_slices, exts, slice0); } From 7d4ad8827b1b761004ca4099fe45fea9640119a3 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 16:24:34 -0700 Subject: [PATCH 085/103] Fix more things --- tests/test_strided_slice.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/test_strided_slice.cpp b/tests/test_strided_slice.cpp index 18c6fabe..30963c53 100644 --- a/tests/test_strided_slice.cpp +++ b/tests/test_strided_slice.cpp @@ -23,12 +23,9 @@ namespace { template void test_strided_slice(OffsetType offset, ExtentType extent, StrideType stride) { - // Clang 14 is bad at CTAD for aggregates. -#if defined(__clang__) && (__clang_major__ < 15) + // Some compilers are bad at CTAD for aggregates. 
Kokkos::strided_slice s{offset, extent, stride}; -#else - Kokkos::strided_slice s{offset, extent, stride}; -#endif + static_assert(std::is_same_v>); auto offset2 = s.offset; static_assert(std::is_same_v); From 23f6403bc62bb8ab45f2a106786ce0b1cf93e2f5 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 16:56:23 -0700 Subject: [PATCH 086/103] Deduplicate last_of P3663 vs. not impls Deduplicate the P3663 ON vs. OFF versions of last_of, by making them constrained on integral-constant-like, instead of overloading on constant_wrapper vs. integral_constant. --- .../__p2630_bits/submdspan_extents.hpp | 141 +++++++++--------- 1 file changed, 67 insertions(+), 74 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 76fbd119..1bc7bbac 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -274,66 +274,62 @@ first_of(const strided_slice &r) { // of the original view and which rank from the extents. // This is needed in the case of slice being full_extent_t. -// clang++ with C++14 is not fond of the pragma appearing inside the -// macro definition. In that case, it complains, "error: embedding a -// directive within macro arguments has undefined behavior -// [-Werror,-Wembedded-directive]." The fix is to duplicate code. 
- -#if defined(MDSPAN_ENABLE_P3663) MDSPAN_TEMPLATE_REQUIRES( - auto k, + class IntegralConstant, class Extents, class Integral, - /* requires */(std::is_convertible_v) -) -#else -MDSPAN_TEMPLATE_REQUIRES( - size_t k, - class Extents, - class Integral, - /* requires */(std::is_convertible_v) + /* requires */( + is_integral_constant_like_v && + std::is_convertible_v + ) ) -#endif // MDSPAN_ENABLE_P3663 MDSPAN_INLINE_FUNCTION constexpr Integral last_of( -#if defined(MDSPAN_ENABLE_P3663) - std::constant_wrapper, -#else - std::integral_constant, -#endif - const Extents &, - const Integral &i) + IntegralConstant, + const Extents&, + const Integral& i) { return i; } +// clang++ with C++14 is not fond of the pragma appearing inside the +// macro definition. In that case, it complains, "error: embedding a +// directive within macro arguments has undefined behavior +// [-Werror,-Wembedded-directive]." The fix is to duplicate code. + #if ! defined(MDSPAN_ENABLE_P3663) // P3663 does not need these index_pair_like overloads, // because last_of should never see a pair-like type. 
MDSPAN_TEMPLATE_REQUIRES( - size_t k, + class IntegralConstant, class Extents, class Slice, - /* requires */(index_pair_like::value) + /* requires */ ( + is_integral_constant_like_v && + index_pair_like::value + ) ) MDSPAN_INLINE_FUNCTION constexpr auto last_of( - std::integral_constant, - const Extents &, - const Slice &i) + IntegralConstant, + const Extents&, + const Slice& i) { using std::get; return get<1>(i); } MDSPAN_TEMPLATE_REQUIRES( - size_t k, + class IntegralConstant, class Extents, class IdxT1, class IdxT2, - /* requires */ (index_pair_like, size_t>::value) + /* requires */ ( + is_integral_constant_like_v && + index_pair_like, size_t>::value + ) ) constexpr auto last_of( - std::integral_constant, - const Extents &, + IntegralConstant, + const Extents&, const std::tuple& i) { using std::get; @@ -341,25 +337,35 @@ constexpr auto last_of( } MDSPAN_TEMPLATE_REQUIRES( - size_t k, + class IntegralConstant, class Extents, class IdxT1, class IdxT2, - /* requires */ (index_pair_like, size_t>::value) + /* requires */ ( + is_integral_constant_like_v && + index_pair_like, size_t>::value + ) ) MDSPAN_INLINE_FUNCTION constexpr auto last_of( - std::integral_constant, - const Extents &, + IntegralConstant, + const Extents&, const std::pair& i) { return i.second; } -template +MDSPAN_TEMPLATE_REQUIRES( + class IntegralConstant, + class Extents, + class T, + /* requires */ ( + is_integral_constant_like_v + ) +) MDSPAN_INLINE_FUNCTION constexpr auto last_of( - std::integral_constant, - const Extents &, - const std::complex &i) + IntegralConstant, + const Extents&, + const std::complex& i) { return i.imag(); } @@ -385,29 +391,21 @@ constexpr auto last_of( #pragma diagnostic push #pragma diag_suppress = implicit_return_from_non_void_function #endif -template < -#if defined(MDSPAN_ENABLE_P3663) - auto -#else - size_t -#endif - k, - class Extents> + +MDSPAN_TEMPLATE_REQUIRES( + class IntegralConstant_k, + class Extents, + /* requires */ ( + is_integral_constant_like_v + ) +) 
MDSPAN_INLINE_FUNCTION constexpr auto last_of( -#if defined(MDSPAN_ENABLE_P3663) - std::constant_wrapper k_input, -#else - std::integral_constant, -#endif - const Extents &ext, + IntegralConstant_k, + const Extents& ext, ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t) { -#if defined(MDSPAN_ENABLE_P3663) - constexpr size_t k_value = k_input(); -#else - constexpr size_t k_value = k; -#endif + constexpr size_t k_value = IntegralConstant_k{}(); if constexpr (Extents::static_extent(k_value) == dynamic_extent) { return ext.extent(k_value); @@ -436,27 +434,22 @@ constexpr auto last_of( #pragma diagnostic pop #endif -template < -#if defined(MDSPAN_ENABLE_P3663) - auto -#else - size_t -#endif - k, +MDSPAN_TEMPLATE_REQUIRES( + class IntegralConstant_k, class Extents, class OffsetType, class ExtentType, - class StrideType> + class StrideType, + /* requires */ ( + is_integral_constant_like_v + ) +) MDSPAN_INLINE_FUNCTION constexpr OffsetType last_of( -#if defined(MDSPAN_ENABLE_P3663) - std::constant_wrapper, -#else - std::integral_constant, -#endif - const Extents &, - const strided_slice &r) + IntegralConstant_k, + const Extents&, + const strided_slice& r) { return r.extent; // FIXME then why does this return OffsetType? } From 90968942a3a93774f8ceb9f5e95b6aff497bfe0c Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 17:34:18 -0700 Subject: [PATCH 087/103] Simplify constant_wrapper work-around Now there are only 3 cases: 1. std::constant_wrapper and std::cw actually exist 2. We reimplement them 3. We make constant_wrapper just look like integral_constant We now put the reimplementation in the MDSPAN_IMPL_STANDARD_NAMESPACE namespace, instead of the std namespace. (If (1), then we just have using declarations that bring the two names in from the std namespace.) 
--- .../__p2630_bits/constant_wrapper.hpp | 104 +++++------------- .../__p2630_bits/strided_slice.hpp | 4 +- .../__p2630_bits/submdspan_extents.hpp | 52 ++++----- .../__p2630_bits/submdspan_mapping.hpp | 4 +- tests/test_canonicalize_slices.cpp | 20 +++- tests/test_constant_wrapper.cpp | 40 ++++--- tests/test_strided_slice.cpp | 8 +- 7 files changed, 99 insertions(+), 133 deletions(-) diff --git a/include/experimental/__p2630_bits/constant_wrapper.hpp b/include/experimental/__p2630_bits/constant_wrapper.hpp index c9463cb9..ad031355 100644 --- a/include/experimental/__p2630_bits/constant_wrapper.hpp +++ b/include/experimental/__p2630_bits/constant_wrapper.hpp @@ -1,17 +1,23 @@ #pragma once +#include "../__p0009_bits/utility.hpp" #include #include // Implementation borrowed from // https://github.com/tzlaine/constexpr/blob/master/include/constant_wrapper.hpp -// to which P2781 links. Provisionally assume that the feature test -// macro will be called __cpp_lib_constant_wrapper and that the -// features in P2781 will go in . -#if ! defined(__cpp_lib_constant_wrapper) -#if ! defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) +// to which P2781 links. -namespace std { +#if defined(__cpp_lib_constant_wrapper) + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { + using std::constant_wrapper; + using std::cw; +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE + +#elif ! defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { namespace exposition_only { template @@ -116,7 +122,7 @@ namespace exposition_only { template friend constexpr auto operator->*(L, R) noexcept -> constant_wrapper*R::value> { return {}; } -#if defined(__cpp_explicit_this_parameter) +# if defined(__cpp_explicit_this_parameter) // call and index template constexpr auto operator()(this T, Args...) 
noexcept @@ -171,7 +177,7 @@ namespace exposition_only { template constexpr auto operator>>=(this T, R) noexcept requires requires(T::value_type x) { x >>= R::value; } { return constant_wrapper<[] { auto v = T::value; return v >>= R::value; }()>{}; } -#endif // __cpp_explicit_this_parameter +# endif // __cpp_explicit_this_parameter }; } @@ -193,30 +199,19 @@ struct constant_wrapper : exposition_only::cw_operators { constexpr operator decltype(auto)() const noexcept { return value; } constexpr decltype(auto) operator()() const noexcept requires (!std::invocable) { return value; } -#if defined(__cpp_explicit_this_parameter) +# if defined(__cpp_explicit_this_parameter) using exposition_only::cw_operators::operator(); -#endif +# endif // defined(__cpp_explicit_this_parameter) }; template constinit auto cw = constant_wrapper{}; -} // namespace std +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE -#else +#else // defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) -namespace std { - -namespace exposition_only { - template - struct cw_fixed_value; // exposition only - - template - constexpr bool is_cw_fixed_value_v = false; - - template - constexpr bool is_cw_fixed_value_v> = true; -} +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { // GCC 11.4.0 (C++20) has an internal compiler error (ICE) with // "typename unspecified = typename decltype(exposition_only::cw_fixed_value(X))::type" @@ -245,43 +240,7 @@ namespace exposition_only { // because it claims that the non-type template parameter X has a different // type in the definition versus in the declaration. -namespace exposition_only { - template - struct cw_fixed_value { // exposition only - - static_assert(! std::is_array_v, "Not implemented for array types"); - static_assert(! 
is_cw_fixed_value_v, "cw_fixed_value recursion is forbidden"); - - using type = T; - constexpr cw_fixed_value(type v) noexcept: data(v) { } - T data; - }; - - template - cw_fixed_value(T) -> cw_fixed_value; // exposition only -} // namespace exposition_only - - -// This definition requires C++20 because it uses nontype template -// parameters of deduced class type. -#if(__cplusplus >= 202002L) -template< - exposition_only::cw_fixed_value X, - typename unspecified = typename decltype(X)::type // exposition only -> -struct constant_wrapper { - static constexpr const auto & value = X.data; - using type = constant_wrapper; - using value_type = typename decltype(X)::type; - - constexpr operator decltype(auto)() const noexcept { return value; } - constexpr decltype(auto) operator()() const noexcept - requires (!std::invocable) - { - return value; - } -}; -#else +namespace detail { template struct constant_wrapper_impl @@ -293,25 +252,14 @@ struct constant_wrapper_impl constexpr value_type operator()() const noexcept { return value; } }; -template -using constant_wrapper = constant_wrapper_impl; - -#endif // (__cplusplus >= 202002L) +} // namespace detail -#if defined(__cpp_constinit) -template - constinit auto cw = constant_wrapper{}; +template +using constant_wrapper = detail::constant_wrapper_impl; -#elif(__cplusplus >= 202002L) -template - constexpr auto cw = constant_wrapper{}; - -#else template constexpr auto cw = constant_wrapper{}; -#endif - -} // namespace std -#endif // ! defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) -#endif // ! 
defined(__cpp_lib_constant_wrapper) +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE + +#endif // defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) diff --git a/include/experimental/__p2630_bits/strided_slice.hpp b/include/experimental/__p2630_bits/strided_slice.hpp index 74a30498..dc8215f0 100644 --- a/include/experimental/__p2630_bits/strided_slice.hpp +++ b/include/experimental/__p2630_bits/strided_slice.hpp @@ -30,13 +30,13 @@ namespace detail { #if defined(MDSPAN_ENABLE_P3663) template - using mdspan_constant_wrapper = decltype(std::cw); + using mdspan_constant_wrapper = decltype(cw); template constexpr bool is_constant_wrapper = false; template - constexpr bool is_constant_wrapper> = true; + constexpr bool is_constant_wrapper> = true; #else template using mdspan_constant_wrapper = std::integral_constant; diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 1bc7bbac..7afacb8e 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -183,8 +183,8 @@ constexpr Integral first_of(const Integral &i) { #if defined(MDSPAN_ENABLE_P3663) template MDSPAN_INLINE_FUNCTION -constexpr std::constant_wrapper -first_of(std::constant_wrapper) { +constexpr constant_wrapper +first_of(constant_wrapper) { return {}; } #else @@ -207,7 +207,7 @@ MDSPAN_INLINE_FUNCTION constexpr auto first_of(const ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t &) { - return std::cw; + return cw; } #else @@ -412,7 +412,7 @@ constexpr auto last_of( } else { #if defined(MDSPAN_ENABLE_P3663) - return std::cw; + return cw; #else return integral_constant(); #endif @@ -459,7 +459,7 @@ template MDSPAN_INLINE_FUNCTION constexpr auto stride_of(const T &) { #if defined(MDSPAN_ENABLE_P3663) - return std::cw; + return cw; #else return integral_constant(); #endif @@ -482,16 +482,16 @@ constexpr auto divide(const T0 &v0, const T1 &v1) { #if 
defined(MDSPAN_ENABLE_P3663) template MDSPAN_INLINE_FUNCTION -constexpr auto divide(std::constant_wrapper i0, - std::constant_wrapper i1) { - using I0 = typename std::constant_wrapper::value_type; - using I1 = typename std::constant_wrapper::value_type; +constexpr auto divide(constant_wrapper i0, + constant_wrapper i1) { + using I0 = typename constant_wrapper::value_type; + using I1 = typename constant_wrapper::value_type; static_assert(std::is_signed_v || std::is_unsigned_v); static_assert(std::is_signed_v || std::is_unsigned_v); // cutting short division by zero // this is used for strided_slice with zero extent/stride - return std::cw; + return cw; } #else template @@ -514,14 +514,14 @@ constexpr auto multiply(const T0 &v0, const T1 &v1) { #if defined(MDSPAN_ENABLE_P3663) template MDSPAN_INLINE_FUNCTION -constexpr auto multiply(std::constant_wrapper i0, - std::constant_wrapper i1) { - using I0 = typename std::constant_wrapper::value_type; - using I1 = typename std::constant_wrapper::value_type; +constexpr auto multiply(constant_wrapper i0, + constant_wrapper i1) { + using I0 = typename constant_wrapper::value_type; + using I1 = typename constant_wrapper::value_type; static_assert(std::is_signed_v || std::is_unsigned_v); static_assert(std::is_signed_v || std::is_unsigned_v); - return std::cw; + return cw; } #else template @@ -554,10 +554,10 @@ template struct StaticExtentFromStridedRange { #if defined(MDSPAN_ENABLE_P3663) template -struct StaticExtentFromStridedRange, std::constant_wrapper> { +struct StaticExtentFromStridedRange, constant_wrapper> { private: - static constexpr auto A_value = std::constant_wrapper{}(); - static constexpr auto B_value = std::constant_wrapper{}(); + static constexpr auto A_value = constant_wrapper{}(); + static constexpr auto B_value = constant_wrapper{}(); public: constexpr static size_t value = A_value > 0 ? 
1 + (A_value - 1) / B_value : 0; }; @@ -609,7 +609,7 @@ struct extents_constructor { decltype(first_of(std::declval())), decltype(last_of( #if defined(MDSPAN_ENABLE_P3663) - std::cw, + cw, #else std::integral_constant(), #endif @@ -623,7 +623,7 @@ struct extents_constructor { ext, slices_and_extents..., index_t(last_of( #if defined(MDSPAN_ENABLE_P3663) - std::cw, + cw, #else std::integral_constant(), #endif @@ -732,7 +732,7 @@ constexpr auto canonical_ice([[maybe_unused]] S s) { // of `cw`, so we don't get a weird constant_wrapper whose value // has a different type than the second template argument. if constexpr (is_integral_constant_like_v) { - return std::cw(index_cast(S::value))>; + return cw(index_cast(S::value))>; } else { return static_cast(index_cast(s)); @@ -747,7 +747,7 @@ constexpr auto subtract_ice(X x, Y y) { if constexpr (is_integral_constant_like_v> && is_integral_constant_like_v>) { - return std::cw(Y::value) - canonical_ice(X::value))>; + return cw(Y::value) - canonical_ice(X::value))>; } else { return canonical_ice(y) - canonical_ice(x); @@ -1156,12 +1156,12 @@ submdspan_canonicalize_one_slice( return strided_slice{ .offset = canonical_ice(s.real()), .extent = canonical_ice(s.imag() - s.real()), - .stride = std::cw + .stride = cw }; #else auto offset = canonical_ice(s.real()); auto extent = canonical_ice(s.imag() - s.real()); - auto stride = std::cw; + auto stride = cw; return strided_slice { @@ -1183,12 +1183,12 @@ submdspan_canonicalize_one_slice( return strided_slice{ .offset = canonical_ice(s_k0), .extent = subtract_ice(s_k0, s_k1), - .stride = std::cw + .stride = cw }; #else auto offset = canonical_ice(s_k0); auto extent = subtract_ice(s_k0, s_k1); - auto stride = std::cw; + auto stride = cw; return strided_slice { diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index 4cb5af38..e6328909 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ 
b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -232,9 +232,9 @@ constexpr bool is_range_slice_v< strided_slice< OffsetType, ExtentType, - std::constant_wrapper>, + constant_wrapper>, IndexType - > = (std::constant_wrapper{}() == IndexType(1)); + > = (constant_wrapper{}() == IndexType(1)); #else diff --git a/tests/test_canonicalize_slices.cpp b/tests/test_canonicalize_slices.cpp index 7dcb8820..41f65eb9 100644 --- a/tests/test_canonicalize_slices.cpp +++ b/tests/test_canonicalize_slices.cpp @@ -24,6 +24,8 @@ namespace my_test { +using MDSPAN_IMPL_STANDARD_NAMESPACE::cw; + template struct my_aggregate_pair { First first; @@ -179,18 +181,22 @@ TEST(CanonicalizeSlices, Rank1_integer_dynamic) { } TEST(CanonicalizeSlices, Rank1_integer_static) { + using MDSPAN_IMPL_STANDARD_NAMESPACE::cw; + constexpr auto slice0 = std::integral_constant{}; - constexpr auto expected_slices = std::tuple{std::cw}; + constexpr auto expected_slices = std::tuple{cw}; constexpr auto exts = Kokkos::extents{}; test_canonicalize_slices(expected_slices, exts, slice0); } TEST(CanonicalizeSlices, Rank1_pair) { + using MDSPAN_IMPL_STANDARD_NAMESPACE::cw; + constexpr auto slice0 = std::pair{std::integral_constant{}, 11}; - constexpr auto offset = std::cw; + constexpr auto offset = cw; constexpr auto extent = size_t(4u); // 11 - 7 - constexpr auto stride = std::cw; + constexpr auto stride = cw; // Some compilers aren't so good at CTAD for aggregates. const auto expected_slices = std::tuple{ @@ -209,11 +215,13 @@ TEST(CanonicalizeSlices, Rank1_pair) { } TEST(CanonicalizeSlices, Rank1_aggregate_pair) { + using MDSPAN_IMPL_STANDARD_NAMESPACE::cw; + constexpr auto slice0 = my_test::my_aggregate_pair{7, 11}; constexpr auto offset = size_t(7u); constexpr auto extent = (size_t(11u) - size_t(7u)); - constexpr auto stride = std::cw; + constexpr auto stride = cw; // Some compilers aren't so good at CTAD for aggregates. 
const auto expected_slices = std::tuple{ @@ -232,11 +240,13 @@ TEST(CanonicalizeSlices, Rank1_aggregate_pair) { } TEST(CanonicalizeSlices, Rank1_nonaggregate_pair) { + using MDSPAN_IMPL_STANDARD_NAMESPACE::cw; + constexpr auto slice0 = my_test::my_nonaggregate_pair(7, 11); constexpr auto offset = size_t(7u); constexpr auto extent = (size_t(11u) - size_t(7u)); - constexpr auto stride = std::cw; + constexpr auto stride = cw; // Some compilers aren't so good at CTAD for aggregates. const auto expected_slices = std::tuple{ diff --git a/tests/test_constant_wrapper.cpp b/tests/test_constant_wrapper.cpp index 93251055..074a41be 100644 --- a/tests/test_constant_wrapper.cpp +++ b/tests/test_constant_wrapper.cpp @@ -31,20 +31,23 @@ using IC = std::integral_constant; template constexpr void test_integral_constant_wrapper(IC ic) { - constexpr auto c = std::cw; + using MDSPAN_IMPL_STANDARD_NAMESPACE::cw; + using MDSPAN_IMPL_STANDARD_NAMESPACE::constant_wrapper; + + constexpr auto c = cw; static_assert(std::is_same_v< - decltype(std::cw), - std::constant_wrapper>); + decltype(cw), + constant_wrapper>); static_assert(decltype(c)::value == Value); static_assert(std::is_same_v< typename decltype(c)::type, - std::constant_wrapper>); + constant_wrapper>); static_assert(std::is_same_v< typename decltype(c)::value_type, Integral>); - constexpr auto c2 = std::cw; + constexpr auto c2 = cw; // Casting the arithmetic result back to Integral undoes // any integer promotions (e.g., short + short -> int). 
constexpr auto val_plus_1 = Integral(Value + Integral(1)); @@ -72,36 +75,39 @@ TEST(TestConstantWrapper, Construction) { #endif TEST(TestConstantWrapper, IntegerPlus) { - std::constant_wrapper cw_11; + using MDSPAN_IMPL_STANDARD_NAMESPACE::cw; + using MDSPAN_IMPL_STANDARD_NAMESPACE::constant_wrapper; + + constant_wrapper cw_11; constexpr size_t value = cw_11; constexpr size_t value2 = cw_11(); static_assert(value == value2); constexpr size_t value3 = decltype(cw_11)(); static_assert(value == value3); -#if ! defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) && (__cplusplus >= 202002L) +#if ! defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) static_assert(std::is_same_v< decltype(cw_11), - decltype(std::cw)>); + decltype(cw)>); #endif - [[maybe_unused]] auto expected_result = std::cw; - using expected_type = std::constant_wrapper; + [[maybe_unused]] auto expected_result = cw; + using expected_type = constant_wrapper; static_assert(std::is_same_v); #if ! defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) - [[maybe_unused]] auto cw_11_plus_one = cw_11 + std::cw; - [[maybe_unused]] auto one_plus_cw_11 = std::cw + cw_11; + [[maybe_unused]] auto cw_11_plus_one = cw_11 + cw; + [[maybe_unused]] auto one_plus_cw_11 = cw + cw_11; static_assert(! 
std::is_same_v< - decltype(cw_11 + std::cw), + decltype(cw_11 + cw), size_t>); static_assert(std::is_same_v< - decltype(cw_11 + std::cw), - std::constant_wrapper>); + decltype(cw_11 + cw), + constant_wrapper>); static_assert(std::is_same_v< - decltype(std::cw + cw_11), - std::constant_wrapper>); + decltype(cw + cw_11), + constant_wrapper>); #endif } diff --git a/tests/test_strided_slice.cpp b/tests/test_strided_slice.cpp index 30963c53..c0916148 100644 --- a/tests/test_strided_slice.cpp +++ b/tests/test_strided_slice.cpp @@ -86,9 +86,11 @@ TEST(StridedSlice, WellFormed) { test_strided_slice(int(1), unsigned(10), IC); #if defined(MDSPAN_ENABLE_P3663) - test_strided_slice(std::cw<1>, unsigned(10), long(3)); - test_strided_slice(int(1), std::cw, long(3)); - test_strided_slice(int(1), unsigned(10), std::cw); + using MDSPAN_IMPL_STANDARD_NAMESPACE::cw; + + test_strided_slice(cw<1>, unsigned(10), long(3)); + test_strided_slice(int(1), cw, long(3)); + test_strided_slice(int(1), unsigned(10), cw); test_strided_slice(IC2, unsigned(10), long(3)); test_strided_slice(int(1), IC2, long(3)); From a54a075fb05d52d0e5525ee6bc17a4243c7c1af4 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 17:42:03 -0700 Subject: [PATCH 088/103] Fix some (un)signed comparison warnings --- include/experimental/__p2630_bits/submdspan_extents.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 7afacb8e..dc7c74d3 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -893,7 +893,7 @@ template Exts_k != dynamic_extent && 0 <= de_ice(offset_type{}) && de_ice(offset_type{}) <= de_ice(offset_type{}) + de_ice(extent_type{}) && - de_ice(offset_type{}) + de_ice(extent_type{}) <= Exts_k) + static_cast(de_ice(offset_type{}) + de_ice(extent_type{})) <= Exts_k) { return 
check_static_bounds_result::in_bounds; // 14.3.5 } @@ -960,7 +960,7 @@ template Exts_k != dynamic_extent && 0 <= de_ice(S_k0{}) && de_ice(S_k0{}) <= de_ice(S_k1{}) && - de_ice(S_k1{}) <= Exts_k) + static_cast(de_ice(S_k1{})) <= Exts_k) { return check_static_bounds_result::in_bounds; // 14.4.5 } From 69d73fb4e51fc980cff439ce04bf67b039b03044 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 17:46:41 -0700 Subject: [PATCH 089/103] Fix unused parameter warning --- include/experimental/__p2630_bits/submdspan_extents.hpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index dc7c74d3..7923b6eb 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -740,8 +740,7 @@ constexpr auto canonical_ice([[maybe_unused]] S s) { } template -constexpr auto subtract_ice(X x, Y y) { -#if defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) +constexpr auto subtract_ice([[maybe_unused]] X x, [[maybe_unused]] Y y) { // Key to the work-around is acknowledging that GCC 11.4.0 can't find // constant_wrapper's overloaded arithmetic operators. 
if constexpr (is_integral_constant_like_v> && @@ -752,9 +751,6 @@ constexpr auto subtract_ice(X x, Y y) { else { return canonical_ice(y) - canonical_ice(x); } -#else - return canonical_ice(y) - canonical_ice(x); -#endif } MDSPAN_TEMPLATE_REQUIRES( From dc384a62e92bd55ddb8197959625d927bf01de2d Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 22:15:12 -0700 Subject: [PATCH 090/103] Work around icpx bug --- tests/test_strided_slice.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_strided_slice.cpp b/tests/test_strided_slice.cpp index c0916148..c7812bac 100644 --- a/tests/test_strided_slice.cpp +++ b/tests/test_strided_slice.cpp @@ -53,7 +53,9 @@ MDSPAN_TEMPLATE_REQUIRES( struct my_integral_constant { static constexpr T value = Value; constexpr operator T () const { return value; } -#if defined(__cpp_static_call_operator) + // icpx insists that, even with the macro protection, + // "declaring overloaded 'operator()' as 'static' is a C++2b extension." +#if (__cplusplus >= 202302L) && defined(__cpp_static_call_operator) static constexpr T operator() () { return value; } #endif }; From ab198e4320911b567d72bfd3e290c1e7e3bf63f4 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 22:27:38 -0700 Subject: [PATCH 091/103] Hide is_layout_mapping_alike from C++ < 17 as it depends on std::bool_constant, a C++17 feature. 
--- include/experimental/__p0009_bits/layout_stride.hpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/experimental/__p0009_bits/layout_stride.hpp b/include/experimental/__p0009_bits/layout_stride.hpp index e8d05d2a..ee50a191 100644 --- a/include/experimental/__p0009_bits/layout_stride.hpp +++ b/include/experimental/__p0009_bits/layout_stride.hpp @@ -106,9 +106,12 @@ namespace detail { template constexpr bool is_layout_mapping_alike_v = layout_mapping_alike; -#else +#elif MDSPAN_HAS_CXX_17 - // C++17-compatible implementation of layout_mapping_alike (used for is_layout_stride_mapping_v) + // C++17-compatible implementation of layout_mapping_alike + // (used for is_layout_stride_mapping_v). + // C++14 doesn't have bool_constant. That's OK; + // we generally don't try to back-port submdspan to C++14. template struct is_layout_mapping_alike_impl : std::false_type {}; From 0c90ab9b0aa23fc3f60bfd4788ea952a5db74039 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 22:41:29 -0700 Subject: [PATCH 092/103] PREVIOUS COMMIT PASSES ALL CHECK-IN TESTS Small cleanup --- include/experimental/__p2630_bits/strided_slice.hpp | 6 ------ include/experimental/__p2630_bits/submdspan_mapping.hpp | 4 +--- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/include/experimental/__p2630_bits/strided_slice.hpp b/include/experimental/__p2630_bits/strided_slice.hpp index dc8215f0..0980f9d9 100644 --- a/include/experimental/__p2630_bits/strided_slice.hpp +++ b/include/experimental/__p2630_bits/strided_slice.hpp @@ -29,17 +29,11 @@ namespace MDSPAN_IMPL_STANDARD_NAMESPACE { namespace detail { #if defined(MDSPAN_ENABLE_P3663) - template - using mdspan_constant_wrapper = decltype(cw); - template constexpr bool is_constant_wrapper = false; template constexpr bool is_constant_wrapper> = true; -#else - template - using mdspan_constant_wrapper = std::integral_constant; #endif // MDSPAN_ENABLE_P3663 template diff --git 
a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index e6328909..1cc223af 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -42,7 +42,6 @@ #endif namespace MDSPAN_IMPL_STANDARD_NAMESPACE { - //****************************************** // Return type of submdspan_mapping overloads //****************************************** @@ -55,7 +54,6 @@ namespace detail { #if defined(MDSPAN_ENABLE_P3663) - MDSPAN_TEMPLATE_REQUIRES( class LayoutMapping, size_t... Inds, @@ -151,7 +149,7 @@ one_slice_out_of_bounds(const IndexType &ext, const Slice &slice) { // For index types that are not integral but are nevertheless convertible // to integral, it would result in build errors when attempting to find // a common type between first_of(slice) and IndexType. This is because - // first_of(slice) in that case would return the origina slice type, + // first_of(slice) in that case would return the original slice type, // which might not necessarily be convertible to IndexType. The problem // is really in first_of: the analogous function in the Standard, // _`first`_`_`, is aware of IndexType and casts slices whose types From 59be1749a18544766cd07fe212d2042bd57f25a0 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 10 Feb 2026 22:44:37 -0700 Subject: [PATCH 093/103] DRIVE-BY FIX: Actually implement P2389 dims was in the experimental namespace; it belongs in the main namespace. 
--- include/experimental/__p2389_bits/dims.hpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/include/experimental/__p2389_bits/dims.hpp b/include/experimental/__p2389_bits/dims.hpp index 8f991f51..77eca4ed 100644 --- a/include/experimental/__p2389_bits/dims.hpp +++ b/include/experimental/__p2389_bits/dims.hpp @@ -16,11 +16,19 @@ #pragma once -// backward compatibility import into experimental namespace MDSPAN_IMPL_STANDARD_NAMESPACE { - + template< ::std::size_t Rank, class IndexType = std::size_t> using dims = :: MDSPAN_IMPL_STANDARD_NAMESPACE :: dextents; + +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE + +// backward compatibility import into experimental +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +namespace MDSPAN_IMPL_PROPOSED_NAMESPACE { + +using :: MDSPAN_IMPL_STANDARD_NAMESPACE :: dims; +} // namespace MDSPAN_IMPL_PROPOSED_NAMESPACE } // namespace MDSPAN_IMPL_STANDARD_NAMESPACE From 6fe00d713daa1437bae4d259ec4f5f08b52231f0 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Fri, 13 Feb 2026 15:58:25 -0700 Subject: [PATCH 094/103] Remove strided_slice deduction guide --- include/experimental/__p2630_bits/strided_slice.hpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/experimental/__p2630_bits/strided_slice.hpp b/include/experimental/__p2630_bits/strided_slice.hpp index 0980f9d9..d406165a 100644 --- a/include/experimental/__p2630_bits/strided_slice.hpp +++ b/include/experimental/__p2630_bits/strided_slice.hpp @@ -58,9 +58,4 @@ struct strided_slice { static_assert(detail::__mdspan_is_index_like_v); }; -#if (__cplusplus < 202002L) -template -strided_slice(const OffsetType&, const ExtentType&, const StrideType&) -> - strided_slice; -#endif } // MDSPAN_IMPL_STANDARD_NAMESPACE From 2bbe7485c0ef1fd67927a47e7f6b552f37b6d697 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Sun, 15 Feb 2026 17:34:14 -0700 Subject: [PATCH 095/103] Fix dims merge --- include/experimental/__p2389_bits/dims.hpp | 4 +++- 1 file changed, 3 
insertions(+), 1 deletion(-) diff --git a/include/experimental/__p2389_bits/dims.hpp b/include/experimental/__p2389_bits/dims.hpp index afb60de2..943facda 100644 --- a/include/experimental/__p2389_bits/dims.hpp +++ b/include/experimental/__p2389_bits/dims.hpp @@ -18,6 +18,8 @@ namespace MDSPAN_IMPL_STANDARD_NAMESPACE { -using :: MDSPAN_IMPL_STANDARD_NAMESPACE :: dims; +template< ::std::size_t Rank, class IndexType = std::size_t> +using dims = + :: MDSPAN_IMPL_STANDARD_NAMESPACE :: dextents; } // namespace MDSPAN_IMPL_STANDARD_NAMESPACE From 345dda4819b64e26e27ff78839fbb7896dd6020a Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Sun, 15 Feb 2026 17:49:19 -0700 Subject: [PATCH 096/103] Fix __mdspan_is_index_like_v For both P3663 and no-P3663 branches, fix __mdspan_is_index_like_v so that it now correctly excludes std::bool_constant or any integral-constant-like with bool value type. --- .../__p2630_bits/strided_slice.hpp | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/include/experimental/__p2630_bits/strided_slice.hpp b/include/experimental/__p2630_bits/strided_slice.hpp index d406165a..718a842f 100644 --- a/include/experimental/__p2630_bits/strided_slice.hpp +++ b/include/experimental/__p2630_bits/strided_slice.hpp @@ -36,10 +36,26 @@ namespace detail { constexpr bool is_constant_wrapper> = true; #endif // MDSPAN_ENABLE_P3663 + template + struct is_signed_or_unsigned_integral_constant_like : std::false_type {}; + + template + struct is_signed_or_unsigned_integral_constant_like< + T, std::enable_if_t> + > : std::bool_constant< + std::is_integral_v> && + ! std::is_same_v> + > + {}; + + template + constexpr bool is_signed_or_unsigned_integral_constant_like_v = + is_signed_or_unsigned_integral_constant_like::value; + template constexpr bool __mdspan_is_index_like_v = (std::is_integral_v && ! 
std::is_same_v) || - is_integral_constant_like_v; + is_signed_or_unsigned_integral_constant_like_v; } // namespace detail // Slice Specifier allowing for strides and compile time extent From e0678c99bbfaec9133dd7c38e3b2e797464344ce Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Sun, 15 Feb 2026 17:57:24 -0700 Subject: [PATCH 097/103] Remove C++26 branch from submdspan --- include/experimental/__p2630_bits/submdspan.hpp | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan.hpp b/include/experimental/__p2630_bits/submdspan.hpp index ffedcb6d..db88c7c7 100644 --- a/include/experimental/__p2630_bits/submdspan.hpp +++ b/include/experimental/__p2630_bits/submdspan.hpp @@ -46,16 +46,9 @@ submdspan(const mdspan &src, #if defined(MDSPAN_ENABLE_P3663) -# if defined(__cpp_structured_bindings) && (__cpp_structured_bindings >= 202411L) - // Rely on P1061R10, "Structured bindings can introduce a pack." - // Clang 21 implements this, but GCC 15 does not. - - auto [...canonical_slices] = - submdspan_canonicalize_slices(src.extents(), slices...); - auto sub_map_result = - submdspan_mapping(src.mapping(), canonical_slices...); - -# else + // The wording relies on P1061R10, "Structured bindings can introduce a pack." + // That's a C++26 feature. Clang 21 implements it, but GCC 15 does not. + // We back-port to C++17 here. 
auto canonical_slices_tuple = submdspan_canonicalize_slices(src.extents(), slices...); @@ -64,8 +57,6 @@ submdspan(const mdspan &src, detail::submdspan_mapping_caller{src.mapping()}, canonical_slices_tuple); -# endif // defined(__cpp_structured_bindings) && (__cpp_structured_bindings >= 202411L) - return mdspan( src.accessor().offset(src.data_handle(), sub_map_result.offset), sub_map_result.mapping, From f3da5960f3075add27d8e35267a85794e3a8fd7e Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Sun, 15 Feb 2026 18:05:16 -0700 Subject: [PATCH 098/103] Simplify submdspan per review comment to use a lambda with `auto&&... canonical_slices` capture instead of a named functor. --- .../experimental/__p2630_bits/submdspan.hpp | 27 +++---------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan.hpp b/include/experimental/__p2630_bits/submdspan.hpp index db88c7c7..bcfa4083 100644 --- a/include/experimental/__p2630_bits/submdspan.hpp +++ b/include/experimental/__p2630_bits/submdspan.hpp @@ -21,22 +21,6 @@ namespace MDSPAN_IMPL_STANDARD_NAMESPACE { -#if defined(MDSPAN_ENABLE_P3663) && (! defined(__cpp_structured_bindings) || (__cpp_structured_bindings < 202411L)) - -namespace detail { - template - struct submdspan_mapping_caller { - const Mapping& src_mapping; - - template - constexpr auto operator() (Slices&&... slices) const { - return submdspan_mapping(src_mapping, std::forward(slices)...); - } - }; -} // namespace detail - -#endif // (__cplusplus < 202002L) - template MDSPAN_INLINE_FUNCTION @@ -45,17 +29,14 @@ submdspan(const mdspan &src, SliceSpecifiers... slices) { #if defined(MDSPAN_ENABLE_P3663) - // The wording relies on P1061R10, "Structured bindings can introduce a pack." // That's a C++26 feature. Clang 21 implements it, but GCC 15 does not. - // We back-port to C++17 here. 
- auto canonical_slices_tuple = - submdspan_canonicalize_slices(src.extents(), slices...); - using src_mapping_type = decltype(src.mapping()); // CTAD doesn't seem to work auto sub_map_result = std::apply( - detail::submdspan_mapping_caller{src.mapping()}, - canonical_slices_tuple); + [&] (auto&&... canonical_slices) { + return submdspan_mapping(src.mapping(), + std::forward(canonical_slices)...); + }, submdspan_canonicalize_slices(src.extents(), slices...)); return mdspan( src.accessor().offset(src.data_handle(), sub_map_result.offset), From 087936fff343956c52f29ecd94831fce59ee1076 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Sun, 15 Feb 2026 18:18:21 -0700 Subject: [PATCH 099/103] Simplify first_of definition --- .../__p2630_bits/submdspan_extents.hpp | 64 +++++++------------ 1 file changed, 23 insertions(+), 41 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 7923b6eb..6989e564 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -132,15 +132,32 @@ struct index_pair_like, IndexType> { // first_of(slice): getting begin of slice specifier range +template +MDSPAN_INLINE_FUNCTION +constexpr OffsetType +first_of(const strided_slice& r) { + return r.offset; +} + #if defined(MDSPAN_ENABLE_P3663) -MDSPAN_TEMPLATE_REQUIRES( - class Integral, - /* requires */(std::is_signed_v || std::is_unsigned_v) -) +template MDSPAN_INLINE_FUNCTION -constexpr Integral first_of(Integral i) { - return i; +constexpr T +first_of([[maybe_unused]] T t) { + if constexpr (std::is_signed_v || std::is_unsigned_v) { + return t; + } + else { // if constexpr (is_constant_wrapper_v) { + static_assert(is_constant_wrapper); + return T{}; + } +} + +MDSPAN_INLINE_FUNCTION +constexpr auto +first_of([[maybe_unused]] ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t) { + return cw; } #else @@ -178,16 +195,6 @@ constexpr Integral 
first_of(const Integral &i) { return i; } -#endif // MDSPAN_ENABLE_P3663 - -#if defined(MDSPAN_ENABLE_P3663) -template -MDSPAN_INLINE_FUNCTION -constexpr constant_wrapper -first_of(constant_wrapper) { - return {}; -} -#else // NOTE This is technically not conforming. // Pre-P3663, first_of should work on any integral-constant-like type. // Replacing the return type "Integral" with auto does not change test results. @@ -197,20 +204,6 @@ constexpr Integral first_of(const std::integral_constant&) { return integral_constant(); } -#endif - - - -#if defined(MDSPAN_ENABLE_P3663) - -MDSPAN_INLINE_FUNCTION -constexpr -auto -first_of(const ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t &) { - return cw; -} - -#else MDSPAN_INLINE_FUNCTION constexpr @@ -219,14 +212,10 @@ first_of(const ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t &) { return {}; } -#endif // MDSPAN_ENABLE_P3663 - - // P3663 doesn't need any of these overloads, // because its version of first_of will never see pair-like types. // (The only "contiguous range of indices" slice types it sees are // full_extent_t and strided_slice with compile-time unit stride.) -#if ! defined(MDSPAN_ENABLE_P3663) MDSPAN_TEMPLATE_REQUIRES( class Slice, @@ -262,13 +251,6 @@ constexpr auto first_of(const std::complex &i) { #endif -template -MDSPAN_INLINE_FUNCTION -constexpr OffsetType -first_of(const strided_slice &r) { - return r.offset; -} - // last_of(slice): getting end of slice specifier range // We need however not just the slice but also the extents // of the original view and which rank from the extents. 
From efb48276527b50391af0e69612c4d834166b6f0a Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Sun, 15 Feb 2026 18:22:03 -0700 Subject: [PATCH 100/103] Simplify first_of more --- .../experimental/__p2630_bits/submdspan_extents.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 6989e564..445a0682 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -141,6 +141,12 @@ first_of(const strided_slice& r) { #if defined(MDSPAN_ENABLE_P3663) +MDSPAN_INLINE_FUNCTION +constexpr auto +first_of([[maybe_unused]] ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t) { + return cw; +} + template MDSPAN_INLINE_FUNCTION constexpr T @@ -154,12 +160,6 @@ first_of([[maybe_unused]] T t) { } } -MDSPAN_INLINE_FUNCTION -constexpr auto -first_of([[maybe_unused]] ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t) { - return cw; -} - #else // NOTE (mfh 2025/06/06) The original "return i;" was not conforming, From e1505ddf4158fe252a2cf6b80cf625a49ea45a97 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Sun, 15 Feb 2026 18:30:03 -0700 Subject: [PATCH 101/103] Remove outdated comment --- include/experimental/__p2630_bits/submdspan_extents.hpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 445a0682..f9eed626 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -274,11 +274,6 @@ constexpr Integral last_of( return i; } -// clang++ with C++14 is not fond of the pragma appearing inside the -// macro definition. In that case, it complains, "error: embedding a -// directive within macro arguments has undefined behavior -// [-Werror,-Wembedded-directive]." 
The fix is to duplicate code. - #if ! defined(MDSPAN_ENABLE_P3663) // P3663 does not need these index_pair_like overloads, From 971c8fb6068bcebf4399cf4a18e8133633c0ec91 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Sun, 15 Feb 2026 23:03:27 -0700 Subject: [PATCH 102/103] Fixes so that GCC trunk builds GCC trunk ("16.0.1") has std::constant_wrapper. This commit fixes any build errors that resulted from using that as the "constant_wrapper" in mdspan. It also works around some pack indexing issues that seem unique to GCC. --- .../__p2630_bits/submdspan_extents.hpp | 42 ++++--------------- .../__p2630_bits/submdspan_mapping.hpp | 2 +- tests/test_constant_wrapper.cpp | 2 +- 3 files changed, 11 insertions(+), 35 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index f9eed626..7e95185d 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -382,7 +382,7 @@ constexpr auto last_of( const Extents& ext, ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t) { - constexpr size_t k_value = IntegralConstant_k{}(); + constexpr size_t k_value = IntegralConstant_k::value; if constexpr (Extents::static_extent(k_value) == dynamic_extent) { return ext.extent(k_value); @@ -468,7 +468,9 @@ constexpr auto divide(constant_wrapper i0, // cutting short division by zero // this is used for strided_slice with zero extent/stride - return cw; + constexpr auto i0_value = static_cast(i0); + constexpr auto i1_value = static_cast(i1); + return cw; } #else template @@ -498,7 +500,9 @@ constexpr auto multiply(constant_wrapper i0, static_assert(std::is_signed_v || std::is_unsigned_v); static_assert(std::is_signed_v || std::is_unsigned_v); - return cw; + constexpr auto i0_value = static_cast(i0); + constexpr auto i1_value = static_cast(i1); + return cw; } #else template @@ -533,8 +537,8 @@ template struct StaticExtentFromStridedRange { 
template struct StaticExtentFromStridedRange, constant_wrapper> { private: - static constexpr auto A_value = constant_wrapper{}(); - static constexpr auto B_value = constant_wrapper{}(); + static constexpr auto A_value = constant_wrapper::value; + static constexpr auto B_value = constant_wrapper::value; public: constexpr static size_t value = A_value > 0 ? 1 + (A_value - 1) / B_value : 0; }; @@ -1021,10 +1025,6 @@ check_canonical_kth_submdspan_slice_type( } } - -#if defined(__cpp_pack_indexing) && (! (defined(__GNUC__) && (__GNUC__ < 16))) -// nothing -#else template constexpr decltype(auto) get_kth_in_pack(First&& first, Rest&&... rest) { static_assert(k <= sizeof...(Rest)); @@ -1035,9 +1035,7 @@ constexpr decltype(auto) get_kth_in_pack(First&& first, Rest&&... rest) { return get_kth_in_pack(std::forward(rest)...); } } -#endif -#if (__cplusplus < 202002L) template MDSPAN_INLINE_FUNCTION constexpr void @@ -1050,7 +1048,6 @@ check_canonical_kth_subdmspan_slice_types_impl( exts, get_kth_in_pack(slices...)), ...); } -#endif // (__cplusplus < 202002L) template MDSPAN_INLINE_FUNCTION @@ -1058,23 +1055,8 @@ constexpr void check_canonical_kth_subdmspan_slice_types( const extents& exts, Slices... slices) { -#if (__cplusplus < 202002L) check_canonical_kth_subdmspan_slice_types_impl( std::make_index_sequence(), exts, slices...); -#else - // We really want to keep the C++20 branch here - // because it could offer compile time advantages. - [&] (std::index_sequence) { - (check_canonical_kth_submdspan_slice_type( - exts, -#if defined(__cpp_pack_indexing) && (! (defined(__GNUC__) && (__GNUC__ < 16))) - slices...[Inds] -#else - get_kth_in_pack(slices...) -#endif - ), ...); - } (std::make_index_sequence{}); -#endif // (__cplusplus < 202002L) } // [mdspan.sub.slices] 11 @@ -1196,13 +1178,7 @@ submdspan_canonicalize_slices_impl( // That implements the Mandates clause of [mdspan.sub.slices] 9. detail::submdspan_canonicalize_one_slice( exts, - // Clang 21 accepts this code. 
- // GCC 15.1.0 emits an error: "cannot index an empty pack." -#if defined(__cpp_pack_indexing) && (! (defined(__GNUC__) && (__GNUC__ < 16))) - slices...[Inds] -#else detail::get_kth_in_pack(slices...) -#endif )... }; } diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index 951a2995..b1e467be 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -232,7 +232,7 @@ constexpr bool is_range_slice_v< ExtentType, constant_wrapper>, IndexType - > = (constant_wrapper{}() == IndexType(1)); + > = (constant_wrapper::value == IndexType(1)); #else diff --git a/tests/test_constant_wrapper.cpp b/tests/test_constant_wrapper.cpp index 074a41be..3d533b1b 100644 --- a/tests/test_constant_wrapper.cpp +++ b/tests/test_constant_wrapper.cpp @@ -80,7 +80,7 @@ TEST(TestConstantWrapper, IntegerPlus) { constant_wrapper cw_11; constexpr size_t value = cw_11; - constexpr size_t value2 = cw_11(); + constexpr size_t value2 = constant_wrapper::value; static_assert(value == value2); constexpr size_t value3 = decltype(cw_11)(); static_assert(value == value3); From 426a5a3ca9dc06fd888bf3fe082174092ab715cf Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Mon, 16 Feb 2026 09:15:09 -0700 Subject: [PATCH 103/103] Remove constant_wrapper back-port GCC trunk ("16.0.1") has std::constant_wrapper. This commit builds and passes tests with that. Thus, I've removed the constant_wrapper back-port (that really only worked with GCC 15 and Clang 21). There are now only two branches. 1. Use actual std::constant_wrapper and std::cw 2. 
Use something that only superficially has the same interface but is really just an integral_constant clone --- CMakeLists.txt | 3 - benchmarks/CMakeLists.txt | 18 -- benchmarks/submdspan_generic.hpp | 20 +- .../__p2630_bits/constant_wrapper.hpp | 240 +----------------- .../__p2630_bits/submdspan_extents.hpp | 42 +-- tests/CMakeLists.txt | 6 - tests/test_canonicalize_slices.cpp | 4 +- tests/test_constant_wrapper.cpp | 12 +- 8 files changed, 21 insertions(+), 324 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 38a71986..a17e9a87 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,9 +34,6 @@ option(MDSPAN_ENABLE_CONCEPTS "Try to enable concepts support by giving extra fl option(MDSPAN_ENABLE_P3663 "Enable implementation of P3663 (Future-proof submdspan_mapping)." On) -# Defaults to ON, because this has only been tested with Clang 21 (development Clang). -option(MDSPAN_CONSTANT_WRAPPER_WORKAROUND "If MDSPAN_ENABLE_P3663 is enabled, work around some compilers' inability to build constant_wrapper." 
ON) - ################################################################################ # Decide on the standard to use diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 918c8172..a3329a32 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -10,12 +10,6 @@ function(mdspan_add_benchmark EXENAME) if(MDSPAN_ENABLE_P3663) target_compile_definitions(${EXENAME} PUBLIC MDSPAN_ENABLE_P3663=1) endif() - if(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) - target_compile_definitions(${EXENAME} - PUBLIC - MDSPAN_CONSTANT_WRAPPER_WORKAROUND=1 - ) - endif() endfunction() if(MDSPAN_USE_SYSTEM_BENCHMARK) @@ -78,12 +72,6 @@ function(mdspan_add_cuda_benchmark EXENAME) if(MDSPAN_ENABLE_P3663) target_compile_definitions(${EXENAME} PUBLIC MDSPAN_ENABLE_P3663=1) endif() - if(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) - target_compile_definitions(${EXENAME} - PUBLIC - MDSPAN_CONSTANT_WRAPPER_WORKAROUND=1 - ) - endif() endfunction() if(MDSPAN_ENABLE_OPENMP) @@ -102,12 +90,6 @@ function(mdspan_add_openmp_benchmark EXENAME) if(MDSPAN_ENABLE_P3663) target_compile_definitions(${EXENAME} PUBLIC MDSPAN_ENABLE_P3663=1) endif() - if(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) - target_compile_definitions(${EXENAME} - PUBLIC - MDSPAN_CONSTANT_WRAPPER_WORKAROUND=1 - ) - endif() else() message(WARNING "Not adding target ${EXENAME} because OpenMP was not found") endif() diff --git a/benchmarks/submdspan_generic.hpp b/benchmarks/submdspan_generic.hpp index a5bacc63..1d18e86d 100644 --- a/benchmarks/submdspan_generic.hpp +++ b/benchmarks/submdspan_generic.hpp @@ -29,8 +29,6 @@ namespace submdspan_benchmark { -#if defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) - template constexpr typename Kokkos::mdspan::reference get_broadcast_element_impl( @@ -54,19 +52,6 @@ get_broadcast_element( return get_broadcast_element_impl(x, broadcast_index, std::make_index_sequence()); } -#else - -template -constexpr typename Kokkos::mdspan, Layout, Accessor>::reference -get_broadcast_element( - const 
Kokkos::mdspan, Layout, Accessor>& x, - typename Kokkos::extents::index_type broadcast_index) -{ - return x[((void) Exts, broadcast_index)...]; -} - -#endif - template using nonconst_test_mdspan = Kokkos::mdspan>; @@ -229,11 +214,8 @@ constexpr MDSPAN_FUNCTION auto slice_one_extent( Kokkos::mdspan, Layout, Accessor> x, Slice slice) { if constexpr (sizeof...(Exts) == 0) { -#if defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) + // Apparent redundancy is just a back-port of static_assert(false). static_assert(sizeof...(Exts) != 0, "slice_one_extent called with no extents"); -#else - static_assert(false, "slice_one_extent called with no extents"); -#endif } else if constexpr (sizeof...(Exts) == 1) { return Kokkos::submdspan(x, slice); diff --git a/include/experimental/__p2630_bits/constant_wrapper.hpp b/include/experimental/__p2630_bits/constant_wrapper.hpp index ad031355..908ccc53 100644 --- a/include/experimental/__p2630_bits/constant_wrapper.hpp +++ b/include/experimental/__p2630_bits/constant_wrapper.hpp @@ -2,243 +2,15 @@ #include "../__p0009_bits/utility.hpp" #include -#include - -// Implementation borrowed from -// https://github.com/tzlaine/constexpr/blob/master/include/constant_wrapper.hpp -// to which P2781 links. - -#if defined(__cpp_lib_constant_wrapper) - -namespace MDSPAN_IMPL_STANDARD_NAMESPACE { - using std::constant_wrapper; - using std::cw; -} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE - -#elif ! 
defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) namespace MDSPAN_IMPL_STANDARD_NAMESPACE { -namespace exposition_only { - template - struct cw_fixed_value; // exposition only -} - -template< - exposition_only::cw_fixed_value X, - typename unspecified = - typename decltype(exposition_only::cw_fixed_value(X))::type // exposition only -> -struct constant_wrapper; - -template -concept constexpr_param = requires { typename constant_wrapper; }; // exposition only - -namespace exposition_only { - template - struct cw_fixed_value { // exposition only - using type = T; - constexpr cw_fixed_value(type v) noexcept: data(v) { } - T data; - }; - - template - struct cw_fixed_value { // exposition only - using type = T[Extent]; - constexpr cw_fixed_value(T (&arr)[Extent]) noexcept: cw_fixed_value(arr, std::make_index_sequence()) { } - T data[Extent]; - - private: - template - constexpr cw_fixed_value(T (&arr)[Extent], std::index_sequence) noexcept: data{arr[Idx]...} { } - }; - - template - cw_fixed_value(T (&)[Extent]) -> cw_fixed_value; // exposition only - template - cw_fixed_value(T) -> cw_fixed_value; // exposition only - - struct cw_operators { // exposition only - // unary operators - template - friend constexpr auto operator+(T) noexcept -> constant_wrapper<(+T::value)> { return {}; } - template - friend constexpr auto operator-(T) noexcept -> constant_wrapper<(-T::value)> { return {}; } - template - friend constexpr auto operator~(T) noexcept -> constant_wrapper<(~T::value)> { return {}; } - template - friend constexpr auto operator!(T) noexcept -> constant_wrapper<(!T::value)> { return {}; } - template - friend constexpr auto operator&(T) noexcept -> constant_wrapper<(&T::value)> { return {}; } - template - friend constexpr auto operator*(T) noexcept -> constant_wrapper<(*T::value)> { return {}; } - - // binary operators - template - friend constexpr auto operator+(L, R) noexcept -> constant_wrapper<(L::value + R::value)> { return {}; } - template - friend constexpr auto 
operator-(L, R) noexcept -> constant_wrapper<(L::value - R::value)> { return {}; } - template - friend constexpr auto operator*(L, R) noexcept -> constant_wrapper<(L::value * R::value)> { return {}; } - template - friend constexpr auto operator/(L, R) noexcept -> constant_wrapper<(L::value / R::value)> { return {}; } - template - friend constexpr auto operator%(L, R) noexcept -> constant_wrapper<(L::value % R::value)> { return {}; } - - template - friend constexpr auto operator<<(L, R) noexcept -> constant_wrapper<(L::value << R::value)> { return {}; } - template - friend constexpr auto operator>>(L, R) noexcept -> constant_wrapper<(L::value >> R::value)> { return {}; } - template - friend constexpr auto operator&(L, R) noexcept -> constant_wrapper<(L::value & R::value)> { return {}; } - template - friend constexpr auto operator|(L, R) noexcept -> constant_wrapper<(L::value | R::value)> { return {}; } - template - friend constexpr auto operator^(L, R) noexcept -> constant_wrapper<(L::value ^ R::value)> { return {}; } - - template - requires (!is_constructible_v || !is_constructible_v) - friend constexpr auto operator&&(L, R) noexcept -> constant_wrapper<(L::value && R::value)> { return {}; } - template - requires (!is_constructible_v || !is_constructible_v) - friend constexpr auto operator||(L, R) noexcept -> constant_wrapper<(L::value || R::value)> { return {}; } - - // comparisons - template - friend constexpr auto operator<=>(L, R) noexcept -> constant_wrapper<(L::value <=> R::value)> { return {}; } - template - friend constexpr auto operator<(L, R) noexcept -> constant_wrapper<(L::value < R::value)> { return {}; } - template - friend constexpr auto operator<=(L, R) noexcept -> constant_wrapper<(L::value <= R::value)> { return {}; } - template - friend constexpr auto operator==(L, R) noexcept -> constant_wrapper<(L::value == R::value)> { return {}; } - template - friend constexpr auto operator!=(L, R) noexcept -> constant_wrapper<(L::value != R::value)> { return 
{}; } - template - friend constexpr auto operator>(L, R) noexcept -> constant_wrapper<(L::value > R::value)> { return {}; } - template - friend constexpr auto operator>=(L, R) noexcept -> constant_wrapper<(L::value >= R::value)> { return {}; } - - template - friend constexpr auto operator->*(L, R) noexcept -> constant_wrapper*R::value> { return {}; } - -# if defined(__cpp_explicit_this_parameter) - // call and index - template - constexpr auto operator()(this T, Args...) noexcept - requires requires(Args...) { constant_wrapper(); } - { return constant_wrapper{}; } - template - constexpr auto operator[](this T, Args...) noexcept -> constant_wrapper<(T::value[Args::value...])> - { return {}; } - - // pseudo-mutators - template - constexpr auto operator++(this T) noexcept requires requires(T::value_type x) { ++x; } - { return constant_wrapper<[] { auto c = T::value; return ++c; }()>{}; } - template - constexpr auto operator++(this T, int) noexcept requires requires(T::value_type x) { x++; } - { return constant_wrapper<[] { auto c = T::value; return c++; }()>{}; } - - template - constexpr auto operator--(this T) noexcept requires requires(T::value_type x) { --x; } - { return constant_wrapper<[] { auto c = T::value; return --c; }()>{}; } - template - constexpr auto operator--(this T, int) noexcept requires requires(T::value_type x) { x--; } - { return constant_wrapper<[] { auto c = T::value; return c--; }()>{}; } - - template - constexpr auto operator+=(this T, R) noexcept requires requires(T::value_type x) { x += R::value; } - { return constant_wrapper<[] { auto v = T::value; return v += R::value; }()>{}; } - template - constexpr auto operator-=(this T, R) noexcept requires requires(T::value_type x) { x -= R::value; } - { return constant_wrapper<[] { auto v = T::value; return v -= R::value; }()>{}; } - template - constexpr auto operator*=(this T, R) noexcept requires requires(T::value_type x) { x *= R::value; } - { return constant_wrapper<[] { auto v = T::value; return 
v *= R::value; }()>{}; } - template - constexpr auto operator/=(this T, R) noexcept requires requires(T::value_type x) { x /= R::value; } - { return constant_wrapper<[] { auto v = T::value; return v /= R::value; }()>{}; } - template - constexpr auto operator%=(this T, R) noexcept requires requires(T::value_type x) { x %= R::value; } - { return constant_wrapper<[] { auto v = T::value; return v %= R::value; }()>{}; } - template - constexpr auto operator&=(this T, R) noexcept requires requires(T::value_type x) { x &= R::value; } - { return constant_wrapper<[] { auto v = T::value; return v &= R::value; }()>{}; } - template - constexpr auto operator|=(this T, R) noexcept requires requires(T::value_type x) { x |= R::value; } - { return constant_wrapper<[] { auto v = T::value; return v |= R::value; }()>{}; } - template - constexpr auto operator^=(this T, R) noexcept requires requires(T::value_type x) { x ^= R::value; } - { return constant_wrapper<[] { auto v = T::value; return v ^= R::value; }()>{}; } - template - constexpr auto operator<<=(this T, R) noexcept requires requires(T::value_type x) { x <<= R::value; } - { return constant_wrapper<[] { auto v = T::value; return v <<= R::value; }()>{}; } - template - constexpr auto operator>>=(this T, R) noexcept requires requires(T::value_type x) { x >>= R::value; } - { return constant_wrapper<[] { auto v = T::value; return v >>= R::value; }()>{}; } -# endif // __cpp_explicit_this_parameter - }; -} - -template -struct constant_wrapper : exposition_only::cw_operators { - static constexpr const auto & value = X.data; - using type = constant_wrapper; - using value_type = typename decltype(X)::type; - - template - constexpr auto operator=(R) const noexcept - requires requires(value_type x) { x = R::value; } - { - return constant_wrapper< - [] { auto v = value; return v = R::value; }() - >{}; - } - - constexpr operator decltype(auto)() const noexcept { return value; } - constexpr decltype(auto) operator()() const noexcept requires 
(!std::invocable) { return value; } - -# if defined(__cpp_explicit_this_parameter) - using exposition_only::cw_operators::operator(); -# endif // defined(__cpp_explicit_this_parameter) -}; - -template - constinit auto cw = constant_wrapper{}; - -} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE - -#else // defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) - -namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +#if defined(__cpp_lib_constant_wrapper) -// GCC 11.4.0 (C++20) has an internal compiler error (ICE) with -// "typename unspecified = typename decltype(exposition_only::cw_fixed_value(X))::type" -// as second template parameter of constant_wrapper below. -// -// Replacing that expression with use of the following alias doesn't help. -// -// namespace exposition_only { -// template -// using unspecified_t = typename decltype(cw_fixed_value(X))::type; -// } -// -// Moving the definitions of the various specializations of cw_fixed_value -// above this point doesn't help either. -// -// Replacing the offending expression with -// -// typename unspecified = typename decltype(X)::type // exposition only -// -// gets us only part of the way there. The problem ultimately relates to -// GCC 11.4.0 being unable to find the overloaded arithmetic operators. -// Our P3663 implementation doesn't depend on them, so we don't need -// the operators at all. +using std::constant_wrapper; +using std::cw; -// Clang 14 doesn't like the forward declaration of constant_wrapper, -// because it claims that the non-type template parameter X has a different -// type in the definition versus in the declaration. 
+#else namespace detail { @@ -260,6 +32,6 @@ using constant_wrapper = detail::constant_wrapper_impl; template constexpr auto cw = constant_wrapper{}; -} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE +#endif // __cpp_lib_constant_wrapper -#endif // defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/include/experimental/__p2630_bits/submdspan_extents.hpp b/include/experimental/__p2630_bits/submdspan_extents.hpp index 7e95185d..99d302cf 100644 --- a/include/experimental/__p2630_bits/submdspan_extents.hpp +++ b/include/experimental/__p2630_bits/submdspan_extents.hpp @@ -1014,11 +1014,8 @@ check_canonical_kth_submdspan_slice_type( [[maybe_unused]] Slice slice) { if constexpr (! is_canonical_slice_type()) { -#if defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) + // Apparent redundancy is just a back-port of static_assert(false). static_assert(is_canonical_slice_type()); -#else - static_assert(false); -#endif } else { // 3.2 static_assert(check_static_bounds(extents{}) != check_static_bounds_result::out_of_bounds); @@ -1069,11 +1066,10 @@ submdspan_canonicalize_one_slice( { // Part of [mdspan.sub.slices] 9. // This could be combined with the if constexpr branches below. -#if defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) - static_assert(check_static_bounds(extents{}) != check_static_bounds_result::out_of_bounds); -#else - static_assert(check_static_bounds(exts) != check_static_bounds_result::out_of_bounds); -#endif + static_assert( + check_static_bounds( + extents{}) != + check_static_bounds_result::out_of_bounds); // TODO Check Precondition that s is a valid k-th submdspan slice for exts. @@ -1084,14 +1080,6 @@ submdspan_canonicalize_one_slice( return canonical_ice(s); // 11.2 } else if constexpr (is_strided_slice::value) { // 11.3 -#if ! defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) - // GCC 11.4.0 (C++20) accepts this code, but Clang 14 does not. 
- return strided_slice{ - .offset = canonical_ice(s.offset), - .extent = canonical_ice(s.extent), - .stride = canonical_ice(s.stride) - }; -#else auto offset = canonical_ice(s.offset); auto extent = canonical_ice(s.extent); auto stride = canonical_ice(s.stride); @@ -1102,18 +1090,9 @@ submdspan_canonicalize_one_slice( /* .extent = */ extent, /* .stride = */ stride }; -#endif } #if ! defined(__cpp_lib_tuple_like) || (__cpp_lib_tuple_like < 202311L) else if constexpr (detail::is_std_complex) { -#if ! defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) - // GCC 11.4.0 (C++20) accepts this code, but Clang 14 does not. - return strided_slice{ - .offset = canonical_ice(s.real()), - .extent = canonical_ice(s.imag() - s.real()), - .stride = cw - }; -#else auto offset = canonical_ice(s.real()); auto extent = canonical_ice(s.imag() - s.real()); auto stride = cw; @@ -1124,7 +1103,6 @@ submdspan_canonicalize_one_slice( /* .extent = */ extent, /* .stride = */ stride }; -#endif } #endif else { // 11.4 @@ -1133,14 +1111,7 @@ submdspan_canonicalize_one_slice( using S_k1 = decltype(s_k1); static_assert(std::is_convertible_v); static_assert(std::is_convertible_v); -#if ! defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) - // GCC 11.4.0 (C++20) accepts this code, but Clang 14 does not. 
- return strided_slice{ - .offset = canonical_ice(s_k0), - .extent = subtract_ice(s_k0, s_k1), - .stride = cw - }; -#else + auto offset = canonical_ice(s_k0); auto extent = subtract_ice(s_k0, s_k1); auto stride = cw; @@ -1151,7 +1122,6 @@ submdspan_canonicalize_one_slice( /* .extent = */ extent, /* .stride = */ stride }; -#endif } } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c6eff3ef..cb849f26 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -33,12 +33,6 @@ function(mdspan_add_test name) MDSPAN_ENABLE_P3663=1 ) endif() - if(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) - target_compile_definitions(${name} - PUBLIC - MDSPAN_CONSTANT_WRAPPER_WORKAROUND=1 - ) - endif() endfunction() if(MDSPAN_USE_SYSTEM_GTEST) diff --git a/tests/test_canonicalize_slices.cpp b/tests/test_canonicalize_slices.cpp index 41f65eb9..7f949ae5 100644 --- a/tests/test_canonicalize_slices.cpp +++ b/tests/test_canonicalize_slices.cpp @@ -40,7 +40,7 @@ class my_nonaggregate_pair { : first_(first), second_(second) {} -#if defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) +#if ! defined(__cpp_lib_constant_wrapper) template constexpr auto get() -> std::conditional_t { if constexpr (Index == 0) { @@ -79,7 +79,7 @@ struct std::tuple_size> template struct std::tuple_element> { -#if defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) +#if ! defined(__cpp_lib_constant_wrapper) static_assert(Index == 0 || Index == 1, "Invalid index"); #else static_assert(false, "Invalid index"); diff --git a/tests/test_constant_wrapper.cpp b/tests/test_constant_wrapper.cpp index 3d533b1b..a37f8dc5 100644 --- a/tests/test_constant_wrapper.cpp +++ b/tests/test_constant_wrapper.cpp @@ -24,7 +24,7 @@ namespace { // (anonymous) -#if ! 
defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) +#if defined(__cpp_lib_constant_wrapper) template using IC = std::integral_constant; @@ -37,7 +37,7 @@ constexpr void test_integral_constant_wrapper(IC ic) { constexpr auto c = cw; static_assert(std::is_same_v< - decltype(cw), + std::remove_const_t)>, constant_wrapper>); static_assert(decltype(c)::value == Value); static_assert(std::is_same_v< @@ -52,7 +52,7 @@ constexpr void test_integral_constant_wrapper(IC ic) { // any integer promotions (e.g., short + short -> int). constexpr auto val_plus_1 = Integral(Value + Integral(1)); constexpr auto c_assigned = (c2 = IC{}); - static_assert(c_assigned() == val_plus_1); + static_assert(c_assigned == val_plus_1); } TEST(TestConstantWrapper, Construction) { @@ -85,17 +85,17 @@ TEST(TestConstantWrapper, IntegerPlus) { constexpr size_t value3 = decltype(cw_11)(); static_assert(value == value3); -#if ! defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) +#if defined(__cpp_lib_constant_wrapper) static_assert(std::is_same_v< decltype(cw_11), - decltype(cw)>); + std::remove_const_t)>>); #endif [[maybe_unused]] auto expected_result = cw; using expected_type = constant_wrapper; static_assert(std::is_same_v); -#if ! defined(MDSPAN_CONSTANT_WRAPPER_WORKAROUND) +#if defined(__cpp_lib_constant_wrapper) [[maybe_unused]] auto cw_11_plus_one = cw_11 + cw; [[maybe_unused]] auto one_plus_cw_11 = cw + cw_11;