diff --git a/examples/aligned_accessor/aligned_accessor.cpp b/examples/aligned_accessor/aligned_accessor.cpp index 60f746c8..19c43e5d 100644 --- a/examples/aligned_accessor/aligned_accessor.cpp +++ b/examples/aligned_accessor/aligned_accessor.cpp @@ -34,6 +34,10 @@ // https://github.com/rapidsai/raft/pull/725#discussion_r937991701 namespace { +using Kokkos::aligned_accessor; +using Kokkos::detail::aligned_pointer_t; +using Kokkos::detail::assume_aligned_method; +using Kokkos::detail::align_attribute_method; using test_value_type = float; constexpr std::size_t min_overalignment_factor = 8; @@ -43,94 +47,6 @@ constexpr std::size_t min_byte_alignment = min_overalignment_factor * sizeof(flo // Some compilers have trouble optimizing loops with unsigned or 64-bit index types. using index_type = int; - -// Prefer std::assume_aligned if available, as it is in the C++ Standard. -// Otherwise, use a compiler-specific equivalent if available. - -// NOTE (mfh 2022/08/08) BYTE_ALIGNMENT must be unsigned and a power of 2. -#if defined(__cpp_lib_assume_aligned) -# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) (std::assume_aligned< BYTE_ALIGNMENT >( POINTER )) - constexpr char assume_aligned_method[] = "std::assume_aligned"; -#elif defined(__ICL) -# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) POINTER - constexpr char assume_aligned_method[] = "(none)"; -#elif defined(__ICC) -# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) POINTER - constexpr char assume_aligned_method[] = "(none)"; -#elif defined(__clang__) -# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) POINTER - constexpr char assume_aligned_method[] = "(none)"; -#elif defined(__GNUC__) - // __builtin_assume_aligned returns void* -# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) reinterpret_cast< ELEMENT_TYPE* >(__builtin_assume_aligned( POINTER, BYTE_ALIGNMENT )) - constexpr char assume_aligned_method[] = "__builtin_assume_aligned"; -#else -# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) POINTER - constexpr char assume_aligned_method[] = "(none)"; -#endif - -// Some compilers other than Clang or GCC like to define __clang__ or __GNUC__. -// Thus, we order the tests from most to least specific. -#if defined(__ICL) -# define MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( BYTE_ALIGNMENT ) __declspec(align_value( BYTE_ALIGNMENT )) - constexpr char align_attribute_method[] = "__declspec(align_value(BYTE_ALIGNMENT))"; -#elif defined(__ICC) -# define MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( BYTE_ALIGNMENT ) __attribute__((align_value( BYTE_ALIGNMENT ))) - constexpr char align_attribute_method[] = "__attribute__((align_value(BYTE_ALIGNMENT)))"; -#elif defined(__clang__) -# define MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( BYTE_ALIGNMENT ) __attribute__((align_value( BYTE_ALIGNMENT ))) - constexpr char align_attribute_method[] = "__attribute__((align_value(BYTE_ALIGNMENT)))"; -#else -# define MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( BYTE_ALIGNMENT ) - constexpr char align_attribute_method[] = "(none)"; -#endif - -constexpr bool -is_nonzero_power_of_two(const std::size_t x) -{ -// Just checking __cpp_lib_int_pow2 isn't enough for some GCC versions. -// The header exists, but std::has_single_bit does not. -#if defined(__cpp_lib_int_pow2) && __cplusplus >= 202002L - return std::has_single_bit(x); -#else - return x != 0 && (x & (x - 1)) == 0; -#endif -} - -template -constexpr bool -valid_byte_alignment(const std::size_t byte_alignment) -{ - return is_nonzero_power_of_two(byte_alignment) && byte_alignment >= alignof(ElementType); -} - -// We define aligned_pointer_t through a struct -// so we can check whether the byte alignment is valid. -// This makes it impossible to use the alias -// with an invalid byte alignment. -template -struct aligned_pointer { - static_assert(valid_byte_alignment(byte_alignment), - "byte_alignment must be a power of two no less than " - "the minimum required alignment of ElementType."); - -#if defined(__ICC) - // x86-64 ICC 2021.5.0 emits warning #3186 ("expected typedef declaration") here. - // No other compiler (including Clang, which has a similar type attribute) has this issue. -# pragma warning push -# pragma warning disable 3186 -#endif - - using type = ElementType* MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( byte_alignment ); - -#if defined(__ICC) -# pragma warning pop -#endif -}; - -template -using aligned_pointer_t = typename aligned_pointer::type; - template aligned_pointer_t bless(ElementType* ptr, std::integral_constant /* ba */ ) @@ -138,34 +54,6 @@ bless(ElementType* ptr, std::integral_constant /* b return MDSPAN_IMPL_ASSUME_ALIGNED( ElementType, ptr, byte_alignment ); } -template -struct aligned_accessor { - using offset_policy = Kokkos::default_accessor; - using element_type = ElementType; - using reference = ElementType&; - using data_handle_type = aligned_pointer_t; - - constexpr aligned_accessor() noexcept = default; - - MDSPAN_TEMPLATE_REQUIRES( - class OtherElementType, - std::size_t other_byte_alignment, - /* requires */ (std::is_convertible::value && other_byte_alignment == byte_alignment) - ) - constexpr aligned_accessor(aligned_accessor) noexcept {} - - constexpr reference access(data_handle_type p, size_t i) const noexcept { - // This may declare alignment twice, depending on - // if we have an attribute for marking pointer types. - return MDSPAN_IMPL_ASSUME_ALIGNED( ElementType, p, byte_alignment )[i]; - } - - constexpr typename offset_policy::data_handle_type - offset(data_handle_type p, size_t i) const noexcept { - return p + i; - } -}; - template struct delete_raw { void operator()(ElementType* p) const { diff --git a/include/experimental/__p0009_bits/aligned_accessor.hpp b/include/experimental/__p0009_bits/aligned_accessor.hpp new file mode 100644 index 00000000..f85cf613 --- /dev/null +++ b/include/experimental/__p0009_bits/aligned_accessor.hpp @@ -0,0 +1,209 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +// NOTE: This code is prematurely taken from an example based on +// https://github.com/kokkos/mdspan/pull/176 + +#pragma once + +#include "macros.hpp" +#include "trait_backports.hpp" +#include "default_accessor.hpp" +#include "extents.hpp" +#include +#include +#include +#include +#include + +// If we don't have bitcast, we should use memcpy +#ifndef __cpp_lib_bit_cast +#include +#endif + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +namespace detail { + +// Prefer std::assume_aligned if available, as it is in the C++ Standard. +// Otherwise, use a compiler-specific equivalent if available. + +// NOTE (mfh 2022/08/08) BYTE_ALIGNMENT must be unsigned and a power of 2. +#if defined(__cpp_lib_assume_aligned) +# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) (std::assume_aligned< BYTE_ALIGNMENT >( POINTER )) + constexpr char assume_aligned_method[] = "std::assume_aligned"; +#elif defined(__ICL) +# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) POINTER + constexpr char assume_aligned_method[] = "(none)"; +#elif defined(__ICC) +# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) POINTER + constexpr char assume_aligned_method[] = "(none)"; +#elif defined(__clang__) +# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) POINTER + constexpr char assume_aligned_method[] = "(none)"; +#elif defined(__GNUC__) + // __builtin_assume_aligned returns void* +# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) reinterpret_cast< ELEMENT_TYPE* >(__builtin_assume_aligned( POINTER, BYTE_ALIGNMENT )) + constexpr char assume_aligned_method[] = "__builtin_assume_aligned"; +#else +# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) POINTER + constexpr char assume_aligned_method[] = "(none)"; +#endif + +// Some compilers other than Clang or GCC like to define __clang__ or __GNUC__. +// Thus, we order the tests from most to least specific. +#if defined(__ICL) +# define MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( BYTE_ALIGNMENT ) __declspec(align_value( BYTE_ALIGNMENT )) + constexpr char align_attribute_method[] = "__declspec(align_value(BYTE_ALIGNMENT))"; +#elif defined(__ICC) +# define MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( BYTE_ALIGNMENT ) __attribute__((align_value( BYTE_ALIGNMENT ))) + constexpr char align_attribute_method[] = "__attribute__((align_value(BYTE_ALIGNMENT)))"; +#elif defined(__clang__) +# define MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( BYTE_ALIGNMENT ) __attribute__((align_value( BYTE_ALIGNMENT ))) + constexpr char align_attribute_method[] = "__attribute__((align_value(BYTE_ALIGNMENT)))"; +#else +# define MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( BYTE_ALIGNMENT ) + constexpr char align_attribute_method[] = "(none)"; +#endif + +constexpr bool +is_nonzero_power_of_two(const std::size_t x) +{ +// Just checking __cpp_lib_int_pow2 isn't enough for some GCC versions. +// The header exists, but std::has_single_bit does not. +#if defined(__cpp_lib_int_pow2) && __cplusplus >= 202002L + return std::has_single_bit(x); +#else + return x != 0 && (x & (x - 1)) == 0; +#endif +} + +template +constexpr bool +valid_byte_alignment(const std::size_t byte_alignment) +{ + return is_nonzero_power_of_two(byte_alignment) && byte_alignment >= alignof(ElementType); +} + +// We define aligned_pointer_t through a struct +// so we can check whether the byte alignment is valid. +// This makes it impossible to use the alias +// with an invalid byte alignment. +template +struct aligned_pointer { + static_assert(valid_byte_alignment(Alignment), + "Alignment must be a power of two no less than " + "the minimum required alignment of T."); + using type = T* MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( Alignment ); +}; + + +template +using aligned_pointer_t = typename aligned_pointer::type; +} // namespace detail + +template +#ifdef __cpp_lib_bit_cast // Only can be constexpr if we have bit_cast +constexpr +#endif +bool is_sufficiently_aligned(T* ptr) { + // Note this mandate is not what is currently in the standard + // See https://cplusplus.github.io/LWG/issue4290 + static_assert(detail::valid_byte_alignment(Alignment), + "Alignment must be a power of two no less than " + "the minimum required alignment of T."); +#ifdef __cpp_lib_bit_cast + auto dst = std::bit_cast(ptr); +#else + // Will work but non-constexpr + std::uintptr_t dst; + std::memcpy(&dst, &ptr, sizeof(std::uintptr_t)); +#endif + return !(dst & (Alignment - 1)); +} + +template +struct aligned_accessor { + using offset_policy = default_accessor; + using element_type = ElementType; + using reference = ElementType&; + using data_handle_type = detail::aligned_pointer_t; + + static constexpr size_t byte_alignment = ByteAlignment; + + constexpr aligned_accessor() noexcept = default; + + MDSPAN_TEMPLATE_REQUIRES( + class OtherElementType, + std::size_t OtherByteAlignment, + /* requires */ (std::is_convertible::value && OtherByteAlignment >= byte_alignment) + ) + constexpr aligned_accessor(aligned_accessor) noexcept {} + + MDSPAN_TEMPLATE_REQUIRES( + class OtherElementType, + /* requires */ (std::is_convertible::value) + ) + constexpr explicit aligned_accessor(default_accessor) noexcept {} + + MDSPAN_TEMPLATE_REQUIRES( + class OtherElementType, + /* requires */ (std::is_convertible::value) + ) + constexpr operator default_accessor() const noexcept { + return {}; + } + + constexpr reference access(data_handle_type p, size_t i) const noexcept { + // This may declare alignment twice, depending on + // if we have an attribute for marking pointer types. + return MDSPAN_IMPL_ASSUME_ALIGNED( ElementType, p, byte_alignment )[i]; + } + + constexpr typename offset_policy::data_handle_type + offset(data_handle_type p, size_t i) const noexcept { + return p + i; + } +}; + +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/include/mdspan/mdspan.hpp b/include/mdspan/mdspan.hpp index 4a0e354f..0b4e7d09 100644 --- a/include/mdspan/mdspan.hpp +++ b/include/mdspan/mdspan.hpp @@ -34,6 +34,7 @@ #include "../experimental/__p0009_bits/layout_left.hpp" #include "../experimental/__p0009_bits/layout_right.hpp" #include "../experimental/__p0009_bits/macros.hpp" +#include "../experimental/__p0009_bits/aligned_accessor.hpp" #if MDSPAN_HAS_CXX_17 #include "../experimental/__p2642_bits/layout_padded.hpp" #include "../experimental/__p2630_bits/submdspan.hpp" diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b038ebcc..c60a08c2 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -33,7 +33,7 @@ if(MDSPAN_USE_SYSTEM_GTEST) find_package(GTest CONFIG REQUIRED) else() include(FetchContent) - + if (MSVC) set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) endif() @@ -42,14 +42,14 @@ else() GIT_REPOSITORY https://github.com/google/googletest.git GIT_TAG v1.17.0 ) - + # TODO CMake 3.28, we can pass EXCLUDE_FROM_ALL directly to fetchcontent_makeavailable fetchcontent_getproperties(googletest) if (NOT googletest_POPULATED) fetchcontent_populate(googletest) add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR} EXCLUDE_FROM_ALL) endif() - + add_library(GTest::gtest_main ALIAS gtest_main) endif() @@ -94,6 +94,7 @@ if(NOT CMAKE_CXX_STANDARD STREQUAL "14") mdspan_add_test(test_layout_padded_left ENABLE_PRECONDITIONS) mdspan_add_test(test_layout_padded_right ENABLE_PRECONDITIONS) endif() +mdspan_add_test(test_aligned_accessor) # both of those don't work yet since its using vector if(NOT MDSPAN_ENABLE_CUDA AND NOT MDSPAN_ENABLE_HIP) mdspan_add_test(test_mdarray_ctors) diff --git a/tests/test_aligned_accessor.cpp b/tests/test_aligned_accessor.cpp new file mode 100644 index 00000000..9671fb59 --- /dev/null +++ b/tests/test_aligned_accessor.cpp @@ -0,0 +1,93 @@ + +#include +#include +#include +#include + +TEST(TestAlignedAccessor, IsSufficientlyAligned) { + ASSERT_TRUE( + Kokkos::is_sufficiently_aligned<1>(reinterpret_cast(0x12345678))); + ASSERT_TRUE( + Kokkos::is_sufficiently_aligned<1>(reinterpret_cast(0x12345671))); + + ASSERT_TRUE(Kokkos::is_sufficiently_aligned<2>( + reinterpret_cast(0x12345678))); + ASSERT_TRUE(!Kokkos::is_sufficiently_aligned<2>( + reinterpret_cast(0x12345671))); + + ASSERT_TRUE(Kokkos::is_sufficiently_aligned<4>( + reinterpret_cast(0x12345678))); + ASSERT_TRUE(Kokkos::is_sufficiently_aligned<4>( + reinterpret_cast(0x12345674))); + ASSERT_TRUE(Kokkos::is_sufficiently_aligned<4>( + reinterpret_cast(0x1234567c))); + ASSERT_TRUE(!Kokkos::is_sufficiently_aligned<4>( + reinterpret_cast(0x12345672))); + ASSERT_TRUE(!Kokkos::is_sufficiently_aligned<4>( + reinterpret_cast(0x12345671))); + ASSERT_TRUE(!Kokkos::is_sufficiently_aligned<4>( + reinterpret_cast(0x12345677))); +} + +// These shouldn't be in the global namespace or they may replace the C version +namespace testing +{ + // https://stackoverflow.com/questions/62962839/stdaligned-alloc-missing-from-visual-studio-2019 + void *aligned_alloc(std::size_t alignment, std::size_t size) { + #ifdef _MSC_VER + return _aligned_malloc(size, alignment); // The arguments are reversed apparently :D + #else + return std::aligned_alloc(alignment, size); + #endif + } + + void aligned_free(void *ptr) { + #ifdef _MSC_VER + _aligned_free(ptr); + #else + std::free(ptr); + #endif + } +} + +template +void test_aligned_accessor() { + using mdspan_type = + Kokkos::mdspan, + Kokkos::layout_right, + Kokkos::aligned_accessor>; + auto *buff = reinterpret_cast( + testing::aligned_alloc(ByteAlignment, NumElements * sizeof(double))); + std::iota(buff, buff + NumElements, 0); + ASSERT_TRUE(Kokkos::is_sufficiently_aligned(buff)); + + auto md = mdspan_type(buff); + + for (std::size_t i = 0; i < NumElements; ++i) + ASSERT_TRUE(md[i] == static_cast(i)); + + // Accessor should be convertible to a default one + auto md2 = Kokkos::mdspan>(md); + ASSERT_TRUE(Kokkos::is_sufficiently_aligned(md2.data_handle())); + + for (std::size_t i = 0; i < NumElements; ++i) + ASSERT_TRUE(md2[i] == static_cast(i)); + + // Get an offset for the submdspan that should be of the correct alignment + const std::size_t offset = ByteAlignment / sizeof(double); + static_assert( NumElements > 2 + offset ); + auto md3 = Kokkos::submdspan(md, std::pair{ offset, NumElements - 2 } ); + ASSERT_TRUE(Kokkos::is_sufficiently_aligned(md3.data_handle())); + + for (std::size_t i = 0; i < NumElements - 2 - offset; ++i ) + ASSERT_TRUE(md3[i] == static_cast(i + offset)); + + testing::aligned_free(buff); +} + +TEST(TestAlignedAccessor, AlignedAccessor) { + test_aligned_accessor(); + test_aligned_accessor(); + test_aligned_accessor<2 * alignof(double), 10>(); + test_aligned_accessor<4 * alignof(double), 10>(); +}