diff --git a/docs/libcudacxx/extended_api/mdspan.rst b/docs/libcudacxx/extended_api/mdspan.rst
index ca0582fa0d5..f0b29f3d1d0 100644
--- a/docs/libcudacxx/extended_api/mdspan.rst
+++ b/docs/libcudacxx/extended_api/mdspan.rst
@@ -11,6 +11,7 @@ Mdspan
    mdspan/restrict_accessor
    mdspan/shared_memory_accessor
    mdspan/mdspan_to_dlpack
+   mdspan/dlpack_to_mdspan
 
 .. list-table::
    :widths: 25 45 30 30
@@ -40,3 +41,8 @@ Mdspan
      - Convert a ``mdspan`` to a ``DLTensor``
      - CCCL 3.2.0
      - CUDA 13.2
+
+   * - :ref:`dlpack to mdspan <libcudacxx-extended-api-mdspan-dlpack-to-mdspan>`
+     - Convert a ``DLTensor`` to a ``mdspan``
+     - CCCL 3.2.0
+     - CUDA 13.2
diff --git a/docs/libcudacxx/extended_api/mdspan/dlpack_to_mdspan.rst b/docs/libcudacxx/extended_api/mdspan/dlpack_to_mdspan.rst
new file mode 100644
index 00000000000..daa4d7df7fc
--- /dev/null
+++ b/docs/libcudacxx/extended_api/mdspan/dlpack_to_mdspan.rst
@@ -0,0 +1,138 @@
+.. _libcudacxx-extended-api-mdspan-dlpack-to-mdspan:
+
+DLPack to ``mdspan``
+====================
+
+This functionality provides a conversion from a `DLPack <https://github.com/dmlc/dlpack>`__ ``DLTensor`` to ``cuda::host_mdspan``, ``cuda::device_mdspan``, and ``cuda::managed_mdspan``.
+
+Defined in the ``<cuda/mdspan>`` header.
+
+Conversion functions
+--------------------
+
+.. code:: cuda
+
+   namespace cuda {
+
+   template <typename ElementType, cuda::std::size_t Rank, typename LayoutPolicy = cuda::std::layout_stride>
+   [[nodiscard]] cuda::host_mdspan<ElementType, cuda::std::dims<Rank, cuda::std::int64_t>, LayoutPolicy>
+   to_host_mdspan(const DLTensor& tensor);
+
+   template <typename ElementType, cuda::std::size_t Rank, typename LayoutPolicy = cuda::std::layout_stride>
+   [[nodiscard]] cuda::device_mdspan<ElementType, cuda::std::dims<Rank, cuda::std::int64_t>, LayoutPolicy>
+   to_device_mdspan(const DLTensor& tensor);
+
+   template <typename ElementType, cuda::std::size_t Rank, typename LayoutPolicy = cuda::std::layout_stride>
+   [[nodiscard]] cuda::managed_mdspan<ElementType, cuda::std::dims<Rank, cuda::std::int64_t>, LayoutPolicy>
+   to_managed_mdspan(const DLTensor& tensor);
+
+   } // namespace cuda
+
+Template parameters
+-------------------
+
+- ``ElementType``: The element type of the resulting ``mdspan``. Must match ``DLTensor::dtype``.
+- ``Rank``: The number of dimensions. Must match ``DLTensor::ndim``.
+- ``LayoutPolicy``: The layout policy of the resulting ``mdspan``. Defaults to ``cuda::std::layout_stride``. Supported layouts are:
+
+  - ``cuda::std::layout_right`` (C-contiguous, row-major)
+  - ``cuda::std::layout_left`` (Fortran-contiguous, column-major)
+  - ``cuda::std::layout_stride`` (general strided layout)
+
+Semantics
+---------
+
+The conversion produces a non-owning ``mdspan`` view of the ``DLTensor`` data:
+
+- The ``mdspan`` data pointer is computed as ``static_cast<char*>(tensor.data) + tensor.byte_offset``.
+- For ``rank > 0``, ``mdspan.extent(i)`` is ``tensor.shape[i]``.
+- For ``layout_stride``, ``mdspan.stride(i)`` is ``tensor.strides[i]`` (or computed as row-major if ``strides`` is ``nullptr``, which is allowed only for DLPack < v1.2).
+- The device type is validated:
+
+  - ``kDLCPU`` for ``to_host_mdspan``
+  - ``kDLCUDA`` for ``to_device_mdspan``
+  - ``kDLCUDAManaged`` for ``to_managed_mdspan``
+
+Supported element types:
+
+- ``bool``.
+- Signed and unsigned integers.
+- IEEE-754 floating-point and extended-precision floating-point types, including ``__half``, ``__nv_bfloat16``, ``__float128``, and the FP8, FP6, and FP4 types when available.
+- Complex: ``cuda::std::complex<__half>``, ``cuda::std::complex<float>``, and ``cuda::std::complex<double>``.
+- `CUDA built-in vector types <https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#built-in-vector-types>`__, such as ``int2``, ``float4``, etc.
+- Vector types for extended floating-point, such as ``__half2``, ``__nv_fp8x4_e4m3``, etc.
+
+Constraints
+-----------
+
+- ``LayoutPolicy`` must be one of ``cuda::std::layout_right``, ``cuda::std::layout_left``, or ``cuda::std::layout_stride``.
+- For ``layout_right`` and ``layout_left``, the ``DLTensor`` strides must be compatible with the requested layout, as sketched below.
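+
+For illustration, a minimal sketch of requesting a ``layout_left`` view (the shape, strides, and data here are made up for the example):
+
+.. code:: cuda
+
+   // 2x3 column-major tensor: stride of dimension 0 is 1, stride of dimension 1 is 2
+   float data[6] = {1.f, 4.f, 2.f, 5.f, 3.f, 6.f};
+   int64_t shape[2]   = {2, 3};
+   int64_t strides[2] = {1, 2};
+
+   DLTensor tensor{};
+   tensor.data    = data;
+   tensor.device  = {kDLCPU, 0};
+   tensor.ndim    = 2;
+   tensor.dtype   = DLDataType{kDLFloat, 32, 1};
+   tensor.shape   = shape;
+   tensor.strides = strides;
+
+   // Succeeds because the strides are column-major; requesting
+   // cuda::std::layout_right here would throw std::invalid_argument.
+   auto md = cuda::to_host_mdspan<float, 2, cuda::std::layout_left>(tensor);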
+
+Runtime errors
+--------------
+
+The conversion throws ``std::invalid_argument`` in the following cases:
+
+- ``DLTensor::ndim`` does not match the specified ``Rank``.
+- ``DLTensor::dtype`` does not match ``ElementType``.
+- ``DLTensor::data`` is ``nullptr``.
+- ``DLTensor::shape`` is ``nullptr`` (for rank > 0).
+- Any ``DLTensor::shape[i]`` is negative.
+- ``DLTensor::strides`` is ``nullptr`` for DLPack v1.2 or later.
+- ``DLTensor::strides`` is ``nullptr`` for ``layout_left`` with rank > 1 (DLPack < v1.2).
+- ``DLTensor::strides[i]`` is not positive for ``layout_stride``.
+- ``DLTensor::strides`` are not compatible with the requested ``layout_right`` or ``layout_left``.
+- ``DLTensor::device.device_type`` does not match the target ``mdspan`` type.
+- The data pointer is not properly aligned for the element type.
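+
+Because all of these conditions are only known at run time, a caller receiving a ``DLTensor`` from another framework may want to guard the conversion. A minimal sketch (``tensor`` is assumed to come from elsewhere):
+
+.. code:: cuda
+
+   try {
+     auto md = cuda::to_device_mdspan<float, 2>(tensor);
+     // use md ...
+   } catch (const std::invalid_argument&) {
+     // rank, dtype, shape, strides, alignment, or device type mismatch
+   }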
+
+Availability notes
+------------------
+
+- This API is available only when the DLPack header is present, namely when ``<dlpack/dlpack.h>`` is found in the include path.
+- This API can be disabled by defining ``CCCL_DISABLE_DLPACK`` before including any library headers. In this case, ``<dlpack/dlpack.h>`` will not be included.
+
+References
+----------
+
+- `DLPack C API <https://dmlc.github.io/dlpack/latest/c_api.html>`__ documentation.
+
+Example
+-------
+
+.. code:: cuda
+
+   #include <cuda/mdspan>
+   #include <dlpack/dlpack.h>
+   #include <cassert>
+   #include <cstdint>
+
+   int main() {
+     int data[6] = {0, 1, 2, 3, 4, 5};
+
+     // Create a DLTensor manually for demonstration
+     int64_t shape[2]   = {2, 3};
+     int64_t strides[2] = {3, 1}; // row-major strides
+
+     DLTensor tensor{};
+     tensor.data        = data;
+     tensor.device      = {kDLCPU, 0};
+     tensor.ndim        = 2;
+     tensor.dtype       = DLDataType{kDLInt, 32, 1};
+     tensor.shape       = shape;
+     tensor.strides     = strides;
+     tensor.byte_offset = 0;
+
+     // Convert to host_mdspan
+     auto md = cuda::to_host_mdspan<int, 2>(tensor);
+
+     assert(md.rank() == 2);
+     assert(md.extent(0) == 2 && md.extent(1) == 3);
+     assert(md.stride(0) == 3 && md.stride(1) == 1);
+     assert(md.data_handle() == data);
+     assert(md(0, 0) == 0 && md(1, 2) == 5);
+   }
+
+See also
+--------
+
+- :ref:`libcudacxx-extended-api-mdspan-mdspan-to-dlpack` for the reverse conversion.
diff --git a/libcudacxx/include/cuda/__internal/dlpack.h b/libcudacxx/include/cuda/__internal/dlpack.h
index 61fb5dfcd2a..5af47c34b08 100644
--- a/libcudacxx/include/cuda/__internal/dlpack.h
+++ b/libcudacxx/include/cuda/__internal/dlpack.h
@@ -26,7 +26,7 @@
 # include <dlpack/dlpack.h>
 
 # define _CCCL_DLPACK_AT_LEAST(_MAJOR, _MINOR) \
-  (DLPACK_MAJOR_VERSION > (_MAJOR) || (DLPACK_MAJOR_VERSION == (_MAJOR) && DLPACK_VERSION_MINOR >= (_MINOR)))
+  (DLPACK_MAJOR_VERSION > (_MAJOR) || (DLPACK_MAJOR_VERSION == (_MAJOR) && DLPACK_MINOR_VERSION >= (_MINOR)))
 # define _CCCL_DLPACK_BELOW(_MAJOR, _MINOR) (!_CCCL_DLPACK_AT_LEAST(_MAJOR, _MINOR))
 
 # if DLPACK_MAJOR_VERSION != 1
diff --git a/libcudacxx/include/cuda/__mdspan/dlpack_to_mdspan.h b/libcudacxx/include/cuda/__mdspan/dlpack_to_mdspan.h
new file mode 100644
index 00000000000..565bb9894c0
--- /dev/null
+++ b/libcudacxx/include/cuda/__mdspan/dlpack_to_mdspan.h
@@ -0,0 +1,270 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA___MDSPAN_DLPACK_TO_MDSPAN_H
+#define _CUDA___MDSPAN_DLPACK_TO_MDSPAN_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#if _CCCL_HAS_DLPACK()
+
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+
+# if !_CCCL_COMPILER(NVRTC)
+#  include <stdexcept>
+# endif // !_CCCL_COMPILER(NVRTC)
+
+# include
+//
+# include <cuda/std/__cccl/prologue.h>
+
+_CCCL_BEGIN_NAMESPACE_CUDA
+
+template <class _ElementType>
+[[nodiscard]] _CCCL_HOST_API inline bool __validate_dlpack_data_type(const ::DLDataType& __dtype) noexcept
+{
+  const auto __expected = ::cuda::__data_type_to_dlpack<_ElementType>();
+  return __dtype.code == __expected.code && __dtype.bits == __expected.bits && __dtype.lanes == __expected.lanes;
+}
+
+[[nodiscard]]
+_CCCL_HOST_API inline ::cuda::std::int64_t
+__get_layout_right_stride(const ::cuda::std::int64_t* __shapes, ::cuda::std::size_t __pos, ::cuda::std::size_t __rank)
+{
+  ::cuda::std::int64_t __stride = 1;
+  for (auto __i = __pos + 1; __i < __rank; ++__i)
+  {
+    // TODO: replace with mul_overflow
+    if (const auto __hi = ::cuda::mul_hi(__stride, __shapes[__i]); __hi != 0 && __hi != -1)
+    {
+      _CCCL_THROW(::std::invalid_argument{"shape overflow"});
+    }
+    __stride *= __shapes[__i]; // TODO: check for overflow
+  }
+  return __stride;
+}
+
+[[nodiscard]]
+_CCCL_HOST_API inline ::cuda::std::int64_t
+__get_layout_left_stride(const ::cuda::std::int64_t* __shapes, ::cuda::std::size_t __pos)
+{
+  ::cuda::std::int64_t __stride = 1;
+  for (::cuda::std::size_t __i = 0; __i < __pos; ++__i)
+  {
+    // TODO: replace with mul_overflow
+    if (const auto __hi = ::cuda::mul_hi(__stride, __shapes[__i]); __hi != 0 && __hi != -1)
+    {
+      _CCCL_THROW(::std::invalid_argument{"shape overflow"});
+    }
+    __stride *= __shapes[__i];
+  }
+  return __stride;
+}
+
+template <class _LayoutPolicy>
+_CCCL_HOST_API void __validate_dlpack_strides(const ::DLTensor& __tensor, [[maybe_unused]] ::cuda::std::size_t __rank)
+{
+  [[maybe_unused]] constexpr bool __is_layout_right = ::cuda::std::is_same_v<_LayoutPolicy, ::cuda::std::layout_right>;
+  [[maybe_unused]] constexpr bool __is_layout_left  = ::cuda::std::is_same_v<_LayoutPolicy, ::cuda::std::layout_left>;
+  [[maybe_unused]] constexpr bool __is_layout_stride =
+    ::cuda::std::is_same_v<_LayoutPolicy, ::cuda::std::layout_stride>;
+  const auto __strides_ptr = __tensor.strides;
+  if (__strides_ptr == nullptr)
+  {
+# if _CCCL_DLPACK_AT_LEAST(1, 2)
+    _CCCL_THROW(::std::invalid_argument{"strides=nullptr is not supported for DLPack v1.2 and later"});
+# else
+    // strides == nullptr means row-major (C-contiguous) layout
+    if (__is_layout_left && __rank > 1)
+    {
+      _CCCL_THROW(::std::invalid_argument{"strides must be non-null for layout_left"});
+    }
+    else
+    {
+      return;
+    }
+# endif // _CCCL_DLPACK_AT_LEAST(1, 2)
+  }
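+  // A stride vector is compatible with layout_right when stride[i] == shape[i+1] * ... * shape[rank-1],
+  // and with layout_left when stride[i] == shape[0] * ... * shape[i-1]. The loop below checks each
+  // position against the expected value (or, for layout_stride, only for strict positivity).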
+  for (::cuda::std::size_t __pos = 0; __pos < __rank; ++__pos)
+  {
+    if constexpr (__is_layout_right)
+    {
+      if (__strides_ptr[__pos] != ::cuda::__get_layout_right_stride(__tensor.shape, __pos, __rank))
+      {
+        _CCCL_THROW(::std::invalid_argument{"DLTensor strides are not compatible with layout_right"});
+      }
+    }
+    else if constexpr (__is_layout_left)
+    {
+      if (__strides_ptr[__pos] != ::cuda::__get_layout_left_stride(__tensor.shape, __pos))
+      {
+        _CCCL_THROW(::std::invalid_argument{"DLTensor strides are not compatible with layout_left"});
+      }
+    }
+    else if constexpr (__is_layout_stride)
+    {
+      if (__strides_ptr[__pos] <= 0)
+      {
+        _CCCL_THROW(::std::invalid_argument{"layout_stride requires strictly positive strides"});
+      }
+    }
+  }
+}
+
+template <class _ElementType, ::cuda::std::size_t _Rank, class _LayoutPolicy>
+[[nodiscard]]
+_CCCL_HOST_API ::cuda::std::mdspan<_ElementType, ::cuda::std::dims<_Rank, ::cuda::std::int64_t>, _LayoutPolicy>
+__to_mdspan(const ::DLTensor& __tensor)
+{
+  using __extents_type = ::cuda::std::dims<_Rank, ::cuda::std::int64_t>;
+  using __mdspan_type  = ::cuda::std::mdspan<_ElementType, __extents_type, _LayoutPolicy>;
+  using __mapping_type = typename _LayoutPolicy::template mapping<__extents_type>;
+  using __element_type = typename __mdspan_type::element_type;
+  constexpr bool __is_layout_right  = ::cuda::std::is_same_v<_LayoutPolicy, ::cuda::std::layout_right>;
+  constexpr bool __is_layout_left   = ::cuda::std::is_same_v<_LayoutPolicy, ::cuda::std::layout_left>;
+  constexpr bool __is_layout_stride = ::cuda::std::is_same_v<_LayoutPolicy, ::cuda::std::layout_stride>;
+  // TODO: add support for layout_stride_relaxed, layout_right_padded, layout_left_padded
+  if constexpr (!__is_layout_right && !__is_layout_left && !__is_layout_stride)
+  {
+    static_assert(::cuda::std::__always_false_v<_LayoutPolicy>, "Unsupported layout policy");
+    return __mdspan_type{};
+  }
+  else
+  {
+    if (::cuda::std::cmp_not_equal(__tensor.ndim, _Rank))
+    {
+      _CCCL_THROW(::std::invalid_argument{"DLTensor rank does not match expected rank"});
+    }
+    if (!::cuda::__validate_dlpack_data_type<__element_type>(__tensor.dtype))
+    {
+      _CCCL_THROW(::std::invalid_argument{"DLTensor data type does not match expected type"});
+    }
+    if (__tensor.data == nullptr)
+    {
+      _CCCL_THROW(::std::invalid_argument{"DLTensor data must be non-null"});
+    }
+    auto __base_data           = static_cast<char*>(__tensor.data) + __tensor.byte_offset;
+    auto __data                = reinterpret_cast<__element_type*>(__base_data);
+    const auto __datatype_size = __tensor.dtype.bits * __tensor.dtype.lanes / 8;
+    // The data type size is used as a proxy for its alignment. The two are not equal in general,
+    // but they coincide for all supported data types.
+    if (__datatype_size > 0 && !::cuda::is_aligned(__data, __datatype_size))
+    {
+      _CCCL_THROW(::std::invalid_argument{"DLTensor data must be aligned to the data type"});
+    }
+    if constexpr (_Rank == 0)
+    {
+      return __mdspan_type{__data, __mapping_type{}};
+    }
+    else // Rank > 0
+    {
+      if (__tensor.shape == nullptr)
+      {
+        _CCCL_THROW(::std::invalid_argument{"DLTensor shape must be non-null"});
+      }
+      ::cuda::std::array<::cuda::std::int64_t, _Rank> __extents_array{};
+      for (::cuda::std::size_t __i = 0; __i < _Rank; ++__i)
+      {
+        if (__tensor.shape[__i] < 0)
+        {
+          _CCCL_THROW(::std::invalid_argument{"DLTensor shapes must be non-negative"});
+        }
+        __extents_array[__i] = __tensor.shape[__i];
+      }
+      ::cuda::__validate_dlpack_strides<_LayoutPolicy>(__tensor, _Rank);
+      if constexpr (__is_layout_stride)
+      {
+        ::cuda::std::array<::cuda::std::int64_t, _Rank> __strides_array{};
+        for (::cuda::std::size_t __i = 0; __i < _Rank; ++__i)
+        {
+          const bool __has_strides = __tensor.strides != nullptr;
+          __strides_array[__i] =
+            __has_strides ? __tensor.strides[__i] : ::cuda::__get_layout_right_stride(__tensor.shape, __i, _Rank);
+        }
+        return __mdspan_type{__data, __mapping_type{__extents_array, __strides_array}};
+      }
+      else
+      {
+        return __mdspan_type{__data, __extents_type{__extents_array}};
+      }
+    }
+  }
+}
+
+/***********************************************************************************************************************
+ * Public API
+ **********************************************************************************************************************/
+
+template <class _ElementType, ::cuda::std::size_t _Rank, class _LayoutPolicy = ::cuda::std::layout_stride>
+[[nodiscard]]
+_CCCL_HOST_API ::cuda::host_mdspan<_ElementType, ::cuda::std::dims<_Rank, ::cuda::std::int64_t>, _LayoutPolicy>
+to_host_mdspan(const ::DLTensor& __tensor)
+{
+  if (__tensor.device.device_type != ::kDLCPU)
+  {
+    _CCCL_THROW(::std::invalid_argument{"DLTensor device type must be kDLCPU for host_mdspan"});
+  }
+  using __extents_type = ::cuda::std::dims<_Rank, ::cuda::std::int64_t>;
+  using __mdspan_type  = ::cuda::host_mdspan<_ElementType, __extents_type, _LayoutPolicy>;
+  return __mdspan_type{::cuda::__to_mdspan<_ElementType, _Rank, _LayoutPolicy>(__tensor)};
+}
+
+template <class _ElementType, ::cuda::std::size_t _Rank, class _LayoutPolicy = ::cuda::std::layout_stride>
+[[nodiscard]]
+_CCCL_HOST_API ::cuda::device_mdspan<_ElementType, ::cuda::std::dims<_Rank, ::cuda::std::int64_t>, _LayoutPolicy>
+to_device_mdspan(const ::DLTensor& __tensor)
+{
+  if (__tensor.device.device_type != ::kDLCUDA)
+  {
+    _CCCL_THROW(::std::invalid_argument{"DLTensor device type must be kDLCUDA for device_mdspan"});
+  }
+  using __extents_type = ::cuda::std::dims<_Rank, ::cuda::std::int64_t>;
+  using __mdspan_type  = ::cuda::device_mdspan<_ElementType, __extents_type, _LayoutPolicy>;
+  return __mdspan_type{::cuda::__to_mdspan<_ElementType, _Rank, _LayoutPolicy>(__tensor)};
+}
+
+template <class _ElementType, ::cuda::std::size_t _Rank, class _LayoutPolicy = ::cuda::std::layout_stride>
+[[nodiscard]]
+_CCCL_HOST_API ::cuda::managed_mdspan<_ElementType, ::cuda::std::dims<_Rank, ::cuda::std::int64_t>, _LayoutPolicy>
+to_managed_mdspan(const ::DLTensor& __tensor)
+{
+  if (__tensor.device.device_type != ::kDLCUDAManaged)
+  {
+    _CCCL_THROW(::std::invalid_argument{"DLTensor device type must be kDLCUDAManaged for managed_mdspan"});
+  }
+  using __extents_type = ::cuda::std::dims<_Rank, ::cuda::std::int64_t>;
+  using __mdspan_type  = ::cuda::managed_mdspan<_ElementType, __extents_type, _LayoutPolicy>;
+  return __mdspan_type{::cuda::__to_mdspan<_ElementType, _Rank, _LayoutPolicy>(__tensor)};
+}
+
+_CCCL_END_NAMESPACE_CUDA
+
+# include <cuda/std/__cccl/epilogue.h>
+
+#endif // _CCCL_HAS_DLPACK()
+#endif // _CUDA___MDSPAN_DLPACK_TO_MDSPAN_H
diff --git a/libcudacxx/include/cuda/mdspan b/libcudacxx/include/cuda/mdspan
index 3129198d02a..f8e36e75e43 100644
--- a/libcudacxx/include/cuda/mdspan
+++ b/libcudacxx/include/cuda/mdspan
@@ -21,6 +21,7 @@
 # pragma system_header
 #endif // no system header
 
+#include <cuda/__mdspan/dlpack_to_mdspan.h>
 #include
 #include
 #include
diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/dlpack_to_mdspan/dlpack_to_mdspan.exceptions.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/dlpack_to_mdspan/dlpack_to_mdspan.exceptions.pass.cpp
new file mode 100644
index 00000000000..8fc6e55839c
--- /dev/null
+++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/dlpack_to_mdspan/dlpack_to_mdspan.exceptions.pass.cpp
@@ -0,0 +1,363 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+// UNSUPPORTED: nvrtc
+
+#include <cuda/mdspan>
+#include <cuda/std/array>
+#include <cuda/std/cstddef>
+#include <cuda/std/cstdint>
+
+#include <cassert>
+#include <stdexcept>
+
+#include "test_macros.h"
+#include <dlpack/dlpack.h>
+#include <nv/target>
+
+template <cuda::std::size_t N>
+using dlpack_array = cuda::std::array<cuda::std::int64_t, N>;
+
+//----------------------------------------------------------------------------------------------------------------------
+// Exception tests
+
+void test_exception_wrong_rank()
+{
+  cuda::std::array<int, 6> data{};
+  dlpack_array<2> shape   = {2, 3};
+  dlpack_array<2> strides = {3, 1}; // row-major
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  bool caught    = false;
+  try
+  {
+    // Try to convert a rank-2 tensor to a rank-1 mdspan
+    unused(cuda::to_host_mdspan<int, 1>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_wrong_dtype()
+{
+  cuda::std::array<int, 4> data{};
+  dlpack_array<1> shape   = {4};
+  dlpack_array<1> strides = {1};
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 1;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1}; // dtype is int
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  bool caught    = false;
+  try
+  {
+    // Try to convert an int tensor to a float mdspan
+    unused(cuda::to_host_mdspan<float, 1>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_null_data()
+{
+  dlpack_array<1> shape   = {4};
+  dlpack_array<1> strides = {1};
+  DLTensor tensor{};
+  tensor.data    = nullptr;
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 1;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  bool caught    = false;
+  try
+  {
+    unused(cuda::to_host_mdspan<int, 1>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_null_shape()
+{
+  cuda::std::array<int, 4> data{};
+  DLTensor tensor{};
+  tensor.data   = data.data();
+  tensor.device = DLDevice{kDLCPU, 0};
+  tensor.ndim   = 1;
+  tensor.dtype  = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape  = nullptr; // null shape
+  bool caught   = false;
+  try
+  {
+    unused(cuda::to_host_mdspan<int, 1>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_negative_shape()
+{
+  cuda::std::array<int, 4> data{};
+  dlpack_array<1> shape   = {-3}; // negative shape
+  dlpack_array<1> strides = {1};
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 1;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  bool caught    = false;
+  try
+  {
+    unused(cuda::to_host_mdspan<int, 1>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_wrong_device_type_host()
+{
+  cuda::std::array<int, 4> data{};
+  dlpack_array<1> shape   = {4};
+  dlpack_array<1> strides = {1};
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{::kDLCUDA, 0}; // CUDA device, not CPU
+  tensor.ndim    = 1;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  bool caught    = false;
+  try
+  {
+    unused(cuda::to_host_mdspan<int, 1>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_wrong_device_type_device()
+{
+  cuda::std::array<int, 4> data{};
+  dlpack_array<1> shape   = {4};
+  dlpack_array<1> strides = {1};
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0}; // CPU device, not CUDA
+  tensor.ndim    = 1;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  bool caught    = false;
+  try
+  {
+    unused(cuda::to_device_mdspan<int, 1>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_wrong_device_type_managed()
+{
+  cuda::std::array<int, 4> data{};
+  dlpack_array<1> shape   = {4};
+  dlpack_array<1> strides = {1};
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0}; // CPU device, not CUDA managed
+  tensor.ndim    = 1;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  bool caught    = false;
+  try
+  {
+    unused(cuda::to_managed_mdspan<int, 1>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_stride_mismatch_layout_right()
+{
+  cuda::std::array<float, 6> data{};
+  dlpack_array<2> shape   = {2, 3};
+  dlpack_array<2> strides = {1, 2}; // Column-major, not row-major
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLFloat, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  bool caught    = false;
+  try
+  {
+    unused(cuda::to_host_mdspan<float, 2, cuda::std::layout_right>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_stride_mismatch_layout_left()
+{
+  cuda::std::array<float, 6> data{};
+  dlpack_array<2> shape   = {2, 3};
+  dlpack_array<2> strides = {3, 1}; // Row-major, not column-major
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLFloat, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  bool caught    = false;
+  try
+  {
+    unused(cuda::to_host_mdspan<float, 2, cuda::std::layout_left>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_zero_stride_layout_stride()
+{
+  cuda::std::array<int, 6> data{};
+  dlpack_array<2> shape   = {2, 3};
+  dlpack_array<2> strides = {0, 1}; // Zero stride is invalid
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  bool caught    = false;
+  try
+  {
+    unused(cuda::to_host_mdspan<int, 2, cuda::std::layout_stride>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_null_strides_dlpack_v12()
+{
+  cuda::std::array<float, 6> data{};
+  dlpack_array<2> shape = {2, 3};
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLFloat, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = nullptr; // null strides not allowed in DLPack v1.2+
+  bool caught    = false;
+  try
+  {
+    unused(cuda::to_host_mdspan<float, 2>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_misaligned_data()
+{
+  // Create a buffer that allows us to get a misaligned pointer
+  alignas(16) cuda::std::array<char, 16> buffer{};
+  // Get a pointer that's 1 byte into the buffer (misaligned for int)
+  auto misaligned_ptr     = static_cast<void*>(buffer.data() + 1);
+  dlpack_array<1> shape   = {3};
+  dlpack_array<1> strides = {1};
+  DLTensor tensor{};
+  tensor.data    = misaligned_ptr;
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 1;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  bool caught    = false;
+  try
+  {
+    unused(cuda::to_host_mdspan<int, 1>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+bool test_exceptions()
+{
+  test_exception_wrong_rank();
+  test_exception_wrong_dtype();
+  test_exception_null_data();
+  test_exception_null_shape();
+  test_exception_negative_shape();
+  test_exception_wrong_device_type_host();
+  test_exception_wrong_device_type_device();
+  test_exception_wrong_device_type_managed();
+  test_exception_stride_mismatch_layout_right();
+  test_exception_stride_mismatch_layout_left();
+  test_exception_zero_stride_layout_stride();
+#if DLPACK_MAJOR_VERSION > 1 || (DLPACK_MAJOR_VERSION == 1 && DLPACK_MINOR_VERSION >= 2)
+  test_exception_null_strides_dlpack_v12();
+#endif
+  test_exception_misaligned_data();
+  return true;
+}
+
+int main(int, char**)
+{
+  NV_IF_TARGET(NV_IS_HOST, (assert(test_exceptions());))
+  return 0;
+}
diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/dlpack_to_mdspan/dlpack_to_mdspan.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/dlpack_to_mdspan/dlpack_to_mdspan.pass.cpp
new file mode 100644
index 00000000000..c2d4b7fdd94
--- /dev/null
+++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/dlpack_to_mdspan/dlpack_to_mdspan.pass.cpp
@@ -0,0 +1,554 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+// UNSUPPORTED: nvrtc
+
+#include <cuda/mdspan>
+#include <cuda/std/array>
+#include <cuda/std/cstddef>
+#include <cuda/std/cstdint>
+#include <cuda/std/type_traits>
+
+#include <cassert>
+
+#include "test_macros.h"
+#include <dlpack/dlpack.h>
+#include <nv/target>
+
+template <cuda::std::size_t N>
+using dlpack_array = cuda::std::array<cuda::std::int64_t, N>;
+
+//----------------------------------------------------------------------------------------------------------------------
+// Rank-0 mdspan conversion
+
+bool test_rank0()
+{
+  float data = 42.0f;
+  DLTensor tensor{};
+  tensor.data   = &data;
+  tensor.device = DLDevice{kDLCPU, 0};
+  tensor.ndim   = 0;
+  tensor.dtype  = DLDataType{DLDataTypeCode::kDLFloat, 32, 1};
+  auto host_mdspan = cuda::to_host_mdspan<float, 0>(tensor);
+
+  assert(host_mdspan.rank() == 0);
+  assert(host_mdspan.size() == 1);
+  assert(host_mdspan.data_handle() == &data);
+  assert(host_mdspan() == 42.0f);
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// Empty tensor (zero in one dimension)
+
+bool test_empty_tensor_layout_right_first_dim_zero()
+{
+  int dummy = 0; // Non-null but won't be accessed
+  dlpack_array<2> shape   = {0, 5};
+  dlpack_array<2> strides = {5, 1}; // row-major
+  DLTensor tensor{};
+  tensor.data    = &dummy;
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan = cuda::to_host_mdspan<int, 2, cuda::std::layout_right>(tensor);
+
+  assert(host_mdspan.extent(0) == 0);
+  assert(host_mdspan.extent(1) == 5);
+  assert(host_mdspan.size() == 0);
+  assert(host_mdspan.empty());
+  return true;
+}
+
+bool test_empty_tensor_layout_right_second_dim_zero()
+{
+  int dummy = 0; // Non-null but won't be accessed
+  dlpack_array<2> shape   = {2, 0};
+  dlpack_array<2> strides = {0, 1}; // row-major: stride[0] = shape[1] * 1 = 0, stride[1] = 1
+  DLTensor tensor{};
+  tensor.data    = &dummy;
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan = cuda::to_host_mdspan<int, 2, cuda::std::layout_right>(tensor);
+
+  assert(host_mdspan.extent(0) == 2);
+  assert(host_mdspan.extent(1) == 0);
+  assert(host_mdspan.size() == 0);
+  assert(host_mdspan.empty());
+  return true;
+}
+
+bool test_empty_tensor_layout_left_first_dim_zero()
+{
+  int dummy = 0; // Non-null but won't be accessed
+  dlpack_array<2> shape   = {0, 5};
+  dlpack_array<2> strides = {1, 0}; // column-major: stride[0] = 1, stride[1] = shape[0] * 1 = 0
+  DLTensor tensor{};
+  tensor.data    = &dummy;
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan = cuda::to_host_mdspan<int, 2, cuda::std::layout_left>(tensor);
+
+  assert(host_mdspan.extent(0) == 0);
+  assert(host_mdspan.extent(1) == 5);
+  assert(host_mdspan.size() == 0);
+  assert(host_mdspan.empty());
+  return true;
+}
+
+bool test_empty_tensor_layout_stride_explicit_strides()
+{
+  int dummy = 0; // Non-null but won't be accessed
+  dlpack_array<2> shape   = {0, 5};
+  dlpack_array<2> strides = {5, 1}; // explicit strides
+  DLTensor tensor{};
+  tensor.data    = &dummy;
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan = cuda::to_host_mdspan<int, 2>(tensor);
+
+  assert(host_mdspan.extent(0) == 0);
+  assert(host_mdspan.extent(1) == 5);
+  assert(host_mdspan.stride(0) == 5);
+  assert(host_mdspan.stride(1) == 1);
+  assert(host_mdspan.size() == 0);
+  assert(host_mdspan.empty());
+  return true;
+}
+
+bool test_empty_tensor_layout_stride_null_strides()
+{
+  int dummy = 0; // Non-null but won't be accessed
+  dlpack_array<2> shape = {0, 5};
+  DLTensor tensor{};
+  tensor.data    = &dummy;
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = nullptr; // null strides (only valid for DLPack < 1.2)
+  auto host_mdspan = cuda::to_host_mdspan<int, 2>(tensor);
+
+  assert(host_mdspan.extent(0) == 0);
+  assert(host_mdspan.extent(1) == 5);
+  // Should use row-major strides by default
+  assert(host_mdspan.stride(0) == 5);
+  assert(host_mdspan.stride(1) == 1);
+  assert(host_mdspan.size() == 0);
+  assert(host_mdspan.empty());
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// Rank-1 mdspan with all supported layouts
+
+bool test_rank1()
+{
+  cuda::std::array<int, 5> data = {1, 2, 3, 4, 5};
+  dlpack_array<1> shape   = {5};
+  dlpack_array<1> strides = {1};
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 1;
+  tensor.dtype   = ::DLDataType{::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan_right  = cuda::to_host_mdspan<int, 1, cuda::std::layout_right>(tensor);
+  auto host_mdspan_left   = cuda::to_host_mdspan<int, 1, cuda::std::layout_left>(tensor);
+  auto host_mdspan_stride = cuda::to_host_mdspan<int, 1>(tensor);
+
+  assert(host_mdspan_right.rank() == 1);
+  assert(host_mdspan_right.extent(0) == 5);
+  assert(host_mdspan_right.stride(0) == 1);
+  for (int i = 0; i < 5; ++i)
+  {
+    assert(host_mdspan_right(i) == data[i]);
+  }
+  assert(host_mdspan_left.rank() == 1);
+  assert(host_mdspan_left.extent(0) == 5);
+  assert(host_mdspan_left.stride(0) == 1);
+  for (int i = 0; i < 5; ++i)
+  {
+    assert(host_mdspan_left(i) == data[i]);
+  }
+  assert(host_mdspan_stride.rank() == 1);
+  assert(host_mdspan_stride.extent(0) == 5);
+  assert(host_mdspan_stride.stride(0) == 1);
+  for (int i = 0; i < 5; ++i)
+  {
+    assert(host_mdspan_stride(i) == data[i]);
+  }
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// Rank-2 mdspan with layout_right (row-major)
+
+bool test_rank2_layout_right()
+{
+  // 2x3 matrix in row-major order
+  cuda::std::array<float, 6> data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+  dlpack_array<2> shape   = {2, 3};
+  dlpack_array<2> strides = {3, 1}; // row-major
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = cuda::__data_type_to_dlpack<float>();
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan = cuda::to_host_mdspan<float, 2, cuda::std::layout_right>(tensor);
+
+  assert(host_mdspan.rank() == 2);
+  assert(host_mdspan.extent(0) == 2);
+  assert(host_mdspan.extent(1) == 3);
+  assert(host_mdspan.stride(0) == 3); // row stride
+  assert(host_mdspan.stride(1) == 1); // column stride
+  // Check values: row-major layout
+  assert(host_mdspan(0, 0) == 1.0f);
+  assert(host_mdspan(0, 1) == 2.0f);
+  assert(host_mdspan(0, 2) == 3.0f);
+  assert(host_mdspan(1, 0) == 4.0f);
+  assert(host_mdspan(1, 1) == 5.0f);
+  assert(host_mdspan(1, 2) == 6.0f);
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// Rank-2 mdspan with layout_left (column-major)
+
+bool test_rank2_layout_left()
+{
+  // 2x3 matrix in column-major order
+  cuda::std::array<float, 6> data = {1.0f, 4.0f, 2.0f, 5.0f, 3.0f, 6.0f};
+  dlpack_array<2> shape   = {2, 3};
+  dlpack_array<2> strides = {1, 2}; // column-major
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = cuda::__data_type_to_dlpack<float>();
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan = cuda::to_host_mdspan<float, 2, cuda::std::layout_left>(tensor);
+
+  assert(host_mdspan.rank() == 2);
+  assert(host_mdspan.extent(0) == 2);
+  assert(host_mdspan.extent(1) == 3);
+  assert(host_mdspan.stride(0) == 1); // row stride
+  assert(host_mdspan.stride(1) == 2); // column stride
+  // Check values: column-major layout
+  assert(host_mdspan(0, 0) == 1.0f);
+  assert(host_mdspan(0, 1) == 2.0f);
+  assert(host_mdspan(0, 2) == 3.0f);
+  assert(host_mdspan(1, 0) == 4.0f);
+  assert(host_mdspan(1, 1) == 5.0f);
+  assert(host_mdspan(1, 2) == 6.0f);
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// Rank-2 mdspan with layout_stride (arbitrary strides)
+
+bool test_rank2_layout_stride()
+{
+  // 2x3 matrix with custom strides (e.g., padded)
+  cuda::std::array<int, 8> data = {1, 2, 3, 0, 4, 5, 6, 0}; // Each row padded to 4 elements
+  dlpack_array<2> shape   = {2, 3};
+  dlpack_array<2> strides = {4, 1}; // Row stride = 4 (padded), col stride = 1
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = cuda::__data_type_to_dlpack<int>();
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan = cuda::to_host_mdspan<int, 2>(tensor);
+
+  assert(host_mdspan.rank() == 2);
+  assert(host_mdspan.extent(0) == 2);
+  assert(host_mdspan.extent(1) == 3);
+  assert(host_mdspan.stride(0) == 4);
+  assert(host_mdspan.stride(1) == 1);
+  assert(host_mdspan(0, 0) == 1);
+  assert(host_mdspan(0, 1) == 2);
+  assert(host_mdspan(0, 2) == 3);
+  assert(host_mdspan(1, 0) == 4);
+  assert(host_mdspan(1, 1) == 5);
+  assert(host_mdspan(1, 2) == 6);
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// Rank-3 mdspan with layout_right (row-major)
+
+bool test_rank3_layout_right()
+{
+  // 2x3x4 tensor in row-major order
+  cuda::std::array<float, 24> data = {
+    1.0f,  2.0f,  3.0f,  4.0f,  5.0f,  6.0f,  7.0f,  8.0f,  9.0f,  10.0f, 11.0f, 12.0f,
+    13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f};
+  dlpack_array<3> shape   = {2, 3, 4};
+  dlpack_array<3> strides = {12, 4, 1}; // row-major: stride[i] = product of shape[i+1:]
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 3;
+  tensor.dtype   = cuda::__data_type_to_dlpack<float>();
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan = cuda::to_host_mdspan<float, 3, cuda::std::layout_right>(tensor);
+
+  assert(host_mdspan.rank() == 3);
+  assert(host_mdspan.extent(0) == 2);
+  assert(host_mdspan.extent(1) == 3);
+  assert(host_mdspan.extent(2) == 4);
+  assert(host_mdspan.stride(0) == 12);
+  assert(host_mdspan.stride(1) == 4);
+  assert(host_mdspan.stride(2) == 1);
+  // Check values
+  assert(host_mdspan(0, 0, 0) == 1.0f);
+  assert(host_mdspan(0, 0, 3) == 4.0f);
+  assert(host_mdspan(0, 1, 0) == 5.0f);
+  assert(host_mdspan(0, 2, 3) == 12.0f);
+  assert(host_mdspan(1, 0, 0) == 13.0f);
+  assert(host_mdspan(1, 2, 3) == 24.0f);
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// Rank-3 mdspan with layout_left (column-major)
+
+bool test_rank3_layout_left()
+{
+  // 2x3x4 tensor in column-major order
+  // In column-major, elements are stored with the first index varying fastest
+  cuda::std::array<float, 24> data = {
+    1.0f,  2.0f,  3.0f,  4.0f,  5.0f,  6.0f,  7.0f,  8.0f,  9.0f,  10.0f, 11.0f, 12.0f,
+    13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f};
+  dlpack_array<3> shape   = {2, 3, 4};
+  dlpack_array<3> strides = {1, 2, 6}; // column-major: stride[i] = product of shape[:i]
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 3;
+  tensor.dtype   = cuda::__data_type_to_dlpack<float>();
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan = cuda::to_host_mdspan<float, 3, cuda::std::layout_left>(tensor);
+
+  assert(host_mdspan.rank() == 3);
+  assert(host_mdspan.extent(0) == 2);
+  assert(host_mdspan.extent(1) == 3);
+  assert(host_mdspan.extent(2) == 4);
+  assert(host_mdspan.stride(0) == 1);
+  assert(host_mdspan.stride(1) == 2);
+  assert(host_mdspan.stride(2) == 6);
+  // Check values: element (i,j,k) lives at offset i*1 + j*2 + k*6 and holds the value offset + 1
+  assert(host_mdspan(0, 0, 0) == 1.0f);
+  assert(host_mdspan(1, 0, 0) == 2.0f);
+  assert(host_mdspan(0, 1, 0) == 3.0f);
+  assert(host_mdspan(1, 1, 0) == 4.0f);
+  assert(host_mdspan(0, 0, 1) == 7.0f);
+  assert(host_mdspan(1, 2, 3) == 24.0f);
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// Rank-3 mdspan with layout_stride
+
+bool test_rank3_layout_stride()
+{
+  // 2x3x4 tensor with custom strides (padded)
+  cuda::std::array<int, 32> data{}; // Extra space for padding
+  // Fill with sequential values at the expected positions
+  for (int i = 0; i < 2; ++i)
+  {
+    for (int j = 0; j < 3; ++j)
+    {
+      for (int k = 0; k < 4; ++k)
+      {
+        data[i * 16 + j * 5 + k] = i * 12 + j * 4 + k + 1;
+      }
+    }
+  }
+  dlpack_array<3> shape   = {2, 3, 4};
+  dlpack_array<3> strides = {16, 5, 1}; // Custom strides with padding
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 3;
+  tensor.dtype   = cuda::__data_type_to_dlpack<int>();
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan = cuda::to_host_mdspan<int, 3>(tensor);
+
+  assert(host_mdspan.rank() == 3);
+  assert(host_mdspan.extent(0) == 2);
+  assert(host_mdspan.extent(1) == 3);
+  assert(host_mdspan.extent(2) == 4);
+  assert(host_mdspan.stride(0) == 16);
+  assert(host_mdspan.stride(1) == 5);
+  assert(host_mdspan.stride(2) == 1);
+  // Check values
+  assert(host_mdspan(0, 0, 0) == 1);
+  assert(host_mdspan(0, 0, 3) == 4);
+  assert(host_mdspan(0, 1, 0) == 5);
+  assert(host_mdspan(0, 2, 3) == 12);
+  assert(host_mdspan(1, 0, 0) == 13);
+  assert(host_mdspan(1, 2, 3) == 24);
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// const element types
+
+bool test_const_element_type_rank1()
+{
+  const cuda::std::array<float, 5> data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
+  dlpack_array<1> shape   = {5};
+  dlpack_array<1> strides = {1};
+  DLTensor tensor{};
+  tensor.data    = const_cast<float*>(data.data()); // DLPack uses void*, need const_cast
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 1;
+  tensor.dtype   = cuda::__data_type_to_dlpack<float>();
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan = cuda::to_host_mdspan<const float, 1>(tensor);
+
+  static_assert(cuda::std::is_same_v<typename decltype(host_mdspan)::element_type, const float>);
+  assert(host_mdspan.rank() == 1);
+  assert(host_mdspan.extent(0) == 5);
+  for (int i = 0; i < 5; ++i)
+  {
+    assert(host_mdspan(i) == data[i]);
+  }
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// layout_stride with default (layout_right) strides when strides is nullptr
+// Note: This tests the fallback behavior for DLPack < 1.2
+
+bool test_layout_stride_null_strides()
+{
+  cuda::std::array<float, 6> data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+  dlpack_array<2> shape = {2, 3};
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = cuda::__data_type_to_dlpack<float>();
+  tensor.shape   = shape.data();
+  tensor.strides = nullptr; // null strides
+  auto host_mdspan = cuda::to_host_mdspan<float, 2>(tensor);
+  // Should use row-major strides by default
+  assert(host_mdspan.stride(0) == 3);
+  assert(host_mdspan.stride(1) == 1);
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// byte_offset support
+
+bool test_byte_offset()
+{
+  cuda::std::array<int, 8> data = {0, 0, 1, 2, 3, 4, 5, 6};
+  // Skip first 2 ints (8 bytes)
+  dlpack_array<1> shape   = {6};
+  dlpack_array<1> strides = {1};
+  DLTensor tensor{};
+  tensor.data        = data.data();
+  tensor.device      = DLDevice{kDLCPU, 0};
+  tensor.ndim        = 1;
+  tensor.dtype       = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape       = shape.data();
+  tensor.strides     = strides.data();
+  tensor.byte_offset = sizeof(int) * 2;
+  auto host_mdspan = cuda::to_host_mdspan<int, 1>(tensor);
+
+  assert(host_mdspan.extent(0) == 6);
+  assert(host_mdspan(0) == 1);
+  assert(host_mdspan(5) == 6);
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// Return type checking
+
+bool test_return_types()
+{
+  cuda::std::array<float, 4> data{};
+  dlpack_array<1> shape   = {4};
+  dlpack_array<1> strides = {1};
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 1;
+  tensor.dtype   = cuda::__data_type_to_dlpack<float>();
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  // Check return type of to_host_mdspan
+  auto host_ms = cuda::to_host_mdspan<float, 1>(tensor);
+
+  static_assert(
+    cuda::std::is_same_v<decltype(host_ms),
+                         cuda::host_mdspan<float, cuda::std::dims<1, cuda::std::int64_t>, cuda::std::layout_stride>>);
+  assert(host_ms.extent(0) == 4);
+
+  auto host_ms_right = cuda::to_host_mdspan<float, 1, cuda::std::layout_right>(tensor);
+  static_assert(
+    cuda::std::is_same_v<decltype(host_ms_right),
+                         cuda::host_mdspan<float, cuda::std::dims<1, cuda::std::int64_t>, cuda::std::layout_right>>);
+  assert(host_ms_right.extent(0) == 4);
+  return true;
+}
+
+int main(int, char**)
+{
+  NV_IF_TARGET(
+    NV_IS_HOST,
+    (assert(test_rank0()); //
+     // Empty tensor tests
+     assert(test_empty_tensor_layout_right_first_dim_zero());
+     assert(test_empty_tensor_layout_right_second_dim_zero());
+     assert(test_empty_tensor_layout_left_first_dim_zero());
+     assert(test_empty_tensor_layout_stride_explicit_strides());
+     // Rank-1 and Rank-2 tests
+     assert(test_rank1());
+     assert(test_rank2_layout_right());
+     assert(test_rank2_layout_left());
+     assert(test_rank2_layout_stride());
+     // Rank-3 tests
+     assert(test_rank3_layout_right());
+     assert(test_rank3_layout_left());
+     assert(test_rank3_layout_stride());
+     // Const element type tests
+     assert(test_const_element_type_rank1());
+     // Other tests
+     assert(test_byte_offset());
+     assert(test_return_types());))
+#if !(DLPACK_MAJOR_VERSION > 1 || (DLPACK_MAJOR_VERSION == 1 && DLPACK_MINOR_VERSION >= 2))
+  NV_IF_TARGET(NV_IS_HOST,
+               (assert(test_layout_stride_null_strides()); //
+                assert(test_empty_tensor_layout_stride_null_strides());))
+#endif
+  return 0;
+}