diff --git a/docs/libcudacxx/extended_api/mdspan.rst b/docs/libcudacxx/extended_api/mdspan.rst
index ca0582fa0d5..f0b29f3d1d0 100644
--- a/docs/libcudacxx/extended_api/mdspan.rst
+++ b/docs/libcudacxx/extended_api/mdspan.rst
@@ -11,6 +11,7 @@ Mdspan
    mdspan/restrict_accessor
    mdspan/shared_memory_accessor
    mdspan/mdspan_to_dlpack
+   mdspan/dlpack_to_mdspan
 
 .. list-table::
    :widths: 25 45 30 30
@@ -40,3 +41,8 @@ Mdspan
      - Convert a ``mdspan`` to a ``DLTensor``
      - CCCL 3.2.0
      - CUDA 13.2
+
+   * - :ref:`dlpack to mdspan <libcudacxx-extended-api-mdspan-dlpack-to-mdspan>`
+     - Convert a ``DLTensor`` to a ``mdspan``
+     - CCCL 3.2.0
+     - CUDA 13.2
diff --git a/docs/libcudacxx/extended_api/mdspan/dlpack_to_mdspan.rst b/docs/libcudacxx/extended_api/mdspan/dlpack_to_mdspan.rst
new file mode 100644
index 00000000000..daa4d7df7fc
--- /dev/null
+++ b/docs/libcudacxx/extended_api/mdspan/dlpack_to_mdspan.rst
@@ -0,0 +1,138 @@
+.. _libcudacxx-extended-api-mdspan-dlpack-to-mdspan:
+
+DLPack to ``mdspan``
+====================
+
+This functionality provides a conversion from a `DLPack <https://github.com/dmlc/dlpack>`__ ``DLTensor`` to ``cuda::host_mdspan``, ``cuda::device_mdspan``, and ``cuda::managed_mdspan``.
+
+Defined in the ``<cuda/mdspan>`` header.
+
+Conversion functions
+--------------------
+
+.. code:: cuda
+
+   namespace cuda {
+
+   template <typename ElementType, cuda::std::size_t Rank, typename LayoutPolicy = cuda::std::layout_stride>
+   [[nodiscard]] cuda::host_mdspan<ElementType, cuda::std::dims<Rank, cuda::std::int64_t>, LayoutPolicy>
+   to_host_mdspan(const DLTensor& tensor);
+
+   template <typename ElementType, cuda::std::size_t Rank, typename LayoutPolicy = cuda::std::layout_stride>
+   [[nodiscard]] cuda::device_mdspan<ElementType, cuda::std::dims<Rank, cuda::std::int64_t>, LayoutPolicy>
+   to_device_mdspan(const DLTensor& tensor);
+
+   template <typename ElementType, cuda::std::size_t Rank, typename LayoutPolicy = cuda::std::layout_stride>
+   [[nodiscard]] cuda::managed_mdspan<ElementType, cuda::std::dims<Rank, cuda::std::int64_t>, LayoutPolicy>
+   to_managed_mdspan(const DLTensor& tensor);
+
+   } // namespace cuda
+
+Template parameters
+-------------------
+
+- ``ElementType``: The element type of the resulting ``mdspan``. Must match ``DLTensor::dtype``.
+- ``Rank``: The number of dimensions. Must match ``DLTensor::ndim``.
+- ``LayoutPolicy``: The layout policy of the resulting ``mdspan``. Defaults to ``cuda::std::layout_stride``. Supported layouts are:
+
+  - ``cuda::std::layout_right`` (C-contiguous, row-major)
+  - ``cuda::std::layout_left`` (Fortran-contiguous, column-major)
+  - ``cuda::std::layout_stride`` (general strided layout)
+
+Semantics
+---------
+
+The conversion produces a non-owning ``mdspan`` view of the ``DLTensor`` data:
+
+- The ``mdspan`` data pointer is computed as ``static_cast<char*>(tensor.data) + tensor.byte_offset``.
+- For ``rank > 0``, ``mdspan.extent(i)`` is ``tensor.shape[i]``.
+- For ``layout_stride``, ``mdspan.stride(i)`` is ``tensor.strides[i]`` (or computed as row-major if ``strides`` is ``nullptr``, which is allowed only for DLPack < v1.2).
+- The device type is validated:
+
+  - ``kDLCPU`` for ``to_host_mdspan``
+  - ``kDLCUDA`` for ``to_device_mdspan``
+  - ``kDLCUDAManaged`` for ``to_managed_mdspan``
+
+Supported element types:
+
+- ``bool``.
+- Signed and unsigned integers.
+- IEEE-754 floating-point and extended-precision floating-point types, including ``__half``, ``__nv_bfloat16``, ``__float128``, and the FP8, FP6, and FP4 types when available.
+- Complex: ``cuda::std::complex<__half>``, ``cuda::std::complex<float>``, and ``cuda::std::complex<double>``.
+- `CUDA built-in vector types <https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#built-in-vector-types>`__, such as ``int2``, ``float4``, etc.
+- Vector types for extended floating-point, such as ``__half2``, ``__nv_fp8x4_e4m3``, etc.
+
+Constraints
+-----------
+
+- ``LayoutPolicy`` must be one of ``cuda::std::layout_right``, ``cuda::std::layout_left``, or ``cuda::std::layout_stride``.
+- For ``layout_right`` and ``layout_left``, the ``DLTensor`` strides must be compatible with the requested layout, as sketched below.
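+
+For illustration, a minimal sketch of requesting a ``layout_left`` view (the shape, strides, and data here are made up for the example):
+
+.. code:: cuda
+
+   // 2x3 column-major tensor: stride of dimension 0 is 1, stride of dimension 1 is 2
+   float data[6] = {1.f, 4.f, 2.f, 5.f, 3.f, 6.f};
+   int64_t shape[2]   = {2, 3};
+   int64_t strides[2] = {1, 2};
+
+   DLTensor tensor{};
+   tensor.data    = data;
+   tensor.device  = {kDLCPU, 0};
+   tensor.ndim    = 2;
+   tensor.dtype   = DLDataType{kDLFloat, 32, 1};
+   tensor.shape   = shape;
+   tensor.strides = strides;
+
+   // Succeeds because the strides are column-major; requesting
+   // cuda::std::layout_right here would throw std::invalid_argument.
+   auto md = cuda::to_host_mdspan<float, 2, cuda::std::layout_left>(tensor);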
+
+Runtime errors
+--------------
+
+The conversion throws ``std::invalid_argument`` in the following cases:
+
+- ``DLTensor::ndim`` does not match the specified ``Rank``.
+- ``DLTensor::dtype`` does not match ``ElementType``.
+- ``DLTensor::data`` is ``nullptr``.
+- ``DLTensor::shape`` is ``nullptr`` (for rank > 0).
+- Any ``DLTensor::shape[i]`` is negative.
+- ``DLTensor::strides`` is ``nullptr`` for DLPack v1.2 or later.
+- ``DLTensor::strides`` is ``nullptr`` for ``layout_left`` with rank > 1 (DLPack < v1.2).
+- ``DLTensor::strides[i]`` is not positive for ``layout_stride``.
+- ``DLTensor::strides`` are not compatible with the requested ``layout_right`` or ``layout_left``.
+- ``DLTensor::device.device_type`` does not match the target ``mdspan`` type.
+- The data pointer is not properly aligned for the element type.
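+
+Because all of these conditions are only known at run time, a caller receiving a ``DLTensor`` from another framework may want to guard the conversion. A minimal sketch (``tensor`` is assumed to come from elsewhere):
+
+.. code:: cuda
+
+   try {
+     auto md = cuda::to_device_mdspan<float, 2>(tensor);
+     // use md ...
+   } catch (const std::invalid_argument&) {
+     // rank, dtype, shape, strides, alignment, or device type mismatch
+   }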
+
+Availability notes
+------------------
+
+- This API is available only when the DLPack header is present, namely when ``<dlpack/dlpack.h>`` is found in the include path.
+- This API can be disabled by defining ``CCCL_DISABLE_DLPACK`` before including any library headers. In this case, ``<dlpack/dlpack.h>`` will not be included.
+
+References
+----------
+
+- `DLPack C API <https://dmlc.github.io/dlpack/latest/c_api.html>`__ documentation.
+
+Example
+-------
+
+.. code:: cuda
+
+   #include <cuda/mdspan>
+   #include <dlpack/dlpack.h>
+   #include <cassert>
+   #include <cstdint>
+
+   int main() {
+     int data[6] = {0, 1, 2, 3, 4, 5};
+
+     // Create a DLTensor manually for demonstration
+     int64_t shape[2]   = {2, 3};
+     int64_t strides[2] = {3, 1}; // row-major strides
+
+     DLTensor tensor{};
+     tensor.data        = data;
+     tensor.device      = {kDLCPU, 0};
+     tensor.ndim        = 2;
+     tensor.dtype       = DLDataType{kDLInt, 32, 1};
+     tensor.shape       = shape;
+     tensor.strides     = strides;
+     tensor.byte_offset = 0;
+
+     // Convert to host_mdspan
+     auto md = cuda::to_host_mdspan<int, 2>(tensor);
+
+     assert(md.rank() == 2);
+     assert(md.extent(0) == 2 && md.extent(1) == 3);
+     assert(md.stride(0) == 3 && md.stride(1) == 1);
+     assert(md.data_handle() == data);
+     assert(md(0, 0) == 0 && md(1, 2) == 5);
+   }
+
+See also
+--------
+
+- :ref:`libcudacxx-extended-api-mdspan-mdspan-to-dlpack` for the reverse conversion.
diff --git a/libcudacxx/include/cuda/__internal/dlpack.h b/libcudacxx/include/cuda/__internal/dlpack.h
index 61fb5dfcd2a..5af47c34b08 100644
--- a/libcudacxx/include/cuda/__internal/dlpack.h
+++ b/libcudacxx/include/cuda/__internal/dlpack.h
@@ -26,7 +26,7 @@
 # include <dlpack/dlpack.h>
 
 # define _CCCL_DLPACK_AT_LEAST(_MAJOR, _MINOR) \
-  (DLPACK_MAJOR_VERSION > (_MAJOR) || (DLPACK_MAJOR_VERSION == (_MAJOR) && DLPACK_VERSION_MINOR >= (_MINOR)))
+  (DLPACK_MAJOR_VERSION > (_MAJOR) || (DLPACK_MAJOR_VERSION == (_MAJOR) && DLPACK_MINOR_VERSION >= (_MINOR)))
 # define _CCCL_DLPACK_BELOW(_MAJOR, _MINOR) (!_CCCL_DLPACK_AT_LEAST(_MAJOR, _MINOR))
 
 # if DLPACK_MAJOR_VERSION != 1
diff --git a/libcudacxx/include/cuda/__mdspan/dlpack_to_mdspan.h b/libcudacxx/include/cuda/__mdspan/dlpack_to_mdspan.h
new file mode 100644
index 00000000000..565bb9894c0
--- /dev/null
+++ b/libcudacxx/include/cuda/__mdspan/dlpack_to_mdspan.h
@@ -0,0 +1,270 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA___MDSPAN_DLPACK_TO_MDSPAN_H
+#define _CUDA___MDSPAN_DLPACK_TO_MDSPAN_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#if _CCCL_HAS_DLPACK()
+
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+
+# if !_CCCL_COMPILER(NVRTC)
+#  include <stdexcept>
+# endif // !_CCCL_COMPILER(NVRTC)
+
+# include
+//
+# include <cuda/std/__cccl/prologue.h>
+
+_CCCL_BEGIN_NAMESPACE_CUDA
+
+template <class _ElementType>
+[[nodiscard]] _CCCL_HOST_API inline bool __validate_dlpack_data_type(const ::DLDataType& __dtype) noexcept
+{
+  const auto __expected = ::cuda::__data_type_to_dlpack<_ElementType>();
+  return __dtype.code == __expected.code && __dtype.bits == __expected.bits && __dtype.lanes == __expected.lanes;
+}
+
+[[nodiscard]]
+_CCCL_HOST_API inline ::cuda::std::int64_t
+__get_layout_right_stride(const ::cuda::std::int64_t* __shapes, ::cuda::std::size_t __pos, ::cuda::std::size_t __rank)
+{
+  ::cuda::std::int64_t __stride = 1;
+  for (auto __i = __pos + 1; __i < __rank; ++__i)
+  {
+    // TODO: replace with mul_overflow
+    if (const auto __hi = ::cuda::mul_hi(__stride, __shapes[__i]); __hi != 0 && __hi != -1)
+    {
+      _CCCL_THROW(::std::invalid_argument{"shape overflow"});
+    }
+    __stride *= __shapes[__i]; // TODO: check for overflow
+  }
+  return __stride;
+}
+
+[[nodiscard]]
+_CCCL_HOST_API inline ::cuda::std::int64_t
+__get_layout_left_stride(const ::cuda::std::int64_t* __shapes, ::cuda::std::size_t __pos)
+{
+  ::cuda::std::int64_t __stride = 1;
+  for (::cuda::std::size_t __i = 0; __i < __pos; ++__i)
+  {
+    // TODO: replace with mul_overflow
+    if (const auto __hi = ::cuda::mul_hi(__stride, __shapes[__i]); __hi != 0 && __hi != -1)
+    {
+      _CCCL_THROW(::std::invalid_argument{"shape overflow"});
+    }
+    __stride *= __shapes[__i];
+  }
+  return __stride;
+}
+
+template <class _LayoutPolicy>
+_CCCL_HOST_API void __validate_dlpack_strides(const ::DLTensor& __tensor, [[maybe_unused]] ::cuda::std::size_t __rank)
+{
+  [[maybe_unused]] constexpr bool __is_layout_right = ::cuda::std::is_same_v<_LayoutPolicy, ::cuda::std::layout_right>;
+  [[maybe_unused]] constexpr bool __is_layout_left  = ::cuda::std::is_same_v<_LayoutPolicy, ::cuda::std::layout_left>;
+  [[maybe_unused]] constexpr bool __is_layout_stride =
+    ::cuda::std::is_same_v<_LayoutPolicy, ::cuda::std::layout_stride>;
+  const auto __strides_ptr = __tensor.strides;
+  if (__strides_ptr == nullptr)
+  {
+# if _CCCL_DLPACK_AT_LEAST(1, 2)
+    _CCCL_THROW(::std::invalid_argument{"strides=nullptr is not supported for DLPack v1.2 and later"});
+# else
+    // strides == nullptr means row-major (C-contiguous) layout
+    if (__is_layout_left && __rank > 1)
+    {
+      _CCCL_THROW(::std::invalid_argument{"strides must be non-null for layout_left"});
+    }
+    else
+    {
+      return;
+    }
+# endif // _CCCL_DLPACK_AT_LEAST(1, 2)
+  }
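+  // A stride vector is compatible with layout_right when stride[i] == shape[i+1] * ... * shape[rank-1],
+  // and with layout_left when stride[i] == shape[0] * ... * shape[i-1]. The loop below checks each
+  // position against the expected value (or, for layout_stride, only for strict positivity).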
+  for (::cuda::std::size_t __pos = 0; __pos < __rank; ++__pos)
+  {
+    if constexpr (__is_layout_right)
+    {
+      if (__strides_ptr[__pos] != ::cuda::__get_layout_right_stride(__tensor.shape, __pos, __rank))
+      {
+        _CCCL_THROW(::std::invalid_argument{"DLTensor strides are not compatible with layout_right"});
+      }
+    }
+    else if constexpr (__is_layout_left)
+    {
+      if (__strides_ptr[__pos] != ::cuda::__get_layout_left_stride(__tensor.shape, __pos))
+      {
+        _CCCL_THROW(::std::invalid_argument{"DLTensor strides are not compatible with layout_left"});
+      }
+    }
+    else if constexpr (__is_layout_stride)
+    {
+      if (__strides_ptr[__pos] <= 0)
+      {
+        _CCCL_THROW(::std::invalid_argument{"layout_stride requires strictly positive strides"});
+      }
+    }
+  }
+}
+
+template <class _ElementType, ::cuda::std::size_t _Rank, class _LayoutPolicy>
+[[nodiscard]]
+_CCCL_HOST_API ::cuda::std::mdspan<_ElementType, ::cuda::std::dims<_Rank, ::cuda::std::int64_t>, _LayoutPolicy>
+__to_mdspan(const ::DLTensor& __tensor)
+{
+  using __extents_type = ::cuda::std::dims<_Rank, ::cuda::std::int64_t>;
+  using __mdspan_type  = ::cuda::std::mdspan<_ElementType, __extents_type, _LayoutPolicy>;
+  using __mapping_type = typename _LayoutPolicy::template mapping<__extents_type>;
+  using __element_type = typename __mdspan_type::element_type;
+  constexpr bool __is_layout_right  = ::cuda::std::is_same_v<_LayoutPolicy, ::cuda::std::layout_right>;
+  constexpr bool __is_layout_left   = ::cuda::std::is_same_v<_LayoutPolicy, ::cuda::std::layout_left>;
+  constexpr bool __is_layout_stride = ::cuda::std::is_same_v<_LayoutPolicy, ::cuda::std::layout_stride>;
+  // TODO: add support for layout_stride_relaxed, layout_right_padded, layout_left_padded
+  if constexpr (!__is_layout_right && !__is_layout_left && !__is_layout_stride)
+  {
+    static_assert(::cuda::std::__always_false_v<_LayoutPolicy>, "Unsupported layout policy");
+    return __mdspan_type{};
+  }
+  else
+  {
+    if (::cuda::std::cmp_not_equal(__tensor.ndim, _Rank))
+    {
+      _CCCL_THROW(::std::invalid_argument{"DLTensor rank does not match expected rank"});
+    }
+    if (!::cuda::__validate_dlpack_data_type<__element_type>(__tensor.dtype))
+    {
+      _CCCL_THROW(::std::invalid_argument{"DLTensor data type does not match expected type"});
+    }
+    if (__tensor.data == nullptr)
+    {
+      _CCCL_THROW(::std::invalid_argument{"DLTensor data must be non-null"});
+    }
+    auto __base_data           = static_cast<char*>(__tensor.data) + __tensor.byte_offset;
+    auto __data                = reinterpret_cast<__element_type*>(__base_data);
+    const auto __datatype_size = __tensor.dtype.bits * __tensor.dtype.lanes / 8;
+    // The data type size is used as a proxy for its alignment. The two are not equal in general,
+    // but they coincide for all supported data types.
+    if (__datatype_size > 0 && !::cuda::is_aligned(__data, __datatype_size))
+    {
+      _CCCL_THROW(::std::invalid_argument{"DLTensor data must be aligned to the data type"});
+    }
+    if constexpr (_Rank == 0)
+    {
+      return __mdspan_type{__data, __mapping_type{}};
+    }
+    else // Rank > 0
+    {
+      if (__tensor.shape == nullptr)
+      {
+        _CCCL_THROW(::std::invalid_argument{"DLTensor shape must be non-null"});
+      }
+      ::cuda::std::array<::cuda::std::int64_t, _Rank> __extents_array{};
+      for (::cuda::std::size_t __i = 0; __i < _Rank; ++__i)
+      {
+        if (__tensor.shape[__i] < 0)
+        {
+          _CCCL_THROW(::std::invalid_argument{"DLTensor shapes must be non-negative"});
+        }
+        __extents_array[__i] = __tensor.shape[__i];
+      }
+      ::cuda::__validate_dlpack_strides<_LayoutPolicy>(__tensor, _Rank);
+      if constexpr (__is_layout_stride)
+      {
+        ::cuda::std::array<::cuda::std::int64_t, _Rank> __strides_array{};
+        for (::cuda::std::size_t __i = 0; __i < _Rank; ++__i)
+        {
+          const bool __has_strides = __tensor.strides != nullptr;
+          __strides_array[__i] =
+            __has_strides ? __tensor.strides[__i] : ::cuda::__get_layout_right_stride(__tensor.shape, __i, _Rank);
+        }
+        return __mdspan_type{__data, __mapping_type{__extents_array, __strides_array}};
+      }
+      else
+      {
+        return __mdspan_type{__data, __extents_type{__extents_array}};
+      }
+    }
+  }
+}
+
+/***********************************************************************************************************************
+ * Public API
+ **********************************************************************************************************************/
+
+template <class _ElementType, ::cuda::std::size_t _Rank, class _LayoutPolicy = ::cuda::std::layout_stride>
+[[nodiscard]]
+_CCCL_HOST_API ::cuda::host_mdspan<_ElementType, ::cuda::std::dims<_Rank, ::cuda::std::int64_t>, _LayoutPolicy>
+to_host_mdspan(const ::DLTensor& __tensor)
+{
+  if (__tensor.device.device_type != ::kDLCPU)
+  {
+    _CCCL_THROW(::std::invalid_argument{"DLTensor device type must be kDLCPU for host_mdspan"});
+  }
+  using __extents_type = ::cuda::std::dims<_Rank, ::cuda::std::int64_t>;
+  using __mdspan_type  = ::cuda::host_mdspan<_ElementType, __extents_type, _LayoutPolicy>;
+  return __mdspan_type{::cuda::__to_mdspan<_ElementType, _Rank, _LayoutPolicy>(__tensor)};
+}
+
+template <class _ElementType, ::cuda::std::size_t _Rank, class _LayoutPolicy = ::cuda::std::layout_stride>
+[[nodiscard]]
+_CCCL_HOST_API ::cuda::device_mdspan<_ElementType, ::cuda::std::dims<_Rank, ::cuda::std::int64_t>, _LayoutPolicy>
+to_device_mdspan(const ::DLTensor& __tensor)
+{
+  if (__tensor.device.device_type != ::kDLCUDA)
+  {
+    _CCCL_THROW(::std::invalid_argument{"DLTensor device type must be kDLCUDA for device_mdspan"});
+  }
+  using __extents_type = ::cuda::std::dims<_Rank, ::cuda::std::int64_t>;
+  using __mdspan_type  = ::cuda::device_mdspan<_ElementType, __extents_type, _LayoutPolicy>;
+  return __mdspan_type{::cuda::__to_mdspan<_ElementType, _Rank, _LayoutPolicy>(__tensor)};
+}
+
+template <class _ElementType, ::cuda::std::size_t _Rank, class _LayoutPolicy = ::cuda::std::layout_stride>
+[[nodiscard]]
+_CCCL_HOST_API ::cuda::managed_mdspan<_ElementType, ::cuda::std::dims<_Rank, ::cuda::std::int64_t>, _LayoutPolicy>
+to_managed_mdspan(const ::DLTensor& __tensor)
+{
+  if (__tensor.device.device_type != ::kDLCUDAManaged)
+  {
+    _CCCL_THROW(::std::invalid_argument{"DLTensor device type must be kDLCUDAManaged for managed_mdspan"});
+  }
+  using __extents_type = ::cuda::std::dims<_Rank, ::cuda::std::int64_t>;
+  using __mdspan_type  = ::cuda::managed_mdspan<_ElementType, __extents_type, _LayoutPolicy>;
+  return __mdspan_type{::cuda::__to_mdspan<_ElementType, _Rank, _LayoutPolicy>(__tensor)};
+}
+
+_CCCL_END_NAMESPACE_CUDA
+
+# include <cuda/std/__cccl/epilogue.h>
+
+#endif // _CCCL_HAS_DLPACK()
+#endif // _CUDA___MDSPAN_DLPACK_TO_MDSPAN_H
diff --git a/libcudacxx/include/cuda/mdspan b/libcudacxx/include/cuda/mdspan
index 3129198d02a..f8e36e75e43 100644
--- a/libcudacxx/include/cuda/mdspan
+++ b/libcudacxx/include/cuda/mdspan
@@ -21,6 +21,7 @@
 # pragma system_header
 #endif // no system header
 
+#include <cuda/__mdspan/dlpack_to_mdspan.h>
 #include
 #include
 #include
diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/dlpack_to_mdspan/dlpack_to_mdspan.exceptions.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/dlpack_to_mdspan/dlpack_to_mdspan.exceptions.pass.cpp
new file mode 100644
index 00000000000..8fc6e55839c
--- /dev/null
+++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/dlpack_to_mdspan/dlpack_to_mdspan.exceptions.pass.cpp
@@ -0,0 +1,363 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+// UNSUPPORTED: nvrtc
+
+#include <cuda/mdspan>
+#include <cuda/std/array>
+#include <cuda/std/cstddef>
+#include <cuda/std/cstdint>
+
+#include <cassert>
+#include <stdexcept>
+
+#include "test_macros.h"
+#include <dlpack/dlpack.h>
+#include <nv/target>
+
+template <cuda::std::size_t N>
+using dlpack_array = cuda::std::array<cuda::std::int64_t, N>;
+
+//----------------------------------------------------------------------------------------------------------------------
+// Exception tests
+
+void test_exception_wrong_rank()
+{
+  cuda::std::array<int, 6> data{};
+  dlpack_array<2> shape   = {2, 3};
+  dlpack_array<2> strides = {3, 1}; // row-major
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  bool caught    = false;
+  try
+  {
+    // Try to convert a rank-2 tensor to a rank-1 mdspan
+    unused(cuda::to_host_mdspan<int, 1>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_wrong_dtype()
+{
+  cuda::std::array<int, 4> data{};
+  dlpack_array<1> shape   = {4};
+  dlpack_array<1> strides = {1};
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 1;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1}; // dtype is int
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  bool caught    = false;
+  try
+  {
+    // Try to convert an int tensor to a float mdspan
+    unused(cuda::to_host_mdspan<float, 1>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_null_data()
+{
+  dlpack_array<1> shape   = {4};
+  dlpack_array<1> strides = {1};
+  DLTensor tensor{};
+  tensor.data    = nullptr;
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 1;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  bool caught    = false;
+  try
+  {
+    unused(cuda::to_host_mdspan<int, 1>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_null_shape()
+{
+  cuda::std::array<int, 4> data{};
+  DLTensor tensor{};
+  tensor.data   = data.data();
+  tensor.device = DLDevice{kDLCPU, 0};
+  tensor.ndim   = 1;
+  tensor.dtype  = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape  = nullptr; // null shape
+  bool caught   = false;
+  try
+  {
+    unused(cuda::to_host_mdspan<int, 1>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_negative_shape()
+{
+  cuda::std::array<int, 4> data{};
+  dlpack_array<1> shape   = {-3}; // negative shape
+  dlpack_array<1> strides = {1};
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 1;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  bool caught    = false;
+  try
+  {
+    unused(cuda::to_host_mdspan<int, 1>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_wrong_device_type_host()
+{
+  cuda::std::array<int, 4> data{};
+  dlpack_array<1> shape   = {4};
+  dlpack_array<1> strides = {1};
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{::kDLCUDA, 0}; // CUDA device, not CPU
+  tensor.ndim    = 1;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  bool caught    = false;
+  try
+  {
+    unused(cuda::to_host_mdspan<int, 1>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_wrong_device_type_device()
+{
+  cuda::std::array<int, 4> data{};
+  dlpack_array<1> shape   = {4};
+  dlpack_array<1> strides = {1};
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0}; // CPU device, not CUDA
+  tensor.ndim    = 1;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  bool caught    = false;
+  try
+  {
+    unused(cuda::to_device_mdspan<int, 1>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_wrong_device_type_managed()
+{
+  cuda::std::array<int, 4> data{};
+  dlpack_array<1> shape   = {4};
+  dlpack_array<1> strides = {1};
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0}; // CPU device, not CUDA managed
+  tensor.ndim    = 1;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  bool caught    = false;
+  try
+  {
+    unused(cuda::to_managed_mdspan<int, 1>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_stride_mismatch_layout_right()
+{
+  cuda::std::array<float, 6> data{};
+  dlpack_array<2> shape   = {2, 3};
+  dlpack_array<2> strides = {1, 2}; // Column-major, not row-major
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLFloat, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  bool caught    = false;
+  try
+  {
+    unused(cuda::to_host_mdspan<float, 2, cuda::std::layout_right>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_stride_mismatch_layout_left()
+{
+  cuda::std::array<float, 6> data{};
+  dlpack_array<2> shape   = {2, 3};
+  dlpack_array<2> strides = {3, 1}; // Row-major, not column-major
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLFloat, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  bool caught    = false;
+  try
+  {
+    unused(cuda::to_host_mdspan<float, 2, cuda::std::layout_left>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_zero_stride_layout_stride()
+{
+  cuda::std::array<int, 6> data{};
+  dlpack_array<2> shape   = {2, 3};
+  dlpack_array<2> strides = {0, 1}; // Zero stride is invalid
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  bool caught    = false;
+  try
+  {
+    unused(cuda::to_host_mdspan<int, 2, cuda::std::layout_stride>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_null_strides_dlpack_v12()
+{
+  cuda::std::array<float, 6> data{};
+  dlpack_array<2> shape = {2, 3};
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLFloat, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = nullptr; // null strides not allowed in DLPack v1.2+
+  bool caught    = false;
+  try
+  {
+    unused(cuda::to_host_mdspan<float, 2>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+void test_exception_misaligned_data()
+{
+  // Create a buffer that allows us to get a misaligned pointer
+  alignas(16) cuda::std::array<char, 16> buffer{};
+  // Get a pointer that's 1 byte into the buffer (misaligned for int)
+  auto misaligned_ptr     = static_cast<void*>(buffer.data() + 1);
+  dlpack_array<1> shape   = {3};
+  dlpack_array<1> strides = {1};
+  DLTensor tensor{};
+  tensor.data    = misaligned_ptr;
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 1;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  bool caught    = false;
+  try
+  {
+    unused(cuda::to_host_mdspan<int, 1>(tensor));
+  }
+  catch (const std::invalid_argument&)
+  {
+    caught = true;
+  }
+  assert(caught);
+}
+
+bool test_exceptions()
+{
+  test_exception_wrong_rank();
+  test_exception_wrong_dtype();
+  test_exception_null_data();
+  test_exception_null_shape();
+  test_exception_negative_shape();
+  test_exception_wrong_device_type_host();
+  test_exception_wrong_device_type_device();
+  test_exception_wrong_device_type_managed();
+  test_exception_stride_mismatch_layout_right();
+  test_exception_stride_mismatch_layout_left();
+  test_exception_zero_stride_layout_stride();
+#if DLPACK_MAJOR_VERSION > 1 || (DLPACK_MAJOR_VERSION == 1 && DLPACK_MINOR_VERSION >= 2)
+  test_exception_null_strides_dlpack_v12();
+#endif
+  test_exception_misaligned_data();
+  return true;
+}
+
+int main(int, char**)
+{
+  NV_IF_TARGET(NV_IS_HOST, (assert(test_exceptions());))
+  return 0;
+}
diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/dlpack_to_mdspan/dlpack_to_mdspan.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/dlpack_to_mdspan/dlpack_to_mdspan.pass.cpp
new file mode 100644
index 00000000000..c2d4b7fdd94
--- /dev/null
+++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/dlpack_to_mdspan/dlpack_to_mdspan.pass.cpp
@@ -0,0 +1,554 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+// UNSUPPORTED: nvrtc
+
+#include <cuda/mdspan>
+#include <cuda/std/array>
+#include <cuda/std/cstddef>
+#include <cuda/std/cstdint>
+#include <cuda/std/type_traits>
+
+#include <cassert>
+
+#include "test_macros.h"
+#include <dlpack/dlpack.h>
+#include <nv/target>
+
+template <cuda::std::size_t N>
+using dlpack_array = cuda::std::array<cuda::std::int64_t, N>;
+
+//----------------------------------------------------------------------------------------------------------------------
+// Rank-0 mdspan conversion
+
+bool test_rank0()
+{
+  float data = 42.0f;
+  DLTensor tensor{};
+  tensor.data   = &data;
+  tensor.device = DLDevice{kDLCPU, 0};
+  tensor.ndim   = 0;
+  tensor.dtype  = DLDataType{DLDataTypeCode::kDLFloat, 32, 1};
+  auto host_mdspan = cuda::to_host_mdspan<float, 0>(tensor);
+
+  assert(host_mdspan.rank() == 0);
+  assert(host_mdspan.size() == 1);
+  assert(host_mdspan.data_handle() == &data);
+  assert(host_mdspan() == 42.0f);
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// Empty tensor (zero in one dimension)
+
+bool test_empty_tensor_layout_right_first_dim_zero()
+{
+  int dummy = 0; // Non-null but won't be accessed
+  dlpack_array<2> shape   = {0, 5};
+  dlpack_array<2> strides = {5, 1}; // row-major
+  DLTensor tensor{};
+  tensor.data    = &dummy;
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan = cuda::to_host_mdspan<int, 2, cuda::std::layout_right>(tensor);
+
+  assert(host_mdspan.extent(0) == 0);
+  assert(host_mdspan.extent(1) == 5);
+  assert(host_mdspan.size() == 0);
+  assert(host_mdspan.empty());
+  return true;
+}
+
+bool test_empty_tensor_layout_right_second_dim_zero()
+{
+  int dummy = 0; // Non-null but won't be accessed
+  dlpack_array<2> shape   = {2, 0};
+  dlpack_array<2> strides = {0, 1}; // row-major: stride[0] = shape[1] * 1 = 0, stride[1] = 1
+  DLTensor tensor{};
+  tensor.data    = &dummy;
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan = cuda::to_host_mdspan<int, 2, cuda::std::layout_right>(tensor);
+
+  assert(host_mdspan.extent(0) == 2);
+  assert(host_mdspan.extent(1) == 0);
+  assert(host_mdspan.size() == 0);
+  assert(host_mdspan.empty());
+  return true;
+}
+
+bool test_empty_tensor_layout_left_first_dim_zero()
+{
+  int dummy = 0; // Non-null but won't be accessed
+  dlpack_array<2> shape   = {0, 5};
+  dlpack_array<2> strides = {1, 0}; // column-major: stride[0] = 1, stride[1] = shape[0] * 1 = 0
+  DLTensor tensor{};
+  tensor.data    = &dummy;
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan = cuda::to_host_mdspan<int, 2, cuda::std::layout_left>(tensor);
+
+  assert(host_mdspan.extent(0) == 0);
+  assert(host_mdspan.extent(1) == 5);
+  assert(host_mdspan.size() == 0);
+  assert(host_mdspan.empty());
+  return true;
+}
+
+bool test_empty_tensor_layout_stride_explicit_strides()
+{
+  int dummy = 0; // Non-null but won't be accessed
+  dlpack_array<2> shape   = {0, 5};
+  dlpack_array<2> strides = {5, 1}; // explicit strides
+  DLTensor tensor{};
+  tensor.data    = &dummy;
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan = cuda::to_host_mdspan<int, 2>(tensor);
+
+  assert(host_mdspan.extent(0) == 0);
+  assert(host_mdspan.extent(1) == 5);
+  assert(host_mdspan.stride(0) == 5);
+  assert(host_mdspan.stride(1) == 1);
+  assert(host_mdspan.size() == 0);
+  assert(host_mdspan.empty());
+  return true;
+}
+
+bool test_empty_tensor_layout_stride_null_strides()
+{
+  int dummy = 0; // Non-null but won't be accessed
+  dlpack_array<2> shape = {0, 5};
+  DLTensor tensor{};
+  tensor.data    = &dummy;
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = nullptr; // null strides (only valid for DLPack < 1.2)
+  auto host_mdspan = cuda::to_host_mdspan<int, 2>(tensor);
+
+  assert(host_mdspan.extent(0) == 0);
+  assert(host_mdspan.extent(1) == 5);
+  // Should use row-major strides by default
+  assert(host_mdspan.stride(0) == 5);
+  assert(host_mdspan.stride(1) == 1);
+  assert(host_mdspan.size() == 0);
+  assert(host_mdspan.empty());
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// Rank-1 mdspan with all supported layouts
+
+bool test_rank1()
+{
+  cuda::std::array<int, 5> data = {1, 2, 3, 4, 5};
+  dlpack_array<1> shape   = {5};
+  dlpack_array<1> strides = {1};
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 1;
+  tensor.dtype   = ::DLDataType{::kDLInt, 32, 1};
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan_right  = cuda::to_host_mdspan<int, 1, cuda::std::layout_right>(tensor);
+  auto host_mdspan_left   = cuda::to_host_mdspan<int, 1, cuda::std::layout_left>(tensor);
+  auto host_mdspan_stride = cuda::to_host_mdspan<int, 1>(tensor);
+
+  assert(host_mdspan_right.rank() == 1);
+  assert(host_mdspan_right.extent(0) == 5);
+  assert(host_mdspan_right.stride(0) == 1);
+  for (int i = 0; i < 5; ++i)
+  {
+    assert(host_mdspan_right(i) == data[i]);
+  }
+  assert(host_mdspan_left.rank() == 1);
+  assert(host_mdspan_left.extent(0) == 5);
+  assert(host_mdspan_left.stride(0) == 1);
+  for (int i = 0; i < 5; ++i)
+  {
+    assert(host_mdspan_left(i) == data[i]);
+  }
+  assert(host_mdspan_stride.rank() == 1);
+  assert(host_mdspan_stride.extent(0) == 5);
+  assert(host_mdspan_stride.stride(0) == 1);
+  for (int i = 0; i < 5; ++i)
+  {
+    assert(host_mdspan_stride(i) == data[i]);
+  }
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// Rank-2 mdspan with layout_right (row-major)
+
+bool test_rank2_layout_right()
+{
+  // 2x3 matrix in row-major order
+  cuda::std::array<float, 6> data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+  dlpack_array<2> shape   = {2, 3};
+  dlpack_array<2> strides = {3, 1}; // row-major
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = cuda::__data_type_to_dlpack<float>();
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan = cuda::to_host_mdspan<float, 2, cuda::std::layout_right>(tensor);
+
+  assert(host_mdspan.rank() == 2);
+  assert(host_mdspan.extent(0) == 2);
+  assert(host_mdspan.extent(1) == 3);
+  assert(host_mdspan.stride(0) == 3); // row stride
+  assert(host_mdspan.stride(1) == 1); // column stride
+  // Check values: row-major layout
+  assert(host_mdspan(0, 0) == 1.0f);
+  assert(host_mdspan(0, 1) == 2.0f);
+  assert(host_mdspan(0, 2) == 3.0f);
+  assert(host_mdspan(1, 0) == 4.0f);
+  assert(host_mdspan(1, 1) == 5.0f);
+  assert(host_mdspan(1, 2) == 6.0f);
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// Rank-2 mdspan with layout_left (column-major)
+
+bool test_rank2_layout_left()
+{
+  // 2x3 matrix in column-major order
+  cuda::std::array<float, 6> data = {1.0f, 4.0f, 2.0f, 5.0f, 3.0f, 6.0f};
+  dlpack_array<2> shape   = {2, 3};
+  dlpack_array<2> strides = {1, 2}; // column-major
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = cuda::__data_type_to_dlpack<float>();
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan = cuda::to_host_mdspan<float, 2, cuda::std::layout_left>(tensor);
+
+  assert(host_mdspan.rank() == 2);
+  assert(host_mdspan.extent(0) == 2);
+  assert(host_mdspan.extent(1) == 3);
+  assert(host_mdspan.stride(0) == 1); // row stride
+  assert(host_mdspan.stride(1) == 2); // column stride
+  // Check values: column-major layout
+  assert(host_mdspan(0, 0) == 1.0f);
+  assert(host_mdspan(0, 1) == 2.0f);
+  assert(host_mdspan(0, 2) == 3.0f);
+  assert(host_mdspan(1, 0) == 4.0f);
+  assert(host_mdspan(1, 1) == 5.0f);
+  assert(host_mdspan(1, 2) == 6.0f);
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// Rank-2 mdspan with layout_stride (arbitrary strides)
+
+bool test_rank2_layout_stride()
+{
+  // 2x3 matrix with custom strides (e.g., padded)
+  cuda::std::array<int, 8> data = {1, 2, 3, 0, 4, 5, 6, 0}; // Each row padded to 4 elements
+  dlpack_array<2> shape   = {2, 3};
+  dlpack_array<2> strides = {4, 1}; // Row stride = 4 (padded), col stride = 1
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = cuda::__data_type_to_dlpack<int>();
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan = cuda::to_host_mdspan<int, 2>(tensor);
+
+  assert(host_mdspan.rank() == 2);
+  assert(host_mdspan.extent(0) == 2);
+  assert(host_mdspan.extent(1) == 3);
+  assert(host_mdspan.stride(0) == 4);
+  assert(host_mdspan.stride(1) == 1);
+  assert(host_mdspan(0, 0) == 1);
+  assert(host_mdspan(0, 1) == 2);
+  assert(host_mdspan(0, 2) == 3);
+  assert(host_mdspan(1, 0) == 4);
+  assert(host_mdspan(1, 1) == 5);
+  assert(host_mdspan(1, 2) == 6);
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// Rank-3 mdspan with layout_right (row-major)
+
+bool test_rank3_layout_right()
+{
+  // 2x3x4 tensor in row-major order
+  cuda::std::array<float, 24> data = {
+    1.0f,  2.0f,  3.0f,  4.0f,  5.0f,  6.0f,  7.0f,  8.0f,  9.0f,  10.0f, 11.0f, 12.0f,
+    13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f};
+  dlpack_array<3> shape   = {2, 3, 4};
+  dlpack_array<3> strides = {12, 4, 1}; // row-major: stride[i] = product of shape[i+1:]
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 3;
+  tensor.dtype   = cuda::__data_type_to_dlpack<float>();
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan = cuda::to_host_mdspan<float, 3, cuda::std::layout_right>(tensor);
+
+  assert(host_mdspan.rank() == 3);
+  assert(host_mdspan.extent(0) == 2);
+  assert(host_mdspan.extent(1) == 3);
+  assert(host_mdspan.extent(2) == 4);
+  assert(host_mdspan.stride(0) == 12);
+  assert(host_mdspan.stride(1) == 4);
+  assert(host_mdspan.stride(2) == 1);
+  // Check values
+  assert(host_mdspan(0, 0, 0) == 1.0f);
+  assert(host_mdspan(0, 0, 3) == 4.0f);
+  assert(host_mdspan(0, 1, 0) == 5.0f);
+  assert(host_mdspan(0, 2, 3) == 12.0f);
+  assert(host_mdspan(1, 0, 0) == 13.0f);
+  assert(host_mdspan(1, 2, 3) == 24.0f);
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// Rank-3 mdspan with layout_left (column-major)
+
+bool test_rank3_layout_left()
+{
+  // 2x3x4 tensor in column-major order
+  // In column-major, elements are stored with the first index varying fastest
+  cuda::std::array<float, 24> data = {
+    1.0f,  2.0f,  3.0f,  4.0f,  5.0f,  6.0f,  7.0f,  8.0f,  9.0f,  10.0f, 11.0f, 12.0f,
+    13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f};
+  dlpack_array<3> shape   = {2, 3, 4};
+  dlpack_array<3> strides = {1, 2, 6}; // column-major: stride[i] = product of shape[:i]
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 3;
+  tensor.dtype   = cuda::__data_type_to_dlpack<float>();
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan = cuda::to_host_mdspan<float, 3, cuda::std::layout_left>(tensor);
+
+  assert(host_mdspan.rank() == 3);
+  assert(host_mdspan.extent(0) == 2);
+  assert(host_mdspan.extent(1) == 3);
+  assert(host_mdspan.extent(2) == 4);
+  assert(host_mdspan.stride(0) == 1);
+  assert(host_mdspan.stride(1) == 2);
+  assert(host_mdspan.stride(2) == 6);
+  // Check values: element (i,j,k) lives at offset i*1 + j*2 + k*6 and holds the value offset + 1
+  assert(host_mdspan(0, 0, 0) == 1.0f);
+  assert(host_mdspan(1, 0, 0) == 2.0f);
+  assert(host_mdspan(0, 1, 0) == 3.0f);
+  assert(host_mdspan(1, 1, 0) == 4.0f);
+  assert(host_mdspan(0, 0, 1) == 7.0f);
+  assert(host_mdspan(1, 2, 3) == 24.0f);
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// Rank-3 mdspan with layout_stride
+
+bool test_rank3_layout_stride()
+{
+  // 2x3x4 tensor with custom strides (padded)
+  cuda::std::array<int, 32> data{}; // Extra space for padding
+  // Fill with sequential values at the expected positions
+  for (int i = 0; i < 2; ++i)
+  {
+    for (int j = 0; j < 3; ++j)
+    {
+      for (int k = 0; k < 4; ++k)
+      {
+        data[i * 16 + j * 5 + k] = i * 12 + j * 4 + k + 1;
+      }
+    }
+  }
+  dlpack_array<3> shape   = {2, 3, 4};
+  dlpack_array<3> strides = {16, 5, 1}; // Custom strides with padding
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 3;
+  tensor.dtype   = cuda::__data_type_to_dlpack<int>();
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan = cuda::to_host_mdspan<int, 3>(tensor);
+
+  assert(host_mdspan.rank() == 3);
+  assert(host_mdspan.extent(0) == 2);
+  assert(host_mdspan.extent(1) == 3);
+  assert(host_mdspan.extent(2) == 4);
+  assert(host_mdspan.stride(0) == 16);
+  assert(host_mdspan.stride(1) == 5);
+  assert(host_mdspan.stride(2) == 1);
+  // Check values
+  assert(host_mdspan(0, 0, 0) == 1);
+  assert(host_mdspan(0, 0, 3) == 4);
+  assert(host_mdspan(0, 1, 0) == 5);
+  assert(host_mdspan(0, 2, 3) == 12);
+  assert(host_mdspan(1, 0, 0) == 13);
+  assert(host_mdspan(1, 2, 3) == 24);
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// const element types
+
+bool test_const_element_type_rank1()
+{
+  const cuda::std::array<float, 5> data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
+  dlpack_array<1> shape   = {5};
+  dlpack_array<1> strides = {1};
+  DLTensor tensor{};
+  tensor.data    = const_cast<float*>(data.data()); // DLPack uses void*, need const_cast
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 1;
+  tensor.dtype   = cuda::__data_type_to_dlpack<float>();
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  auto host_mdspan = cuda::to_host_mdspan<const float, 1>(tensor);
+
+  static_assert(cuda::std::is_same_v<typename decltype(host_mdspan)::element_type, const float>);
+  assert(host_mdspan.rank() == 1);
+  assert(host_mdspan.extent(0) == 5);
+  for (int i = 0; i < 5; ++i)
+  {
+    assert(host_mdspan(i) == data[i]);
+  }
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// layout_stride with default (layout_right) strides when strides is nullptr
+// Note: This tests the fallback behavior for DLPack < 1.2
+
+bool test_layout_stride_null_strides()
+{
+  cuda::std::array<float, 6> data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+  dlpack_array<2> shape = {2, 3};
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 2;
+  tensor.dtype   = cuda::__data_type_to_dlpack<float>();
+  tensor.shape   = shape.data();
+  tensor.strides = nullptr; // null strides
+  auto host_mdspan = cuda::to_host_mdspan<float, 2>(tensor);
+  // Should use row-major strides by default
+  assert(host_mdspan.stride(0) == 3);
+  assert(host_mdspan.stride(1) == 1);
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// byte_offset support
+
+bool test_byte_offset()
+{
+  cuda::std::array<int, 8> data = {0, 0, 1, 2, 3, 4, 5, 6};
+  // Skip first 2 ints (8 bytes)
+  dlpack_array<1> shape   = {6};
+  dlpack_array<1> strides = {1};
+  DLTensor tensor{};
+  tensor.data        = data.data();
+  tensor.device      = DLDevice{kDLCPU, 0};
+  tensor.ndim        = 1;
+  tensor.dtype       = DLDataType{DLDataTypeCode::kDLInt, 32, 1};
+  tensor.shape       = shape.data();
+  tensor.strides     = strides.data();
+  tensor.byte_offset = sizeof(int) * 2;
+  auto host_mdspan = cuda::to_host_mdspan<int, 1>(tensor);
+
+  assert(host_mdspan.extent(0) == 6);
+  assert(host_mdspan(0) == 1);
+  assert(host_mdspan(5) == 6);
+  return true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------
+// Return type checking
+
+bool test_return_types()
+{
+  cuda::std::array<float, 4> data{};
+  dlpack_array<1> shape   = {4};
+  dlpack_array<1> strides = {1};
+  DLTensor tensor{};
+  tensor.data    = data.data();
+  tensor.device  = DLDevice{kDLCPU, 0};
+  tensor.ndim    = 1;
+  tensor.dtype   = cuda::__data_type_to_dlpack<float>();
+  tensor.shape   = shape.data();
+  tensor.strides = strides.data();
+  // Check return type of to_host_mdspan
+  auto host_ms = cuda::to_host_mdspan<float, 1>(tensor);
+
+  static_assert(
+    cuda::std::is_same_v<decltype(host_ms),
+                         cuda::host_mdspan<float, cuda::std::dims<1, cuda::std::int64_t>, cuda::std::layout_stride>>);
+  assert(host_ms.extent(0) == 4);
+
+  auto host_ms_right = cuda::to_host_mdspan<float, 1, cuda::std::layout_right>(tensor);
+  static_assert(
+    cuda::std::is_same_v<decltype(host_ms_right),
+                         cuda::host_mdspan<float, cuda::std::dims<1, cuda::std::int64_t>, cuda::std::layout_right>>);
+  assert(host_ms_right.extent(0) == 4);
+  return true;
+}
+
+int main(int, char**)
+{
+  NV_IF_TARGET(
+    NV_IS_HOST,
+    (assert(test_rank0()); //
+     // Empty tensor tests
+     assert(test_empty_tensor_layout_right_first_dim_zero());
+     assert(test_empty_tensor_layout_right_second_dim_zero());
+     assert(test_empty_tensor_layout_left_first_dim_zero());
+     assert(test_empty_tensor_layout_stride_explicit_strides());
+     // Rank-1 and Rank-2 tests
+     assert(test_rank1());
+     assert(test_rank2_layout_right());
+     assert(test_rank2_layout_left());
+     assert(test_rank2_layout_stride());
+     // Rank-3 tests
+     assert(test_rank3_layout_right());
+     assert(test_rank3_layout_left());
+     assert(test_rank3_layout_stride());
+     // Const element type tests
+     assert(test_const_element_type_rank1());
+     // Other tests
+     assert(test_byte_offset());
+     assert(test_return_types());))
+#if !(DLPACK_MAJOR_VERSION > 1 || (DLPACK_MAJOR_VERSION == 1 && DLPACK_MINOR_VERSION >= 2))
+  NV_IF_TARGET(NV_IS_HOST,
+               (assert(test_layout_stride_null_strides()); //
+                assert(test_empty_tensor_layout_stride_null_strides());))
+#endif
+  return 0;
+}