Skip to content

Conversation

@matthiasdiener
Copy link
Collaborator

@matthiasdiener matthiasdiener commented Mar 4, 2025

Towards #303. Needs inducer/pytato#587

  • Revert requirements.txt before merge.

Closes #303.

@matthiasdiener matthiasdiener self-assigned this Mar 4, 2025
@inducer
Copy link
Owner

inducer commented Mar 4, 2025

Editing requirements.txt for pytato could make the failures go away.

@matthiasdiener
Copy link
Collaborator Author

matthiasdiener commented Mar 4, 2025

No idea why this fails with (fixed in a2e322e)

Details
=============================================== FAILURES ================================================
_ test_actx_compile[<PytatoPyOpenCLArrayContext for <pyopencl.Device 'cpu' on 'Portable Computing Language'>>] _

actx_factory = <test_arraycontext._PytatoPyOpenCLArrayContextForTestsFactory object at 0x10909e1e0>

    def test_actx_compile(actx_factory):
        actx = actx_factory()
        rng = np.random.default_rng()

        compiled_rhs = actx.compile(scale_and_orthogonalize)

        v_x = rng.uniform(size=10)
        v_y = rng.uniform(size=10)

        vel = actx.from_numpy(Velocity2D(v_x, v_y, actx))

>       scaled_speed = compiled_rhs(np.float64(3.14), vel)

test/test_arraycontext.py:1100:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
arraycontext/impl/pytato/compile.py:379: in __call__
    return compiled_func(arg_id_to_arg)
arraycontext/impl/pytato/compile.py:649: in __call__
    evt, out_dict = self.pytato_program(queue=self.actx.queue,
../pytato/pytato/target/loopy/__init__.py:333: in __call__
    return self.program(queue,
../loopy/loopy/target/pyopencl_execution.py:395: in __call__
    return translation_unit_info.invoker(
../pytools/pytools/py_codegen.py:154: in __call__
    return self.func(*args, **kwargs)
<generated code for 'invoke_scale_and_orthogonalize_loopy_kernel'>:205: in invoke_scale_and_orthogonalize_loopy_kernel
    ???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

_lpy_cl_kernels = <loopy.target.pyopencl_execution._Kernels object at 0x115d08980>
queue = <pyopencl._cl.CommandQueue object at 0x116906a70>, _actx_in_0 = cl.Array(3.14)
_actx_in_1_u = <pyopencl._cl.SVMAllocation object at 0x116b48220>
_pt_out_v = <pyopencl._cl.SVMAllocation object at 0x116911a80>
_actx_in_1_v = <pyopencl._cl.SVMAllocation object at 0x115dcdcc0>
_pt_out_u = <pyopencl._cl.SVMAllocation object at 0x116911900>, _actx_in_1_u_offset = 0
_actx_in_1_v_offset = 0
wait_for = [<pyopencl._cl.Event object at 0x116a8f250>, <pyopencl._cl.Event object at 0x116a8cbd0>]
allocator = <pyopencl._cl.SVMAllocator object at 0x11682ed80>

>   ???
E   RuntimeError: when processing arg#1 (1-based): std::bad_cast

<generated code for 'invoke_scale_and_orthogonalize_loopy_kernel'>:42: RuntimeError
----------------------------------------- Captured stdout call ------------------------------------------
import numpy as _lpy_np
import pyopencl as _lpy_cl
import pyopencl.array as _lpy_cl_array
import pyopencl.tools as _lpy_cl_tools
from struct import pack as _lpy_pack

def _lpy_even_div(a, b):
    result, remdr = divmod(a, b)
    if remdr != 0:
        # FIXME: This error message is kind of crummy.
        raise ValueError("expected even division")
    return result


def _lpy_even_div_none(a, b):
    if a is None:
        return None

    result, remdr = divmod(a, b)
    if remdr != 0:
        # FIXME: This error message is kind of crummy.
        raise ValueError("expected even division")
    return result



def _lpy_host_scale_and_orthogonalize(_lpy_cl_kernels, queue, _actx_in_0, _actx_in_1_u, _pt_out_v, _actx_in_1_v, _pt_out_u, _actx_in_1_u_offset, _actx_in_1_v_offset, wait_for=None, allocator=None):


    pass
    # {{{ enqueue scale_and_orthogonalize

    _lpy_knl = _lpy_cl_kernels.scale_and_orthogonalize
    assert _lpy_knl.num_args == 7, f'Kernel "scale_and_orthogonalize" invoker argument count (7) does not match the argument count of the kernel ({_lpy_knl.num_args}).'

    if _actx_in_0 is None:
        raise RuntimeError("input argument '{var_descr.name}' must be supplied")
    if _actx_in_1_u_offset is None:
        raise RuntimeError("input argument '{var_descr.name}' must be supplied")
    if _actx_in_1_v_offset is None:
        raise RuntimeError("input argument '{var_descr.name}' must be supplied")
    _lpy_knl._set_arg_buf_pack_multi((0, b'd', _actx_in_0, 5, b'i', _actx_in_1_u_offset, 6, b'i', _actx_in_1_v_offset,), )
    _lpy_knl._set_arg_multi((1, _actx_in_1_u, 2, _pt_out_v, 3, _actx_in_1_v, 4, _pt_out_u,))
    pass
    _lpy_evt = _lpy_cl.enqueue_nd_range_kernel(queue, _lpy_knl, (1,), (1,), None, wait_for, True, True, )
    wait_for = [_lpy_evt]

    # }}}



    return _lpy_evt

_lpy_dtype_id = _lpy_np.dtype(_lpy_np.dtype(_lpy_np.float64))
def invoke_scale_and_orthogonalize_loopy_kernel(_lpy_cl_kernels, queue, allocator=None, wait_for=None, out_host=None, _actx_in_0=None, _actx_in_1_u=None, _pt_out_v=None, _actx_in_1_v=None, _pt_out_u=None, _actx_in_1_u_offset=None, _actx_in_1_v_offset=None):
    if allocator is None:
        allocator = _lpy_cl_tools.DeferredAllocator(queue.context)

    # {{{ find integer arguments from array data

    if _actx_in_1_u_offset is None:
        if _actx_in_1_u is not None:
            _actx_in_1_u_offset = _lpy_even_div_none(getattr(_actx_in_1_u, "offset", None), 8)

    if _actx_in_1_v_offset is None:
        if _actx_in_1_v is not None:
            _actx_in_1_v_offset = _lpy_even_div_none(getattr(_actx_in_1_v, "offset", None), 8)

    if _actx_in_1_u_offset is None:
        if True:
            _actx_in_1_u_offset = 0

    if _actx_in_1_v_offset is None:
        if True:
            _actx_in_1_v_offset = 0

    # }}}

    # {{{ check that value args are present

    if _actx_in_0 is None:
        raise TypeError("value argument '_actx_in_0' was not given and could not be automatically determined")
    if _actx_in_1_u_offset is None:
        raise TypeError("value argument '_actx_in_1_u_offset' was not given and could not be automatically determined")
    if _actx_in_1_v_offset is None:
        raise TypeError("value argument '_actx_in_1_v_offset' was not given and could not be automatically determined")
    # }}}

    # {{{ set up array arguments

    # {{{ process _actx_in_1_u

    if _actx_in_1_u is None:
        raise RuntimeError("input argument '_actx_in_1_u' must be supplied")

    if True:
        if _actx_in_1_u.dtype != _lpy_dtype_id:
            raise TypeError("dtype mismatch on argument '_actx_in_1_u' (got: %s, expected: np:dtype('float64'))" % _actx_in_1_u.dtype)
        if _actx_in_1_u.shape != (10,):
            raise ValueError("shape mismatch on argument '_actx_in_1_u' (got: %s, expected: %s)" % (_actx_in_1_u.shape, (10,)))
        (_lpy_shape_0,) = _actx_in_1_u.shape
        (_lpy_stride_0,) = _actx_in_1_u.strides
        if not (((_lpy_shape_0 == 1 or _lpy_stride_0 == 8)) or not _lpy_shape_0):
            _lpy_got = _actx_in_1_u.strides
            _lpy_expected = (8,)
            raise ValueError("strides mismatch on argument '_actx_in_1_u' (got: %s, expected: %s)" % (_lpy_got, _lpy_expected))

    # }}}

    # {{{ process _pt_out_v

    _lpy_made_by_loopy = False

    if _pt_out_v is None:
        _lpy_ustrides_0 = 1
        assert _lpy_ustrides_0 >= 0, "'_pt_out_v' has negative stride in axis 0"
        _lpy_size = 1 + 0 + _lpy_ustrides_0*9
        _pt_out_v = _lpy_cl_array.Array(None, (10,), _lpy_dtype_id, strides=(8*_lpy_ustrides_0,), data=allocator(8*_lpy_size), allocator=allocator, _fast=True, _size=_lpy_size, _context=queue.context, _queue=queue)
        del _lpy_ustrides_0
        del _lpy_size

        _lpy_made_by_loopy = True

    if not _lpy_made_by_loopy:
        if _pt_out_v.dtype != _lpy_dtype_id:
            raise TypeError("dtype mismatch on argument '_pt_out_v' (got: %s, expected: np:dtype('float64'))" % _pt_out_v.dtype)
        if _pt_out_v.shape != (10,):
            raise ValueError("shape mismatch on argument '_pt_out_v' (got: %s, expected: %s)" % (_pt_out_v.shape, (10,)))
        (_lpy_shape_0,) = _pt_out_v.shape
        (_lpy_stride_0,) = _pt_out_v.strides
        if not (((_lpy_shape_0 == 1 or _lpy_stride_0 == 8)) or not _lpy_shape_0):
            _lpy_got = _pt_out_v.strides
            _lpy_expected = (8,)
            raise ValueError("strides mismatch on argument '_pt_out_v' (got: %s, expected: %s)" % (_lpy_got, _lpy_expected))
        if hasattr(_pt_out_v, 'offset') and _pt_out_v.offset:
            raise ValueError("Argument '_pt_out_v' does not allow arrays with offsets. Try passing default_offset=loopy.auto to make_kernel().")

    del _lpy_made_by_loopy


    # }}}

    # {{{ process _actx_in_1_v

    if _actx_in_1_v is None:
        raise RuntimeError("input argument '_actx_in_1_v' must be supplied")

    if True:
        if _actx_in_1_v.dtype != _lpy_dtype_id:
            raise TypeError("dtype mismatch on argument '_actx_in_1_v' (got: %s, expected: np:dtype('float64'))" % _actx_in_1_v.dtype)
        if _actx_in_1_v.shape != (10,):
            raise ValueError("shape mismatch on argument '_actx_in_1_v' (got: %s, expected: %s)" % (_actx_in_1_v.shape, (10,)))
        (_lpy_shape_0,) = _actx_in_1_v.shape
        (_lpy_stride_0,) = _actx_in_1_v.strides
        if not (((_lpy_shape_0 == 1 or _lpy_stride_0 == 8)) or not _lpy_shape_0):
            _lpy_got = _actx_in_1_v.strides
            _lpy_expected = (8,)
            raise ValueError("strides mismatch on argument '_actx_in_1_v' (got: %s, expected: %s)" % (_lpy_got, _lpy_expected))

    # }}}

    # {{{ process _pt_out_u

    _lpy_made_by_loopy = False

    if _pt_out_u is None:
        _lpy_ustrides_0 = 1
        assert _lpy_ustrides_0 >= 0, "'_pt_out_u' has negative stride in axis 0"
        _lpy_size = 1 + 0 + _lpy_ustrides_0*9
        _pt_out_u = _lpy_cl_array.Array(None, (10,), _lpy_dtype_id, strides=(8*_lpy_ustrides_0,), data=allocator(8*_lpy_size), allocator=allocator, _fast=True, _size=_lpy_size, _context=queue.context, _queue=queue)
        del _lpy_ustrides_0
        del _lpy_size

        _lpy_made_by_loopy = True

    if not _lpy_made_by_loopy:
        if _pt_out_u.dtype != _lpy_dtype_id:
            raise TypeError("dtype mismatch on argument '_pt_out_u' (got: %s, expected: np:dtype('float64'))" % _pt_out_u.dtype)
        if _pt_out_u.shape != (10,):
            raise ValueError("shape mismatch on argument '_pt_out_u' (got: %s, expected: %s)" % (_pt_out_u.shape, (10,)))
        (_lpy_shape_0,) = _pt_out_u.shape
        (_lpy_stride_0,) = _pt_out_u.strides
        if not (((_lpy_shape_0 == 1 or _lpy_stride_0 == 8)) or not _lpy_shape_0):
            _lpy_got = _pt_out_u.strides
            _lpy_expected = (8,)
            raise ValueError("strides mismatch on argument '_pt_out_u' (got: %s, expected: %s)" % (_lpy_got, _lpy_expected))
        if hasattr(_pt_out_u, 'offset') and _pt_out_u.offset:
            raise ValueError("Argument '_pt_out_u' does not allow arrays with offsets. Try passing default_offset=loopy.auto to make_kernel().")

    del _lpy_made_by_loopy


    # }}}

    # }}}

    if wait_for is None:
        wait_for = []

    wait_for.extend(_actx_in_1_u.events)
    wait_for.extend(_pt_out_v.events)
    wait_for.extend(_actx_in_1_v.events)
    wait_for.extend(_pt_out_u.events)

    _lpy_evt = _lpy_host_scale_and_orthogonalize(_lpy_cl_kernels, queue, _actx_in_0, _actx_in_1_u.base_data, _pt_out_v.base_data, _actx_in_1_v.base_data, _pt_out_u.base_data, _actx_in_1_u_offset, _actx_in_1_v_offset, wait_for=wait_for, allocator=allocator)

    _pt_out_v.add_event(_lpy_evt)
    _pt_out_u.add_event(_lpy_evt)
    return _lpy_evt, {"_pt_out_v": _pt_out_v, "_pt_out_u": _pt_out_u}

=========================================== warnings summary ============================================

@matthiasdiener matthiasdiener marked this pull request as ready for review March 19, 2025 20:32
@inducer inducer force-pushed the scalar-arg-force-value branch from d9708a6 to 8b67e41 Compare March 19, 2025 21:31
@inducer inducer enabled auto-merge (squash) March 19, 2025 21:32
@inducer inducer merged commit 8387306 into main Mar 19, 2025
12 checks passed
@inducer inducer deleted the scalar-arg-force-value branch March 19, 2025 21:42
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

Bad handling of CPU scalars passed to actx.compiled functions

3 participants