diff --git a/setup.py b/setup.py index 5ca544177..ed6a5d435 100644 --- a/setup.py +++ b/setup.py @@ -103,6 +103,7 @@ def write_git_revision(package_name): "boxtree>=2018.1", "pytest>=2.3", "pyrsistent>=0.16.0", + "arraycontext", "dataclasses>=0.7;python_version<='3.6'", "sympy>=0.7.2", "pymbolic>=2021.1", diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 6f742f727..5f70b35e2 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -49,8 +49,8 @@ # {{{ translation base class class E2EBase(KernelCacheWrapper): - def __init__(self, ctx, src_expansion, tgt_expansion, - name=None, device=None): + def __init__(self, src_expansion, tgt_expansion, + name=None): """ :arg expansion: a subclass of :class:`sympy.expansion.ExpansionBase` :arg strength_usage: A list of integers indicating which expression @@ -59,9 +59,6 @@ def __init__(self, ctx, src_expansion, tgt_expansion, Default: all kernels use the same strength. """ - if device is None: - device = ctx.devices[0] - if src_expansion is tgt_expansion: from sumpy.kernel import (TargetTransformationRemover, SourceTransformationRemover) @@ -80,11 +77,9 @@ def __init__(self, ctx, src_expansion, tgt_expansion, SourceTransformationRemover()( TargetTransformationRemover()(tgt_expansion.kernel))) - self.ctx = ctx self.src_expansion = src_expansion self.tgt_expansion = tgt_expansion self.name = name or self.default_name - self.device = device if src_expansion.dim != tgt_expansion.dim: raise ValueError("source and target expansions must have " @@ -149,11 +144,6 @@ class E2EFromCSR(E2EBase): default_name = "e2e_from_csr" - def __init__(self, ctx, src_expansion, tgt_expansion, - name=None, device=None): - super().__init__(ctx, src_expansion, tgt_expansion, - name=name, device=device) - def get_translation_loopy_insns(self): from sumpy.symbolic import make_sym_vector dvec = make_sym_vector("d", self.dim) diff --git a/sumpy/e2p.py b/sumpy/e2p.py index 421547d36..6adee5279 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -43,8 +43,7 @@ # {{{ E2P base class class E2PBase(KernelCacheWrapper): - def __init__(self, ctx, expansion, kernels, - name=None, device=None): + def __init__(self, expansion, kernels, name=None): """ :arg expansion: a subclass of :class:`sympy.expansion.ExpansionBase` :arg strength_usage: A list of integers indicating which expression @@ -53,9 +52,6 @@ def __init__(self, ctx, expansion, kernels, Default: all kernels use the same strength. """ - if device is None: - device = ctx.devices[0] - from sumpy.kernel import (SourceTransformationRemover, TargetTransformationRemover) sxr = SourceTransformationRemover() @@ -67,11 +63,9 @@ def __init__(self, ctx, expansion, kernels, for knl in kernels: assert txr(knl) == expansion.kernel - self.ctx = ctx self.expansion = expansion self.kernels = kernels self.name = name or self.default_name - self.device = device self.dim = expansion.dim diff --git a/sumpy/fmm.py b/sumpy/fmm.py index 52cb9a603..0ba56d3db 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -27,12 +27,15 @@ """ +import numpy as np import pyopencl as cl import pyopencl.array # noqa from pytools import memoize_method from boxtree.fmm import TreeIndependentDataForWrangler, ExpansionWranglerInterface +from arraycontext import ArrayContext, Array + from sumpy import ( P2EFromSingleBox, P2EFromCSR, E2PFromSingleBox, E2PFromCSR, @@ -58,7 +61,7 @@ class SumpyTreeIndependentDataForWrangler(TreeIndependentDataForWrangler): profiling enabled. """ - def __init__(self, cl_context, + def __init__(self, setup_actx: ArrayContext, multipole_expansion_factory, local_expansion_factory, target_kernels, exclude_self=False, use_rscale=None, @@ -78,6 +81,8 @@ def __init__(self, cl_context, expansion and postprocessing of the target local expansion for multipole-to-local expansion. """ + self._setup_actx = setup_actx.clone() + self.multipole_expansion_factory = multipole_expansion_factory self.local_expansion_factory = local_expansion_factory self.source_kernels = source_kernels @@ -93,8 +98,6 @@ def __init__(self, cl_context, super().__init__() - self.cl_context = cl_context - @memoize_method def get_base_kernel(self): from pytools import single_valued @@ -112,21 +115,21 @@ def local_expansion(self, order): @memoize_method def p2m(self, tgt_order): - return P2EFromSingleBox(self.cl_context, + return P2EFromSingleBox( kernels=self.source_kernels, expansion=self.multipole_expansion(tgt_order), strength_usage=self.strength_usage) @memoize_method def p2l(self, tgt_order): - return P2EFromCSR(self.cl_context, + return P2EFromCSR( kernels=self.source_kernels, expansion=self.local_expansion(tgt_order), strength_usage=self.strength_usage) @memoize_method def m2m(self, src_order, tgt_order): - return E2EFromChildren(self.cl_context, + return E2EFromChildren( self.multipole_expansion(src_order), self.multipole_expansion(tgt_order)) @@ -137,49 +140,49 @@ def m2l(self, src_order, tgt_order, m2l_class = M2LUsingTranslationClassesDependentData else: m2l_class = E2EFromCSR - return m2l_class(self.cl_context, + return m2l_class( self.multipole_expansion(src_order), self.local_expansion(tgt_order)) @memoize_method def m2l_translation_class_dependent_data_kernel(self, src_order, tgt_order): - return M2LGenerateTranslationClassesDependentData(self.cl_context, + return M2LGenerateTranslationClassesDependentData( self.multipole_expansion(src_order), self.local_expansion(tgt_order)) @memoize_method def m2l_preprocess_mpole_kernel(self, src_order, tgt_order): - return M2LPreprocessMultipole(self.cl_context, + return M2LPreprocessMultipole( self.multipole_expansion(src_order), self.local_expansion(tgt_order)) @memoize_method def m2l_postprocess_local_kernel(self, src_order, tgt_order): - return M2LPostprocessLocal(self.cl_context, + return M2LPostprocessLocal( self.multipole_expansion(src_order), self.local_expansion(tgt_order)) @memoize_method def l2l(self, src_order, tgt_order): - return E2EFromParent(self.cl_context, + return E2EFromParent( self.local_expansion(src_order), self.local_expansion(tgt_order)) @memoize_method def m2p(self, src_order): - return E2PFromCSR(self.cl_context, + return E2PFromCSR( self.multipole_expansion(src_order), self.target_kernels) @memoize_method def l2p(self, src_order): - return E2PFromSingleBox(self.cl_context, + return E2PFromSingleBox( self.local_expansion(src_order), self.target_kernels) @memoize_method def p2p(self): - return P2PFromCSR(self.cl_context, target_kernels=self.target_kernels, + return P2PFromCSR(target_kernels=self.target_kernels, source_kernels=self.source_kernels, exclude_self=self.exclude_self, strength_usage=self.strength_usage) @@ -363,37 +366,26 @@ def order_to_size(order): return build_csr_level_starts(self.level_orders, order_to_size, level_starts=self.m2l_translation_class_level_start_box_nrs()) - def multipole_expansion_zeros(self, template_ary): + def multipole_expansion_zeros(self, actx: ArrayContext) -> Array: """Return an expansions array (which must support addition) capable of holding one multipole or local expansion for every box in the tree. - :arg template_ary: an array (not necessarily of the same shape or dtype as - the one to be created) whose run-time environment - (e.g. :class:`pyopencl.CommandQueue`) the returned array should - reuse. """ - return cl.array.zeros( - template_ary.queue, + return actx.zeros( self.multipole_expansions_level_starts()[-1], dtype=self.dtype) - def local_expansion_zeros(self, template_ary): + def local_expansion_zeros(self, actx) -> Array: """Return an expansions array (which must support addition) capable of holding one multipole or local expansion for every box in the tree. - :arg template_ary: an array (not necessarily of the same shape or dtype as - the one to be created) whose run-time environment - (e.g. :class:`pyopencl.CommandQueue`) the returned array should - reuse. """ - return cl.array.zeros( - template_ary.queue, + return actx.zeros( self.local_expansions_level_starts()[-1], dtype=self.dtype) - def m2l_translation_classes_dependent_data_zeros(self, queue): - return cl.array.zeros( - queue, + def m2l_translation_classes_dependent_data_zeros(self, actx): + return actx.zeros( self.m2l_translation_classes_dependent_data_level_starts()[-1], dtype=self.preprocessed_mpole_dtype) @@ -435,9 +427,8 @@ def order_to_size(order): return build_csr_level_starts(self.level_orders, order_to_size, level_starts=self.tree.level_start_box_nrs) - def m2l_preproc_mpole_expansion_zeros(self, template_ary): - return cl.array.zeros( - template_ary.queue, + def m2l_preproc_mpole_expansion_zeros(self, actx: ArrayContext): + return actx.zeros( self.m2l_preproc_mpole_expansions_level_starts()[-1], dtype=self.preprocessed_mpole_dtype) @@ -454,27 +445,22 @@ def m2l_preproc_mpole_expansions_view(self, mpole_exps, level): m2l_work_array_level_starts = \ m2l_preproc_mpole_expansions_level_starts - def output_zeros(self, template_ary): + def output_zeros(self, actx: ArrayContext) -> np.ndarray: """Return a potentials array (which must support addition) capable of holding a potential value for each target in the tree. Note that :func:`drive_fmm` makes no assumptions about *potential* other than that it supports addition--it may consist of potentials, gradients of the potential, or arbitrary other per-target output data. - :arg template_ary: an array (not necessarily of the same shape or dtype as - the one to be created) whose run-time environment - (e.g. :class:`pyopencl.CommandQueue`) the returned array should - reuse. """ from pytools.obj_array import make_obj_array return make_obj_array([ - cl.array.zeros( - template_ary.queue, + actx.zeros( self.tree.ntargets, dtype=self.dtype) for k in self.tree_indep.target_kernels]) def reorder_sources(self, source_array): - return source_array.with_queue(source_array.queue)[self.tree.user_source_ids] + return source_array[self.tree.user_source_ids] def reorder_potentials(self, potentials): from pytools.obj_array import obj_array_vectorize @@ -525,16 +511,14 @@ def box_target_list_kwargs(self): # }}} def form_multipoles(self, + actx: ArrayContext, level_start_source_box_nrs, source_boxes, src_weight_vecs): - mpoles = self.multipole_expansion_zeros(src_weight_vecs[0]) + mpoles = self.multipole_expansion_zeros(actx) kwargs = self.extra_kwargs.copy() kwargs.update(self.box_source_list_kwargs()) - events = [] - queue = src_weight_vecs[0].queue - for lev in range(self.tree.nlevels): p2m = self.tree_indep.p2m(self.level_orders[lev]) start, stop = level_start_source_box_nrs[lev:lev+2] @@ -545,7 +529,7 @@ def form_multipoles(self, mpoles, lev) evt, (mpoles_res,) = p2m( - queue, + actx, source_boxes=source_boxes[start:stop], centers=self.tree.box_centers, strengths=src_weight_vecs, @@ -554,13 +538,13 @@ def form_multipoles(self, rscale=self.level_to_rscale(lev), **kwargs) - events.append(evt) assert mpoles_res is mpoles_view - return (mpoles, SumpyTimingFuture(queue, events)) + return mpoles def coarsen_multipoles(self, + actx: ArrayContext, level_start_source_parent_box_nrs, source_parent_boxes, mpoles): @@ -618,9 +602,11 @@ def coarsen_multipoles(self, return (mpoles, SumpyTimingFuture(queue, events)) - def eval_direct(self, target_boxes, source_box_starts, + def eval_direct(self, + actx: ArrayContext, + target_boxes, source_box_starts, source_box_lists, src_weight_vecs): - pot = self.output_zeros(src_weight_vecs[0]) + pot = self.output_zeros(actx) kwargs = self.extra_kwargs.copy() kwargs.update(self.self_extra_kwargs) @@ -715,13 +701,14 @@ def _add_m2l_precompute_kwargs(self, kwargs_for_m2l, self.translation_classes_data.from_sep_siblings_translation_classes def multipole_to_local(self, + actx: ArrayContext, level_start_target_box_nrs, target_boxes, src_box_starts, src_box_lists, mpole_exps): preprocess_evts = [] queue = mpole_exps.queue - local_exps = self.local_expansion_zeros(mpole_exps) + local_exps = self.local_expansion_zeros(actx) if self.tree_indep.use_preprocessing_for_m2l: preprocessed_mpole_exps = \ @@ -838,8 +825,9 @@ def multipole_to_local(self, return (local_exps, SumpyTimingFuture(queue, timing_events)) def eval_multipoles(self, + actx: ArrayContext, target_boxes_by_source_level, source_boxes_by_level, mpole_exps): - pot = self.output_zeros(mpole_exps) + pot = self.output_zeros(actx) kwargs = self.kernel_extra_kwargs.copy() kwargs.update(self.box_target_list_kwargs()) @@ -889,9 +877,10 @@ def eval_multipoles(self, return (pot, SumpyTimingFuture(queue, events)) def form_locals(self, + actx: ArrayContext, level_start_target_or_target_parent_box_nrs, target_or_target_parent_boxes, starts, lists, src_weight_vecs): - local_exps = self.local_expansion_zeros(src_weight_vecs[0]) + local_exps = self.local_expansion_zeros(actx) kwargs = self.extra_kwargs.copy() kwargs.update(self.box_source_list_kwargs()) @@ -931,6 +920,7 @@ def form_locals(self, return (local_exps, SumpyTimingFuture(queue, events)) def refine_locals(self, + actx: ArrayContext, level_start_target_or_target_parent_box_nrs, target_or_target_parent_boxes, local_exps): @@ -976,8 +966,10 @@ def refine_locals(self, return (local_exps, SumpyTimingFuture(queue, [evt])) - def eval_locals(self, level_start_target_box_nrs, target_boxes, local_exps): - pot = self.output_zeros(local_exps) + def eval_locals(self, + actx: ArrayContext, + level_start_target_box_nrs, target_boxes, local_exps): + pot = self.output_zeros(actx) kwargs = self.kernel_extra_kwargs.copy() kwargs.update(self.box_target_list_kwargs()) @@ -1015,7 +1007,7 @@ def eval_locals(self, level_start_target_box_nrs, target_boxes, local_exps): return (pot, SumpyTimingFuture(queue, events)) - def finalize_potentials(self, potentials, template_ary): + def finalize_potentials(self, actx: ArrayContext, potentials): return potentials # }}} diff --git a/sumpy/p2e.py b/sumpy/p2e.py index 177d0b586..3fd071b53 100644 --- a/sumpy/p2e.py +++ b/sumpy/p2e.py @@ -49,8 +49,8 @@ class P2EBase(KernelComputation, KernelCacheWrapper): .. automethod:: __init__ """ - def __init__(self, ctx, expansion, kernels=None, - name=None, device=None, strength_usage=None): + def __init__(self, expansion, kernels=None, + name=None, strength_usage=None): """ :arg expansion: a subclass of :class:`sumpy.expansion.ExpansionBase` :arg kernels: if not provided, the kernel of the *expansion* is used. @@ -78,10 +78,10 @@ def __init__(self, ctx, expansion, kernels=None, assert txr(knl) == knl assert sxr(knl) == expansion.kernel - KernelComputation.__init__(self, ctx=ctx, target_kernels=[], + KernelComputation.__init__(self, target_kernels=[], source_kernels=kernels, strength_usage=strength_usage, value_dtypes=None, - name=name, device=device) + name=name) self.expansion = expansion self.dim = expansion.dim @@ -135,11 +135,11 @@ def get_optimized_kernel(self, sources_is_obj_array, centers_is_obj_array): enforce_variable_access_ordered="no_check") return knl - def __call__(self, queue, **kwargs): + def __call__(self, actx: ArrayContext, **kwargs): from sumpy.tools import is_obj_array_like sources = kwargs.pop("sources") centers = kwargs.pop("centers") - knl = self.get_cached_optimized_kernel( + knl = self.get_kernel( sources_is_obj_array=is_obj_array_like(sources), centers_is_obj_array=is_obj_array_like(centers)) @@ -148,7 +148,7 @@ def __call__(self, queue, **kwargs): dtype = centers[0].dtype if is_obj_array_like(centers) else centers.dtype rscale = dtype.type(kwargs.pop("rscale")) - return knl(queue, sources=sources, centers=centers, rscale=rscale, **kwargs) + return actx.call_loopy(knl, sources=sources, centers=centers, rscale=rscale, **kwargs) # }}} diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 381918462..fa4154178 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -51,8 +51,8 @@ # {{{ p2p base class class P2PBase(KernelComputation, KernelCacheWrapper): - def __init__(self, ctx, target_kernels, exclude_self, strength_usage=None, - value_dtypes=None, name=None, device=None, source_kernels=None): + def __init__(self, target_kernels, exclude_self, strength_usage=None, + value_dtypes=None, name=None, source_kernels=None): """ :arg target_kernels: list of :class:`sumpy.kernel.Kernel` instances with only target derivatives. @@ -82,9 +82,9 @@ def __init__(self, ctx, target_kernels, exclude_self, strength_usage=None, base_target_kernel = single_valued(txr(knl) for knl in target_kernels) assert base_source_kernel == base_target_kernel - KernelComputation.__init__(self, ctx=ctx, target_kernels=target_kernels, + KernelComputation.__init__(self, target_kernels=target_kernels, source_kernels=source_kernels, strength_usage=strength_usage, - value_dtypes=value_dtypes, name=name, device=device) + value_dtypes=value_dtypes, name=name) self.exclude_self = exclude_self @@ -94,8 +94,7 @@ def __init__(self, ctx, target_kernels, exclude_self, strength_usage=None, def get_cache_key(self): return (type(self).__name__, tuple(self.target_kernels), self.exclude_self, tuple(self.strength_usage), tuple(self.value_dtypes), - tuple(self.source_kernels), - self.device.hashable_model_and_version_identifier) + tuple(self.source_kernels)) def get_loopy_insns_and_result_names(self): from sumpy.symbolic import make_sym_vector @@ -638,10 +637,8 @@ def get_kernel(self, max_nsources_in_one_box, max_ntargets_in_one_box, return loopy_knl def get_optimized_kernel(self, max_nsources_in_one_box, - max_ntargets_in_one_box): - import pyopencl as cl - dev = self.context.devices[0] - if dev.type & cl.device_type.CPU: + max_ntargets_in_one_box, is_cpu): + if is_cpu: knl = self.get_kernel(max_nsources_in_one_box, max_ntargets_in_one_box, gpu=False) knl = lp.split_iname(knl, "itgt_box", 4, outer_tag="g.0") @@ -661,11 +658,13 @@ def get_optimized_kernel(self, max_nsources_in_one_box, return knl def __call__(self, queue, **kwargs): + import pyopencl as cl max_nsources_in_one_box = kwargs.pop("max_nsources_in_one_box") max_ntargets_in_one_box = kwargs.pop("max_ntargets_in_one_box") knl = self.get_cached_optimized_kernel( max_nsources_in_one_box=max_nsources_in_one_box, - max_ntargets_in_one_box=max_ntargets_in_one_box) + max_ntargets_in_one_box=max_ntargets_in_one_box, + is_cpu=queue.dev.type & cl.device_type.CPU) return knl(queue, **kwargs) diff --git a/sumpy/tools.py b/sumpy/tools.py index 14152d706..5dfbb1043 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -571,8 +571,8 @@ class ScalingAssignmentTag(Tag): class KernelComputation: """Common input processing for kernel computations.""" - def __init__(self, ctx, target_kernels, source_kernels, strength_usage, - value_dtypes, name, device=None): + def __init__(self, target_kernels, source_kernels, strength_usage, + value_dtypes, name): """ :arg kernels: list of :class:`sumpy.kernel.Kernel` instances :class:`sumpy.kernel.TargetDerivative` wrappers should be @@ -610,12 +610,6 @@ def __init__(self, ctx, target_kernels, source_kernels, strength_usage, # }}} - if device is None: - device = ctx.devices[0] - - self.context = ctx - self.device = device - self.source_kernels = tuple(source_kernels) self.target_kernels = tuple(target_kernels) self.value_dtypes = value_dtypes diff --git a/test/test_fmm.py b/test/test_fmm.py index 8e7bc3299..367893137 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -25,8 +25,13 @@ import numpy as np import numpy.linalg as la import pyopencl as cl -from pyopencl.tools import ( # noqa - pytest_generate_tests_for_pyopencl as pytest_generate_tests) + +from arraycontext.pytest import PytestPyOpenCLArrayContextFactory +from arraycontext import ( # noqa: F401 + pytest_generate_tests_for_array_contexts, + _acf + ) + from sumpy.kernel import LaplaceKernel, HelmholtzKernel, YukawaKernel from sumpy.expansion.multipole import ( VolumeTaylorMultipoleExpansion, @@ -46,12 +51,9 @@ logger = logging.getLogger(__name__) -try: - import faulthandler -except ImportError: - pass -else: - faulthandler.enable() +pytest_generate_tests = pytest_generate_tests_for_array_contexts([ + PytestPyOpenCLArrayContextFactory + ]) @pytest.mark.parametrize("use_translation_classes, use_fft", @@ -80,7 +82,7 @@ (YukawaKernel(2), Y2DLocalExpansion, Y2DMultipoleExpansion, False), ]) -def test_sumpy_fmm(ctx_factory, knl, local_expn_class, mpole_expn_class, +def test_sumpy_fmm(actx_factory, knl, local_expn_class, mpole_expn_class, order_varies_with_level, use_translation_classes, use_fft): logging.basicConfig(level=logging.INFO) @@ -90,8 +92,9 @@ def test_sumpy_fmm(ctx_factory, knl, local_expn_class, mpole_expn_class, if local_expn_class in [H2DLocalExpansion, Y2DLocalExpansion] and use_fft: pytest.skip("Fourier/Bessel based expansions with FFT is not supported yet.") - ctx = ctx_factory() - queue = cl.CommandQueue(ctx) + actx = actx_factory() + queue = actx.queue + cl_ctx = queue.context nsources = 1000 ntargets = 300 @@ -118,13 +121,13 @@ def test_sumpy_fmm(ctx_factory, knl, local_expn_class, mpole_expn_class, [fp.points[i] for i in range(knl.dim)]) from boxtree import TreeBuilder - tb = TreeBuilder(ctx) + tb = TreeBuilder(cl_ctx) tree, _ = tb(queue, sources, targets=targets, max_particles_in_box=30, debug=True) from boxtree.traversal import FMMTraversalBuilder - tbuild = FMMTraversalBuilder(ctx) + tbuild = FMMTraversalBuilder(cl_ctx) trav, _ = tbuild(queue, tree, debug=True) # {{{ plot tree @@ -151,7 +154,7 @@ def test_sumpy_fmm(ctx_factory, knl, local_expn_class, mpole_expn_class, # }}} from pyopencl.clrandom import PhiloxGenerator - rng = PhiloxGenerator(ctx, seed=44) + rng = PhiloxGenerator(cl_ctx, seed=44) weights = rng.uniform(queue, nsources, dtype=np.float64) logger.info("computing direct (reference) result") @@ -186,7 +189,7 @@ def test_sumpy_fmm(ctx_factory, knl, local_expn_class, mpole_expn_class, target_kernels = [knl] tree_indep = SumpyTreeIndependentDataForWrangler( - ctx, + actx, partial(mpole_expn_class, knl), partial(local_expn_class, knl), target_kernels, use_fft_for_m2l=use_fft) @@ -205,10 +208,10 @@ def fmm_level_to_order(kernel, kernel_args, tree, lev): from boxtree.fmm import drive_fmm - pot, = drive_fmm(wrangler, (weights,)) + pot, = drive_fmm(actx, wrangler, (weights,)) from sumpy import P2P - p2p = P2P(ctx, target_kernels, exclude_self=False) + p2p = P2P(target_kernels, exclude_self=False) evt, (ref_pot,) = p2p(queue, targets, sources, (weights,), **extra_kwargs)