diff --git a/doc/conf.py b/doc/conf.py index 2de72f532..0780e3d3d 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -18,16 +18,18 @@ intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), "numpy": ("https://numpy.org/doc/stable/", None), - "modepy": ("https://documen.tician.de/modepy/", None), + "sympy": ("https://docs.sympy.org/latest/", None), + "matplotlib": ("https://matplotlib.org/stable/", None), "pyopencl": ("https://documen.tician.de/pyopencl/", None), + "pytools": ("https://documen.tician.de/pytools/", None), + "modepy": ("https://documen.tician.de/modepy/", None), "pymbolic": ("https://documen.tician.de/pymbolic/", None), "loopy": ("https://documen.tician.de/loopy/", None), "pytential": ("https://documen.tician.de/pytential/", None), "boxtree": ("https://documen.tician.de/boxtree/", None), - "sympy": ("https://docs.sympy.org/latest/", None), - "matplotlib": ("https://matplotlib.org/stable/", None), + "arraycontext": ("https://documen.tician.de/arraycontext/", None), } nitpick_ignore_regex = [ - ["py:class", r"symengine\.(.+)"], # :cry: - ] + ["py:class", r"symengine\.(.+)"], # :cry: +] diff --git a/doc/misc.rst b/doc/misc.rst index f977fffd5..7b4f427f8 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -5,6 +5,9 @@ Misc Tools .. automodule:: sumpy.symbolic +.. automodule:: sumpy.tools + +.. automodule:: sumpy.array_context Installation ============ diff --git a/sumpy/array_context.py b/sumpy/array_context.py index 760f7d5d8..3c3288495 100644 --- a/sumpy/array_context.py +++ b/sumpy/array_context.py @@ -26,6 +26,9 @@ register_pytest_array_context_factory) __doc__ = """ +Array Context +------------- + .. autoclass:: PyOpenCLArrayContext """ diff --git a/sumpy/tools.py b/sumpy/tools.py index be404fe9d..6b92c125d 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -24,41 +24,95 @@ THE SOFTWARE. 
""" -from pytools import memoize_method -from pytools.tag import Tag, tag_dataclass -import numbers -import warnings -import os -import sys import enum +import logging +import warnings from abc import ABC, abstractmethod from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, List, Optional, Sequence, Tuple +import loopy as lp +import numpy as np from pymbolic.mapper import WalkMapper -import pymbolic +from pytools import memoize_method +from pytools.tag import Tag, tag_dataclass -import numpy as np import sumpy.symbolic as sym -import pyopencl as cl -import pyopencl.array as cla -import loopy as lp -from typing import Any, List, Optional, TYPE_CHECKING +if TYPE_CHECKING: + import numpy + import pyopencl + + from sumpy.kernel import Kernel -import logging logger = logging.getLogger(__name__) -if TYPE_CHECKING: - from sumpy.kernel import Kernel + +__doc__ = """ +Tools +===== + +.. autofunction:: to_complex_dtype +.. autofunction:: is_obj_array_like +.. autofunction:: vector_to_device +.. autofunction:: vector_from_device +.. autoclass:: OrderedSet + +Multi-index Helpers +------------------- + +.. autofunction:: add_mi +.. autofunction:: mi_factorial +.. autofunction:: mi_increment_axis +.. autofunction:: mi_set_axis +.. autofunction:: mi_power + +Symbolic Helpers +---------------- + +.. autofunction:: add_to_sac +.. autofunction:: gather_arguments +.. autofunction:: gather_source_arguments +.. autofunction:: gather_loopy_arguments +.. autofunction:: gather_loopy_source_arguments + +.. autoclass:: ScalingAssignmentTag +.. autoclass:: KernelComputation +.. autoclass:: KernelCacheMixin + +.. autofunction:: reduced_row_echelon_form +.. autofunction:: nullspace + +FFT +--- + +.. autofunction:: fft +.. autofunction:: fft_toeplitz_upper_triangular +.. autofunction:: matvec_toeplitz_upper_triangular + +.. autoclass:: FFTBackend + :members: +.. autofunction:: loopy_fft +.. autofunction:: get_opencl_fft_app +.. 
autofunction:: run_opencl_fft + +Profiling +--------- + +.. autofunction:: get_native_event +.. autoclass:: ProfileGetter +.. autoclass:: AggregateProfilingEvent +.. autoclass:: MarkerBasedProfilingEvent +""" # {{{ multi_index helpers -def add_mi(mi1, mi2): +def add_mi(mi1: Sequence[int], mi2: Sequence[int]) -> Tuple[int, ...]: return tuple([mi1i + mi2i for mi1i, mi2i in zip(mi1, mi2)]) -def mi_factorial(mi): +def mi_factorial(mi: Sequence[int]) -> int: import math result = 1 for mi_i in mi: @@ -66,19 +120,23 @@ def mi_factorial(mi): return result -def mi_increment_axis(mi, axis, increment): +def mi_increment_axis( + mi: Sequence[int], axis: int, increment: int + ) -> Tuple[int, ...]: new_mi = list(mi) new_mi[axis] += increment return tuple(new_mi) -def mi_set_axis(mi, axis, value): +def mi_set_axis(mi: Sequence[int], axis: int, value: int) -> Tuple[int, ...]: new_mi = list(mi) new_mi[axis] = value return tuple(new_mi) -def mi_power(vector, mi, evaluate=True): +def mi_power( + vector: Sequence[Any], mi: Sequence[int], + evaluate: bool = True) -> Any: result = 1 for mi_i, vec_i in zip(mi, vector): if mi_i == 1: @@ -94,8 +152,8 @@ def add_to_sac(sac, expr): if sac is None: return expr - if isinstance(expr, (numbers.Number, sym.Number, int, - float, complex, sym.Symbol)): + from numbers import Number + if isinstance(expr, (Number, sym.Number, sym.Symbol)): return expr name = sac.assign_temp("temp", expr) @@ -148,9 +206,8 @@ def build_matrix(op, dtype=None, shape=None): def vector_to_device(queue, vec): - from pytools.obj_array import obj_array_vectorize - from pyopencl.array import to_device + from pytools.obj_array import obj_array_vectorize def to_dev(ary): return to_device(queue, ary) @@ -227,7 +284,7 @@ def __init__(self, ctx: Any, target_kernels: List["Kernel"], source_kernels: List["Kernel"], strength_usage: Optional[List[int]] = None, - value_dtypes: Optional[List["np.dtype"]] = None, + value_dtypes: Optional[List["numpy.dtype[Any]"]] = None, name: 
Optional[str] = None, device: Optional[Any] = None) -> None: """ @@ -392,12 +449,12 @@ def get_cached_optimized_kernel(self, **kwargs): @memoize_method def get_cached_kernel_executor(self, **kwargs) -> lp.ExecutorBase: - from sumpy import (code_cache, CACHING_ENABLED, OPT_ENABLED, - NO_CACHE_KERNELS) + from sumpy import CACHING_ENABLED, NO_CACHE_KERNELS, OPT_ENABLED, code_cache if CACHING_ENABLED and not ( NO_CACHE_KERNELS and self.name in NO_CACHE_KERNELS): import loopy.version + from sumpy.version import KERNEL_VERSION cache_key = ( self.get_cache_key() @@ -408,8 +465,7 @@ def get_cached_kernel_executor(self, **kwargs) -> lp.ExecutorBase: try: result = code_cache[cache_key] - logger.debug("{}: kernel cache hit [key={}]".format( - self.name, cache_key)) + logger.debug("%s: kernel cache hit [key=%s]", self.name, cache_key) return result.executor(self.context) except KeyError: pass @@ -621,7 +677,8 @@ class ProfileGetter: def get_native_event(evt): - return evt if isinstance(evt, cl.Event) else evt.native_event + from pyopencl import Event + return evt if isinstance(evt, Event) else evt.native_event class AggregateProfilingEvent: @@ -662,9 +719,11 @@ def wait(self): def loopy_fft(shape, inverse, complex_dtype, index_dtype=None, name=None): - from pymbolic.algorithm import find_factors from math import pi + from pymbolic import var + from pymbolic.algorithm import find_factors + sign = 1 if not inverse else -1 n = shape[-1] @@ -676,7 +735,7 @@ def loopy_fft(shape, inverse, complex_dtype, index_dtype=None, nfft = n - broadcast_dims = tuple(pymbolic.var(f"j{d}") for d in range(len(shape) - 1)) + broadcast_dims = tuple(var(f"j{d}") for d in range(len(shape) - 1)) domains = [ "{[i]: 0<=i FFTBackend: - env_val = os.environ.get("SUMPY_FFT_BACKEND", None) +def _get_fft_backend(queue: "pyopencl.CommandQueue") -> FFTBackend: + import os + + env_val = os.environ.get("SUMPY_FFT_BACKEND") if env_val: if env_val not in ["loopy", "pyvkfft"]: raise ValueError("Expected 'loopy' or 
'pyvkfft' for SUMPY_FFT_BACKEND. " @@ -840,13 +903,17 @@ def _get_fft_backend(queue) -> FFTBackend: warnings.warn("VkFFT not found. FFT runs will be slower.", stacklevel=3) return FFTBackend.loopy - if queue.properties & cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE: + from pyopencl import command_queue_properties + + if queue.properties & command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE: warnings.warn( "VkFFT does not support out of order queues yet. " "Falling back to slower implementation.", stacklevel=3) return FFTBackend.loopy import platform + import sys + if (sys.platform == "darwin" and platform.machine() == "x86_64" and queue.context.devices[0].platform.name @@ -860,7 +927,11 @@ def _get_fft_backend(queue) -> FFTBackend: return FFTBackend.pyvkfft -def get_opencl_fft_app(queue, shape, dtype, inverse): +def get_opencl_fft_app( + queue: "pyopencl.CommandQueue", + shape: Tuple[int, ...], + dtype: "numpy.dtype[Any]", + inverse: bool) -> Any: """Setup an object for out-of-place FFT on with given shape and dtype on given queue. """ @@ -879,7 +950,12 @@ def get_opencl_fft_app(queue, shape, dtype, inverse): raise RuntimeError(f"Unsupported FFT backend {backend}") -def run_opencl_fft(fft_app, queue, input_vec, inverse=False, wait_for=None): +def run_opencl_fft( + fft_app: Tuple[Any, FFTBackend], + queue: "pyopencl.CommandQueue", + input_vec: Any, + inverse: bool = False, + wait_for: Optional[List["pyopencl.Event"]] = None) -> Tuple["pyopencl.Event", Any]: """Runs an FFT on input_vec and returns a :class:`MarkerBasedProfilingEvent` that indicate the end and start of the operations carried out and the output vector. 
@@ -894,6 +970,9 @@ def run_opencl_fft(fft_app, queue, input_vec, inverse=False, wait_for=None): if wait_for is None: wait_for = [] + import pyopencl as cl + import pyopencl.array as cla + start_evt = cl.enqueue_marker(queue, wait_for=wait_for[:]) if app.inplace: @@ -929,21 +1008,19 @@ def run_opencl_fft(fft_app, queue, input_vec, inverse=False, wait_for=None): "KernelCacheWrapper": ("KernelCacheMixin", 2023), } -if sys.version_info >= (3, 7): - def __getattr__(name): - replacement_and_obj = _depr_name_to_replacement_and_obj.get(name, None) - if replacement_and_obj is not None: - replacement, obj, year = replacement_and_obj - from warnings import warn - warn(f"'sumpy.tools.{name}' is deprecated. " - f"Use '{replacement}' instead. " - f"'sumpy.tools.{name}' will continue to work until {year}.", - DeprecationWarning, stacklevel=2) - return obj - else: - raise AttributeError(name) -else: - KernelCacheWrapper = KernelCacheMixin + +def __getattr__(name): + replacement_and_obj = _depr_name_to_replacement_and_obj.get(name, None) + if replacement_and_obj is not None: + replacement, year = replacement_and_obj  # entries are (replacement name, removal year) + from warnings import warn + warn(f"'sumpy.tools.{name}' is deprecated. " + f"Use '{replacement}' instead. " + f"'sumpy.tools.{name}' will continue to work until {year}.", + DeprecationWarning, stacklevel=2) + return globals()[replacement] + else: + raise AttributeError(name) # }}}