diff --git a/.gitignore b/.gitignore index daf719e6f..c89b827fb 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,8 @@ examples/*.pdf *.vtu *.vts +*.png +*.gif .cache diff --git a/doc/conf.py b/doc/conf.py index 9e5395862..a9dccf5b3 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -35,6 +35,7 @@ } nitpick_ignore_regex = [ + ["py:class", r".*_ProxyNeighborEvaluationResult"], # Sphinx started complaining about these in 8.2.1(-ish) # -AK, 2025-02-24 ["py:class", r"TypeAliasForwardRef"], @@ -70,13 +71,13 @@ "cl_array.Array": "obj:pyopencl.array.Array", # pymbolic "ArithmeticExpression": "obj:pymbolic.ArithmeticExpression", + "ArithmeticExpressionContainerTc": + "obj:pymbolic.typing.ArithmeticExpressionContainerTc", "Expression": "obj:pymbolic.typing.Expression", "MultiVector": "obj:pymbolic.geometric_algebra.MultiVector", "Variable": "class:pymbolic.primitives.Variable", "prim.Subscript": "class:pymbolic.primitives.Subscript", "prim.Variable": "class:pymbolic.primitives.Variable", - "ArithmeticExpressionContainerTc": - "obj:pymbolic.typing.ArithmeticExpressionContainerTc", # arraycontext "ArrayContainer": "obj:arraycontext.ArrayContainer", "ArrayOrContainerOrScalar": "obj:arraycontext.ArrayOrContainerOrScalar", @@ -91,6 +92,7 @@ # boxtree "FromSepSmallerCrit": "obj:boxtree.traversal.FromSepSmallerCrit", "TimingResult": "class:boxtree.timing.TimingResult", + "Tree": "obj:boxtree.tree.Tree", "TreeKind": "obj:boxtree.tree_build.TreeKind", # sumpy "ExpansionBase": "class:sumpy.expansion.ExpansionBase", @@ -114,11 +116,10 @@ "Side": "obj:pytential.symbolic.primitives.Side", "TargetOrDiscretization": "obj:pytential.target.TargetOrDiscretization", "VectorExpression": "obj:pytential.symbolic.pde.scalar.VectorExpression", - "pytential.symbolic.dof_desc.DOFDescriptorLike": - "data:pytential.symbolic.dof_desc.DOFDescriptorLike", - "pytential.symbolic.primitives.ExpressionNode": - "class:pytential.symbolic.primitives.ExpressionNode", + 
"pytential.symbolic.dof_desc.DOFDescriptorLike": "data:pytential.symbolic.dof_desc.DOFDescriptorLike", # noqa: E501 + "pytential.symbolic.primitives.ExpressionNode": "class:pytential.symbolic.primitives.ExpressionNode", # noqa: E501 "sym.DOFDescriptor": "class:pytential.symbolic.dof_desc.DOFDescriptor", + "sym.DOFDescriptorLike": "obj:pytential.symbolic.dof_desc.DOFDescriptorLike", "sym.IntG": "class:pytential.symbolic.primitives.IntG", "sym.var": "obj:pytential.symbolic.primitives.var", } diff --git a/doc/linalg.rst b/doc/linalg.rst index de26425a0..97f12acbf 100644 --- a/doc/linalg.rst +++ b/doc/linalg.rst @@ -32,6 +32,7 @@ Low-level Functionality All the classes and routines in this module are experimental and the API can change at any point. +.. automodule:: pytential.linalg.cluster .. automodule:: pytential.linalg.proxy .. automodule:: pytential.linalg.skeletonization diff --git a/examples/scaling-study-hmatrix.py b/examples/scaling-study-hmatrix.py new file mode 100644 index 000000000..a66f82111 --- /dev/null +++ b/examples/scaling-study-hmatrix.py @@ -0,0 +1,199 @@ +__copyright__ = "Copyright (C) 2022 Alexandru Fikl" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import logging +from dataclasses import dataclass + +import numpy as np + +from meshmode.array_context import PyOpenCLArrayContext +from pytools.convergence import EOCRecorder + +from pytential import GeometryCollection, sym + + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +@dataclass(frozen=True) +class Timings: + build: float + matvec: float + + +def run_hmatrix_matvec( + actx: PyOpenCLArrayContext, + places: GeometryCollection, *, + dofdesc: sym.DOFDescriptor) -> None: + from sumpy.kernel import LaplaceKernel + kernel = LaplaceKernel(places.ambient_dim) + sym_u = sym.var("u") + sym_op = -0.5 * sym_u + sym.D(kernel, sym_u, qbx_forced_limit="avg") + + density_discr = places.get_discretization(dofdesc.geometry, dofdesc.discr_stage) + u = actx.thaw(density_discr.nodes()[0]) + + def build_hmat(): + from pytential.linalg.hmatrix import build_hmatrix_by_proxy + return build_hmatrix_by_proxy( + actx, places, sym_op, sym_u, + domains=[dofdesc], + context={}, + auto_where=dofdesc, + id_eps=1.0e-10, + _tree_kind="adaptive-level-restricted", + _approx_nproxy=64, + _proxy_radius_factor=1.15).get_forward() + + # warmup + from pytools import ProcessTimer + with ProcessTimer() as pt: + hmat = build_hmat() + actx.queue.finish() + + logger.info("build(warmup): %s", pt) + + # build + with ProcessTimer() as pt: + hmat = build_hmat() + actx.queue.finish() + + t_build = pt.wall_elapsed + logger.info("build: %s", pt) + + # matvec + with ProcessTimer() as pt: + du = hmat @ u + assert du is not None + actx.queue.finish() + + t_matvec = pt.wall_elapsed + logger.info("matvec: %s", pt) + + return Timings(t_build, t_matvec) + + +def run_scaling_study( + ambient_dim: int, *, + 
target_order: int = 4, + source_ovsmp: int = 4, + qbx_order: int = 4, + ) -> None: + dd = sym.DOFDescriptor(f"d{ambient_dim}", discr_stage=sym.QBX_SOURCE_STAGE2) + + import pyopencl as cl + ctx = cl.create_some_context() + queue = cl.CommandQueue(ctx) + actx = PyOpenCLArrayContext(queue) + + eoc_build = EOCRecorder() + eoc_matvec = EOCRecorder() + + import meshmode.discretization.poly_element as mpoly + import meshmode.mesh.generation as mgen + + resolutions = [64, 128, 256, 512, 1024, 1536, 2048, 2560, 3072] + + for n in resolutions: + mesh = mgen.make_curve_mesh( + mgen.NArmedStarfish(5, 0.25), + np.linspace(0, 1, n), + order=target_order) + + from meshmode.discretization import Discretization + pre_density_discr = Discretization(actx, mesh, + mpoly.InterpolatoryQuadratureGroupFactory(target_order)) + + from pytential.qbx import QBXLayerPotentialSource + qbx = QBXLayerPotentialSource( + pre_density_discr, + fine_order=source_ovsmp * target_order, + qbx_order=qbx_order, + fmm_order=False, fmm_backend=None, + ) + places = GeometryCollection(qbx, auto_where=dd.geometry) + density_discr = places.get_discretization(dd.geometry, dd.discr_stage) + + logger.info("ndofs: %d", density_discr.ndofs) + logger.info("nelements: %d", density_discr.mesh.nelements) + + timings = run_hmatrix_matvec(actx, places, dofdesc=dd) + eoc_build.add_data_point(density_discr.ndofs, timings.build) + eoc_matvec.add_data_point(density_discr.ndofs, timings.matvec) + + for name, eoc in [("build", eoc_build), ("matvec", eoc_matvec)]: + logger.info("%s\n%s", + name, eoc.pretty_print( + abscissa_label="dofs", + error_label=f"{name} (s)", + abscissa_format="%d", + error_format="%.3fs", + eoc_format="%.2f", + ) + ) + visualize_eoc(f"scaling-study-hmatrix-{name}", eoc, 1) + + +def visualize_eoc( + filename: str, eoc: EOCRecorder, order: int, + overwrite: bool = False) -> None: + try: + import matplotlib.pyplot as plt + except ImportError: + logger.info("matplotlib not available for plotting") + return + 
+ fig = plt.figure(figsize=(10, 10), dpi=300) + ax = fig.gca() + + h, error = np.array(eoc.history).T # type: ignore[no-untyped-call] + ax.loglog(h, error, "o-") + + max_h = np.max(h) + min_e = np.min(error) + max_e = np.max(error) + min_h = np.exp(np.log(max_h) + np.log(min_e / max_e) / order) + + ax.loglog( + [max_h, min_h], [max_e, min_e], "k-", label=rf"$\mathcal{{O}}(h^{order})$" + ) + + # }}} + + ax.grid(True, which="major", linestyle="-", alpha=0.75) + ax.grid(True, which="minor", linestyle="--", alpha=0.5) + + ax.set_xlabel("$N$") + ax.set_ylabel("$T~(s)$") + + import pathlib + filename = pathlib.Path(filename) + if not overwrite and filename.exists(): + raise FileExistsError(f"output file '{filename}' already exists") + + fig.savefig(filename) + plt.close(fig) + + +if __name__ == "__main__": + run_scaling_study(ambient_dim=2) diff --git a/pytential/linalg/cluster.py b/pytential/linalg/cluster.py new file mode 100644 index 000000000..b8a96c46e --- /dev/null +++ b/pytential/linalg/cluster.py @@ -0,0 +1,595 @@ +from __future__ import annotations + + +__copyright__ = "Copyright (C) 2022 Alexandru Fikl" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import logging +import pathlib +from dataclasses import dataclass, replace +from functools import singledispatch +from typing import TYPE_CHECKING, Any + +import numpy as np + +from arraycontext import PyOpenCLArrayContext +from meshmode.discretization import Discretization +from pytools import log_process, memoize_method, obj_array + +from pytential import GeometryCollection, sym +from pytential.linalg.utils import IndexList, TargetAndSourceClusterList +from pytential.qbx import QBXLayerPotentialSource + + +if TYPE_CHECKING: + from collections.abc import Iterator + + import optype.numpy as onp + + from boxtree.tree import Tree + from boxtree.tree_build import TreeKind + + from pytential.linalg.proxy import ProxyGenerator + + +logger = logging.getLogger(__name__) + +__doc__ = """ +Clustering +~~~~~~~~~~ + +.. autoclass:: ClusterLevel +.. autoclass:: ClusterTree + +.. autofunction:: split_array +.. autofunction:: cluster +.. autofunction:: uncluster + +.. autofunction:: partition_by_nodes +""" + +# FIXME: this is just an arbitrary value +_DEFAULT_MAX_PARTICLES_IN_BOX = 32 + + +# {{{ cluster tree + + +def make_cluster_parent_map( + parent_ids: onp.Array1D[np.integer], + ) -> obj_array.ObjectArray1D[onp.Array1D[np.integer]]: + """Construct a parent map for :attr:`ClusterLevel.parent_map`.""" + # NOTE: np.unique returns a sorted array + unique_parent_ids = np.unique(parent_ids) + ids = np.arange(parent_ids.size) + + return obj_array.new_1d([ + ids[parent_ids == unique_parent_ids[i]] + for i in range(unique_parent_ids.size) + ]) + + +@dataclass(frozen=True) +class ClusterLevel: + """A level in a :class:`ClusterTree`. + + .. autoattribute:: level + .. autoattribute:: box_ids + .. 
autoattribute:: parent_map + .. autoproperty:: nclusters + """ + + level: int + """Current level that is represented.""" + box_ids: onp.Array1D[np.integer] + """Box IDs on the current level.""" + parent_map: obj_array.ObjectArray1D[onp.Array1D[np.integer]] + """An object :class:`~numpy.ndarray` containing buckets of child indices, + i.e. ``parent_map[i]`` contains all the child indices that will cluster + into the same parent. Note that this indexing is local to this level + and is not related to the tree indexing stored by the :class:`ClusterTree`. + """ + + @property + def nclusters(self) -> int: + """Number of clusters on the current level (same as number of boxes + in :attr:`box_ids`). + """ + return self.box_ids.size + + +@dataclass(frozen=True) +class ClusterTree: + r"""Hierarchical cluster representation. + + .. autoattribute:: nlevels + .. autoattribute:: leaf_cluster_box_ids + .. autoattribute:: tree_cluster_parent_ids + + .. autoproperty:: nclusters + .. autoproperty:: levels + .. automethod:: iter_levels + """ + + nlevels: int + """Total number of levels in the tree.""" + leaf_cluster_box_ids: onp.Array1D[np.integer] + """Box IDs for each cluster on the leaf level of the tree.""" + tree_cluster_parent_ids: onp.Array1D[np.integer] + """Parent box IDs for :attr:`leaf_cluster_box_ids`.""" + + # NOTE: only here to allow easier debugging + testing + _tree: Tree | None + + @property + def nclusters(self) -> int: + """Number of clusters in the leaf level of the tree.""" + return self.leaf_cluster_box_ids.size + + @property + @memoize_method + def levels(self) -> obj_array.ObjectArray1D[ClusterLevel]: + r"""An :class:`~numpy.ndarray` of :class:`ClusterLevel`\ s.""" + return obj_array.new_1d(list(self.iter_levels())) + + def iter_levels(self) -> Iterator[ClusterLevel]: + """ + :returns: an iterator over all the :class:`ClusterLevel` levels. 
+ """ + + box_ids = self.leaf_cluster_box_ids + parent_ids = self.tree_cluster_parent_ids[box_ids] + clevel = ClusterLevel( + level=self.nlevels - 1, + box_ids=box_ids, + parent_map=make_cluster_parent_map(parent_ids), + ) + + for _ in range(self.nlevels - 1, -1, -1): + yield clevel + + box_ids = np.unique(self.tree_cluster_parent_ids[clevel.box_ids]) + parent_ids = self.tree_cluster_parent_ids[box_ids] + clevel = ClusterLevel( + level=clevel.level - 1, + box_ids=box_ids, + parent_map=make_cluster_parent_map(parent_ids) + ) + + assert clevel.nclusters == 1 + +# }}} + + +# {{{ cluster + +def split_array(x: onp.Array1D[Any], + index: IndexList) -> obj_array.ObjectArray1D[onp.Array1D[Any]]: + """ + :returns: an object :class:`~numpy.ndarray` where each entry contains the + elements of the :math:`i`-th cluster in *index*. + """ + assert x.size == index.nindices + + return obj_array.new_1d([ + index.cluster_take(x, i) for i in range(index.nclusters) + ]) + + +@singledispatch +def cluster(obj: object, clevel: ClusterLevel) -> Any: + """Merge together elements of *obj* into their parent object, as described + by :attr:`ClusterLevel.parent_map`. 
+ """ + raise NotImplementedError(type(obj).__name__) + + +@cluster.register(IndexList) +def cluster_index_list(obj: IndexList, clevel: ClusterLevel) -> IndexList: + assert obj.nclusters == clevel.nclusters + + if clevel.nclusters == 1: + return obj + + from pytential.linalg.utils import make_index_list + indices = obj_array.new_1d([ + np.concatenate([obj.cluster_indices(i) for i in ppm]) + for ppm in clevel.parent_map + ]) + + return make_index_list(indices) + + +@cluster.register(TargetAndSourceClusterList) +def cluster_target_and_source_cluster_list( + obj: TargetAndSourceClusterList, clevel: ClusterLevel, + ) -> TargetAndSourceClusterList: + assert obj.nclusters == clevel.nclusters + + if clevel.nclusters == 1: + return obj + + return replace(obj, + targets=cluster(obj.targets, clevel), + sources=cluster(obj.sources, clevel)) + + +@cluster.register(np.ndarray) +def cluster_ndarray(obj: obj_array.ObjectArray1D[onp.ArrayND[Any]], + clevel: ClusterLevel) -> obj_array.ObjectArray1D[onp.ArrayND[Any]]: + assert obj.shape == (clevel.nclusters,) + if clevel.nclusters == 1: + return obj + + def make_block(i: int, j: int): + if i == j: + return obj[i] + + return np.zeros((obj[i].shape[0], obj[j].shape[1]), dtype=obj[i].dtype) + + from pytools import single_valued + ndim = single_valued(block.ndim for block in obj) + + if ndim == 1: + return obj_array.new_1d([ + np.concatenate([obj[i] for i in ppm]) for ppm in clevel.parent_map + ]) + elif ndim == 2: + return obj_array.new_1d([ + np.block([[make_block(i, j) for j in ppm] for i in ppm]) + for ppm in clevel.parent_map + ]) + else: + raise ValueError(f"unsupported ndarray dimension: '{ndim}'") + +# }}} + + +# {{{ uncluster + +def uncluster(ary: obj_array.ObjectArray1D[onp.Array1D[Any]], + index: IndexList, + clevel: ClusterLevel) -> obj_array.ObjectArray1D[onp.Array1D[Any]]: + """Performs the reverse of :func:`cluster` on object arrays. 
+ + :arg ary: an object :class:`~numpy.ndarray` with a shape that matches + :attr:`ClusterLevel.parent_map`. + :arg index: an :class:`~pytential.linalg.utils.IndexList` for the + current level, as given by :attr:`ClusterLevel.box_ids`. + :returns: an object :class:`~numpy.ndarray` with a shape that matches + :attr:`ClusterLevel.box_ids` of all the elements of *ary* that belong + to each child cluster. + """ + assert ary.dtype.char == "O" + assert ary.shape == (clevel.parent_map.size,) + + if index.nclusters == 1: + return ary + + result: np.ndarray = np.empty(index.nclusters, dtype=object) + for ifrom, ppm in enumerate(clevel.parent_map): + offset = 0 + for ito in ppm: + cluster_size = index.cluster_size(ito) + result[ito] = ary[ifrom][offset:offset + cluster_size] + offset += cluster_size + + assert ary[ifrom].shape == (offset,) + + return result + +# }}} + + +# {{{ cluster generation + +def _build_binary_ish_tree_from_starts(starts: onp.Array1D[np.integer]) -> ClusterTree: + partition_box_ids = np.arange(starts.size - 1) + box_ids = partition_box_ids + + box_parent_ids: list[onp.Array1D[np.integer]] = [] + offset = box_ids.size + while box_ids.size > 1: + # NOTE: this is probably not the most efficient way to do it, but this + # code is mostly meant for debugging using a simple tree + clusters = np.array_split(box_ids, box_ids.size // 2) + parent_ids = offset + np.arange(len(clusters)) + box_parent_ids.append(np.repeat(parent_ids, [len(c) for c in clusters])) + + box_ids = parent_ids + offset += box_ids.size + + # NOTE: make the root point to itself + box_parent_ids.append(np.array([offset - 1])) + nlevels = len(box_parent_ids) + + return ClusterTree( + nlevels=nlevels, + leaf_cluster_box_ids=partition_box_ids, + tree_cluster_parent_ids=np.concatenate(box_parent_ids), + _tree=None) + + +@log_process(logger) +def partition_by_nodes( + actx: PyOpenCLArrayContext, places: GeometryCollection, *, + dofdesc: sym.DOFDescriptorLike | None = None, + tree_kind: TreeKind | 
None = "adaptive-level-restricted", + max_particles_in_box: int | None = None) -> tuple[IndexList, ClusterTree]: + """Generate equally sized ranges of nodes. The partition is created at the + lowest level of granularity, i.e. nodes. This results in balanced ranges + of points, but will split elements across different ranges. + + :arg dofdesc: a :class:`~pytential.symbolic.dof_desc.DOFDescriptor` for + the geometry in *places* which should be partitioned. + :arg tree_kind: if not *None*, it is passed to :class:`boxtree.TreeBuilder`. + :arg max_particles_in_box: value used to control the number of points + in each partition (and thus the number of partitions). See the documentation + in :class:`boxtree.TreeBuilder`. + """ + if dofdesc is None: + dofdesc = places.auto_source + dofdesc = sym.as_dofdesc(dofdesc) + + if max_particles_in_box is None: + max_particles_in_box = _DEFAULT_MAX_PARTICLES_IN_BOX + + lpot_source = places.get_geometry(dofdesc.geometry) + assert isinstance(lpot_source, Discretization | QBXLayerPotentialSource) + + discr = places.get_discretization(dofdesc.geometry, dofdesc.discr_stage) + assert isinstance(discr, Discretization) + + if tree_kind is not None: + setup_actx = lpot_source._setup_actx + assert isinstance(setup_actx, PyOpenCLArrayContext) + + from pytential.qbx.utils import tree_code_container + tcc = tree_code_container(setup_actx) + + from arraycontext import flatten + from meshmode.dof_array import DOFArray + tree, _ = tcc.build_tree()(actx.queue, + particles=flatten( + actx.thaw(discr.nodes()), actx, leaf_class=DOFArray + ), + max_particles_in_box=max_particles_in_box, + kind=tree_kind) + tree = tree.get(actx.queue) + + # FIXME maybe this should use IS_LEAF once available? 
+ from boxtree import box_flags_enum + assert tree.box_flags is not None + leaf_boxes, = ( + tree.box_flags & box_flags_enum.HAS_SOURCE_OR_TARGET_CHILD_BOXES == 0 + ).nonzero() + + # FIXME: this annotation is not needed with numpy 2.0 + indices = np.empty(len(leaf_boxes), dtype=object) + starts = None + + for i, ibox in enumerate(leaf_boxes): + box_start = tree.box_source_starts[ibox] + box_end = box_start + tree.box_source_counts_cumul[ibox] + indices[i] = tree.user_source_ids[box_start:box_end] + + ctree = ClusterTree( + nlevels=tree.nlevels, + leaf_cluster_box_ids=leaf_boxes, + tree_cluster_parent_ids=tree.box_parent_ids, + _tree=tree) + else: + if discr.ambient_dim != 2 and discr.dim == 1: + raise ValueError("only curves are supported for 'tree_kind=None'") + + nclusters = max(discr.ndofs // max_particles_in_box, 2) + indices = np.arange(0, discr.ndofs, dtype=np.int64) + starts = np.linspace(0, discr.ndofs, nclusters + 1, dtype=np.int64) + + # FIXME: mypy seems to be able to figure this out with numpy 2.0 + assert starts is not None + assert starts[-1] == discr.ndofs + + ctree = _build_binary_ish_tree_from_starts(starts) + + from pytential.linalg import make_index_list + return make_index_list(indices, starts=starts), ctree + +# }}} + + +# {{{ visualize clusters + +def visualize_clusters(actx: PyOpenCLArrayContext, + generator: ProxyGenerator, + srcindex: IndexList, + tree: ClusterTree, + filename: str | pathlib.Path, *, + dofdesc: sym.DOFDescriptorLike = None, + overwrite: bool = False) -> None: + filename = pathlib.Path(filename) + + places = generator.places + if dofdesc is None: + dofdesc = places.auto_source + dofdesc = sym.as_dofdesc(dofdesc) + + discr = places.get_discretization(dofdesc.geometry, dofdesc.discr_stage) + assert isinstance(discr, Discretization) + + if discr.ambient_dim == 2: + _visualize_clusters_2d(actx, generator, discr, srcindex, tree, filename, + dofdesc=dofdesc, overwrite=overwrite) + elif discr.ambient_dim == 2: + 
_visualize_clusters_3d(actx, generator, discr, srcindex, tree, filename, + dofdesc=dofdesc, overwrite=overwrite) + else: + raise NotImplementedError(f"Unsupported dimension: {discr.ambient_dim}") + + +def _visualize_clusters_2d(actx: PyOpenCLArrayContext, + generator: ProxyGenerator, + discr: Discretization, + srcindex: IndexList, + tree: ClusterTree, + filename: pathlib.Path, *, + dofdesc: sym.DOFDescriptor, + overwrite: bool = False) -> None: + import matplotlib.pyplot as pt + + from arraycontext import flatten + from boxtree.visualization import TreePlotter + from meshmode.dof_array import DOFArray + + assert discr.ambient_dim == 2 + x, y = actx.to_numpy(flatten(discr.nodes(), actx, leaf_class=DOFArray)) + for clevel in tree.levels: + outfile = filename.with_stem(f"{filename.stem}-lvl{clevel.level:03d}") + if not overwrite and outfile.exists(): + raise FileExistsError(f"Output file '{outfile}' already exists") + + pxy = generator(actx, dofdesc, srcindex).to_numpy(actx) + pxycenters = pxy.centers + pxyradii = pxy.radii + clsradii = pxy.cluster_radii + + fig = pt.figure() + ax = fig.gca() + + plotter = TreePlotter(tree._tree) + plotter.set_bounding_box() + plotter.draw_tree(fill=False, edgecolor="black", zorder=10) + + ax.plot(x, y, "ko", ms=2.0) + for i in range(srcindex.nclusters): + isrc = srcindex.cluster_indices(i) + ax.plot(x[isrc], y[isrc], "o", ms=2.0) + + from itertools import cycle + colors = cycle(pt.rcParams["axes.prop_cycle"].by_key()["color"]) + + for ppm in clevel.parent_map: + color = next(colors) + for j in ppm: + center = (pxycenters[0, j], pxycenters[1, j]) + c = pt.Circle(center, pxyradii[j], color=color, alpha=0.1) + ax.add_artist(c) + c = pt.Circle(center, clsradii[j], color=color, alpha=0.1) + ax.add_artist(c) + ax.text(*center, f"{j}", fontsize=18) + + ax.set_xlabel("$x$") + ax.set_ylabel("$y$") + ax.relim() + ax.autoscale() + ax.set_aspect("equal") + + fig.savefig(outfile) + pt.close(fig) + + srcindex = cluster(srcindex, clevel) + + +def 
_visualize_clusters_3d(actx: PyOpenCLArrayContext, + generator: ProxyGenerator, + discr: Discretization, + srcindex: IndexList, + tree: ClusterTree, + filename: pathlib.Path, *, + dofdesc: sym.DOFDescriptor, + overwrite: bool = False) -> None: + from arraycontext import unflatten + from meshmode.discretization.visualization import make_visualizer + + # NOTE: This writes out one vtu file for each level that contains + # * a mesh that's the union of `discr` and a sphere for each proxy ball + # * marker: a marker on `discr` (NaN on the proxy balls) for each of the + # clusters at the current level + # * proxies: a marker on the proxy balls (NaN on `discr`) + # + # Not quite sure how to best visualize the whole geometry here, so the + # proposed workflow is to load the vtu file twice, set opacity to 0 for + # NaNs and set opacity to something small for the proxy balls. + + # TODO: + # * color proxy balls based on their parent so we can easily see how they + # will cluster + + assert discr.ambient_dim == 3 + for clevel in tree.levels: + outfile = filename.with_stem(f"{filename.stem}-lvl{clevel.level:03d}") + outfile = outfile.with_suffix(".vtu") + if not overwrite and outfile.exists(): + raise FileExistsError(f"Output file '{outfile}' already exists") + + # construct proxy balls + pxy = generator(actx, dofdesc, srcindex).to_numpy(actx) + pxycenters = pxy.centers + pxyradii = pxy.radii + nclusters = srcindex.nclusters + + # construct meshes for each proxy ball + from meshmode.mesh.generation import generate_sphere + from meshmode.mesh.processing import affine_map, merge_disjoint_meshes + + ref_mesh = generate_sphere(1, 4, uniform_refinement_rounds=1) + pxymeshes = [ + affine_map(ref_mesh, A=pxyradii[i], b=pxycenters[:, i].squeeze()) + for i in range(nclusters) + ] + + # merge meshes into a single discretization + from meshmode.discretization.poly_element import ( + InterpolatoryEdgeClusteredGroupFactory, + ) + pxymesh = merge_disjoint_meshes([discr.mesh, *pxymeshes]) + 
pxydiscr = Discretization(actx, pxymesh, + InterpolatoryEdgeClusteredGroupFactory(4)) + + # add a marker field for all clusters + marker = np.full((pxydiscr.ndofs,), np.nan, dtype=np.float64) + template_ary = actx.thaw(pxydiscr.nodes()[0]) + + for i in range(srcindex.nclusters): + isrc = srcindex.cluster_indices(i) + marker[isrc] = 10.0 * (i + 1.0) + marker_dev = unflatten(template_ary, actx.from_numpy(marker), actx) + + # add a marker field for all proxies + pxymarker = np.full((pxydiscr.ndofs,), np.nan, dtype=np.float64) + pxymarker[discr.ndofs:] = 1.0 + pxymarker_dev = unflatten(template_ary, actx.from_numpy(pxymarker), actx) + + # write it all out + vis = make_visualizer(actx, pxydiscr) + vis.write_vtk_file(str(outfile), [ + ("marker", marker_dev), + ("proxies", pxymarker_dev), + ], overwrite=overwrite) + + srcindex = cluster(srcindex, clevel) + + +# }}} + + +# vim: foldmethod=marker diff --git a/pytential/linalg/direct_solver_symbolic.py b/pytential/linalg/direct_solver_symbolic.py index 290f4048c..eb13a4946 100644 --- a/pytential/linalg/direct_solver_symbolic.py +++ b/pytential/linalg/direct_solver_symbolic.py @@ -76,12 +76,14 @@ def prepare_proxy_expr( places: GeometryCollection, exprs: Iterable[ArithmeticExpression], auto_where: tuple[DOFDescriptorLike, DOFDescriptorLike], + remove_transforms: bool = True, ) -> obj_array.ObjectArray1D[ArithmeticExpression]: def _prepare_expr(expr: ArithmeticExpression) -> ArithmeticExpression: # remove all diagonal / non-operator terms in the expression expr = IntGTermCollector()(expr) # ensure all IntGs remove all the kernel derivatives - expr = KernelTransformationRemover()(expr) + if remove_transforms: + expr = KernelTransformationRemover()(expr) # ensure all IntGs have their source and targets set expr = DOFDescriptorReplacer( default_source=auto_where[0], diff --git a/pytential/linalg/hmatrix.py b/pytential/linalg/hmatrix.py new file mode 100644 index 000000000..7bc97012b --- /dev/null +++ b/pytential/linalg/hmatrix.py 
@@ -0,0 +1,590 @@ +from __future__ import annotations + + +__copyright__ = "Copyright (C) 2022 Alexandru Fikl" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
def hmatrix_error_from_param(
        ambient_dim: int,
        *,
        id_eps: float,
        id_rank: int,
        min_proxy_radius: float,
        max_cluster_radius: float,
        nproxies: int,
        nsources: int,
        ntargets: int,
        c: float = 1.0e-3) -> float:
    r"""Evaluate a simple a priori error model for the proxy-based
    hierarchical matrix approximation.

    With :math:`\rho` the ratio *max_cluster_radius* / *min_proxy_radius* and
    :math:`p` an expansion order derived from *id_rank*, the returned value is

    .. math::

        c \frac{\rho^{p + 1}}{1 - \rho}
        + \sqrt{\frac{n_{sources}}{n_{proxies}}}
          \frac{1 - \rho^{p + 1}}{1 - \rho} \epsilon_{id}.

    :arg ambient_dim: ambient dimension, only 2 and 3 are supported. In 2D the
        order is ``int(id_rank / 2)`` and in 3D it is the largest :math:`p`
        with :math:`p (p + 1) \le` *id_rank*.
    :arg id_eps: tolerance that multiplies the second term of the model.
    :arg id_rank: rank used to determine the order :math:`p`.
    :arg min_proxy_radius: smallest proxy radius, must be positive.
    :arg max_cluster_radius: largest cluster radius, must be non-negative and
        strictly smaller than *min_proxy_radius*.
    :arg nproxies: number of proxy points, must be positive.
    :arg nsources: number of source points.
    :arg ntargets: currently unused by the model; kept so that callers can
        already pass it.
    :arg c: constant that multiplies the first term of the model.

    :raises ValueError: if *ambient_dim* is not supported or if the radii or
        *nproxies* make the model singular or meaningless.
    """
    import math

    # FIXME: This is horribly out of date right now. Need to get the updated
    # version from https://github.com/alexfikl/qbx-ds-paper-experiments
    if ambient_dim == 2:
        p = int(0.5 * id_rank)
    elif ambient_dim == 3:
        p = int((math.sqrt(1 + 4 * id_rank) - 1) / 2)
    else:
        raise ValueError(f"unsupported ambient dimension: '{ambient_dim}'")

    # NOTE: both terms divide by these quantities, so fail early with a
    # descriptive error instead of a bare ZeroDivisionError
    if not min_proxy_radius > 0:
        raise ValueError(
            f"'min_proxy_radius' must be positive: '{min_proxy_radius}'")

    if not nproxies > 0:
        raise ValueError(f"'nproxies' must be positive: '{nproxies}'")

    rho = alpha = max_cluster_radius / min_proxy_radius

    # NOTE: the 1 / (1 - rho) factor is singular at rho = 1 and the first term
    # changes sign for rho > 1, i.e. the model has no meaning there
    if not 0 <= rho < 1:
        raise ValueError(
            "'max_cluster_radius' must be non-negative and strictly smaller "
            f"than 'min_proxy_radius': ratio is '{rho}'")

    return float(
        c * rho ** (p + 1) / (1 - rho)
        + math.sqrt(nsources / nproxies)
        * (1 - alpha ** (p + 1)) / (1 - alpha) * id_eps
    )
@log_process(logger)
def _update_skeletons_diagonal(
        wrangler: ProxyHierarchicalMatrixWrangler,
        forward: bool = True,
        ) -> obj_array.ObjectArray1D[SkeletonizationResult]:
    """Build a per-level array of skeletons whose diagonal blocks have been
    passed through :func:`_update_skeleton_diagonal`.

    The first level is taken from *wrangler* unchanged. Every following level
    *i* is updated using level *i - 1* of the original skeletons as its parent
    and level *i - 1* of the cluster tree.

    :arg forward: if *True*, no diagonal is passed to
        :func:`_update_skeleton_diagonal`. Otherwise, the ``Dhat`` attribute
        of the previous level *from the array being built here* (not from
        *wrangler*) is passed as the diagonal.
    :returns: an object array with the same shape as ``wrangler.skeletons``.
    """
    skeletons = np.empty(wrangler.skeletons.shape, dtype=object)
    skeletons[0] = wrangler.skeletons[0]

    # NOTE: the levels must be processed in increasing order, since the
    # backward case reads the result of the previous iteration
    for i in range(1, wrangler.ctree.nlevels):
        diagonal = None if forward else skeletons[i - 1].Dhat

        skeletons[i] = _update_skeleton_diagonal(
            wrangler.skeletons[i],
            wrangler.skeletons[i - 1],
            wrangler.ctree.levels[i - 1],
            diagonal=diagonal)

    return skeletons
@dataclass(frozen=True)
class ProxyHierarchicalForwardMatrix(ProxyHierarchicalMatrix):
    """A :class:`ProxyHierarchicalMatrix` whose matrix-vector product is
    evaluated with :func:`apply_skeleton_forward_matvec`.
    """

    def _matvec(self, x: ArrayOrContainerT) -> ArrayOrContainerT:
        """Apply the operator to *x*.

        :arg x: a :class:`~meshmode.dof_array.DOFArray` attached to an array
            context or a :class:`numpy.ndarray` with a non-object dtype.
        :returns: a result of the same kind as *x*.
        :raises ValueError: if *x* is a ``DOFArray`` without an array context.
        :raises TypeError: if *x* has an unsupported type.
        """
        # NOTE: must be initialized here: plain numpy inputs never go through
        # the DOFArray branch and would otherwise hit an unbound local below
        actx = None

        if isinstance(x, DOFArray):
            from arraycontext import get_container_context_recursively_opt
            actx = get_container_context_recursively_opt(x)
            if actx is None:
                raise ValueError("input array is frozen")

            ary = actx.to_numpy(flatten(x, actx))
        elif isinstance(x, np.ndarray) and x.dtype.char != "O":
            ary = x
        else:
            raise TypeError(f"unsupported input type: {type(x)}")

        assert actx is None or isinstance(actx, PyOpenCLArrayContext)
        result = apply_skeleton_forward_matvec(self, ary)

        # convert back to the container type that was passed in
        if isinstance(x, DOFArray):
            assert actx is not None
            result = unflatten(x, actx.from_numpy(result), actx)

        return result
@dataclass(frozen=True)
class ProxyHierarchicalBackwardMatrix(ProxyHierarchicalMatrix):
    """A :class:`ProxyHierarchicalMatrix` whose matrix-vector product is
    evaluated with :func:`apply_skeleton_backward_matvec`.
    """

    def _matvec(self, x: ArrayOrContainerT) -> ArrayOrContainerT:
        """Apply the operator to *x*.

        :arg x: a :class:`~meshmode.dof_array.DOFArray` attached to an array
            context or a :class:`numpy.ndarray` with a non-object dtype.
        :returns: a result of the same kind as *x*.
        :raises ValueError: if *x* is a ``DOFArray`` without an array context.
        :raises TypeError: if *x* has an unsupported type.
        """
        # NOTE: must be initialized here: plain numpy inputs never go through
        # the DOFArray branch, but the array context is still passed on below
        actx = None

        if isinstance(x, DOFArray):
            from arraycontext import get_container_context_recursively_opt
            actx = get_container_context_recursively_opt(x)
            if actx is None:
                raise ValueError("input array is frozen")

            ary = actx.to_numpy(flatten(x, actx))
        elif isinstance(x, np.ndarray) and x.dtype.char != "O":
            ary = x
        else:
            raise TypeError(f"unsupported input type: {type(x)}")

        assert actx is None or isinstance(actx, PyOpenCLArrayContext)
        result = apply_skeleton_backward_matvec(actx, self, ary)

        # convert back to the container type that was passed in
        if isinstance(x, DOFArray):
            assert actx is not None
            result = unflatten(x, actx.from_numpy(result), actx)

        return result
def build_hmatrix_by_proxy(
        actx: PyOpenCLArrayContext,
        places: GeometryCollection,
        exprs: sym.Expression | Sequence[sym.Expression],
        input_exprs: sym.Variable | Sequence[sym.Variable], *,
        auto_where: sym.DOFDescriptorLike | None = None,
        domains: Sequence[sym.DOFDescriptorLike] | None = None,
        context: dict[str, Any] | None = None,
        id_eps: float = 1.0e-8,
        rng: np.random.Generator | None = None,

        # NOTE: these are dev variables and can disappear at any time!
        _tree_kind: str | None = "adaptive-level-restricted",
        _weighted_proxy: bool | tuple[bool, bool] | None = None,

        # TODO: plug in the error model to get an estimate for:
        #   * how many points we want per cluster?
        #   * how many proxy points we want?
        #   * how far away should the proxy points be?
        # based on id_eps. How many of these should be user tunable?
        _max_particles_in_box: int | None = None,
        _approx_nproxy: int | None = None,
        _proxy_radius_factor: float | None = None,
        ) -> ProxyHierarchicalMatrixWrangler:
    """Construct the recursive proxy-based skeletonization of an operator and
    wrap it in a :class:`ProxyHierarchicalMatrixWrangler`.

    The steps are: create a skeletonization wrangler for *exprs*, create a
    QBX proxy generator, partition the nodes of the first domain of the
    wrangler into clusters (together with a cluster tree), and recursively
    skeletonize over that tree. The same cluster index is used for both the
    targets and the sources.

    :arg exprs: forwarded to the skeletonization wrangler and the recursive
        skeletonization.
    :arg input_exprs: forwarded together with *exprs*.
    :arg auto_where: forwarded to the skeletonization wrangler.
    :arg domains: forwarded to the skeletonization wrangler.
    :arg context: forwarded to the skeletonization wrangler.
    :arg id_eps: forwarded to the recursive skeletonization (presumably the
        tolerance of the interpolative decomposition -- confirm against
        :func:`~pytential.linalg.skeletonization.rec_skeletonize_by_proxy`).
    :arg rng: forwarded to the recursive skeletonization.

    :raises ValueError: if the operator has more than one row or column
        block.
    """
    from pytential.linalg.skeletonization import make_skeletonization_wrangler
    from pytential.symbolic.matrix import P2PClusterMatrixBuilder

    # NOTE(review): this helper is currently unused; it is only referenced by
    # the commented-out keyword arguments below. Also note that, despite its
    # name, it forces `exclude_self=True` -- confirm which one is intended.
    def P2PClusterMatrixBuilderWithDiagonal(*args, **kwargs):
        kwargs["exclude_self"] = True
        return P2PClusterMatrixBuilder(*args, **kwargs)

    wrangler = make_skeletonization_wrangler(
        places, exprs, input_exprs,
        domains=domains, context=context, auto_where=auto_where,
        _weighted_proxy=_weighted_proxy,
        # _remove_source_transforms=True,
        # _neighbor_cluster_builder=P2PClusterMatrixBuilderWithDiagonal,
        # _proxy_source_cluster_builder=P2PClusterMatrixBuilder,
        # _proxy_target_cluster_builder=P2PClusterMatrixBuilder,
        )

    if wrangler.nrows != 1 or wrangler.ncols != 1:
        raise ValueError("multi-block operators are not supported")

    from pytential.linalg.proxy import QBXProxyGenerator
    proxy = QBXProxyGenerator(places,
            approx_nproxy=_approx_nproxy,
            radius_factor=_proxy_radius_factor)

    # partition the nodes of the (single) domain into leaf clusters and get
    # the cluster tree that is used for the recursion
    from pytential.linalg.cluster import partition_by_nodes
    cluster_index, ctree = partition_by_nodes(
        actx, places,
        dofdesc=wrangler.domains[0],
        tree_kind=_tree_kind,
        max_particles_in_box=_max_particles_in_box)

    logger.info("tree levels: %d", ctree.nlevels)
    logger.info("cluster count: %d", cluster_index.nclusters)
    logger.info("leaf cluster sizes: %s", [
        # NOTE: making into a list so that they all get printed
        int(s) for s in np.diff(cluster_index.starts)
        ])

    # the same clusters are used as targets and as sources
    from pytential.linalg.utils import TargetAndSourceClusterList
    tgt_src_index = TargetAndSourceClusterList(
        targets=cluster_index, sources=cluster_index)

    from pytential.linalg.skeletonization import rec_skeletonize_by_proxy
    skeletons = rec_skeletonize_by_proxy(
        actx, places, ctree, tgt_src_index, exprs, input_exprs,
        id_eps=id_eps,
        rng=rng,
        max_particles_in_box=_max_particles_in_box,
        _proxy=proxy,
        _wrangler=wrangler,
    )

    # the per-level statistics below are skipped when running with `-O`
    if __debug__:
        def _get_cluster_avg_size(idx: IndexList) -> str:
            d = np.diff(idx.starts)
            return f"{np.mean(d):.2f} ± {np.std(d):.2f}"

        logger.info("avg cluster size: %s", " ".join(
            _get_cluster_avg_size(sk.tgt_src_index.sources)
            for sk in skeletons
            ))

    return ProxyHierarchicalMatrixWrangler(
        wrangler=wrangler, proxy=proxy, ctree=ctree, skeletons=skeletons
        )
autofunction:: gather_cluster_neighbor_points """ @@ -82,82 +79,6 @@ _DEFAULT_MAX_PARTICLES_IN_BOX = 32 -# {{{ point index partitioning - -def partition_by_nodes( - actx: PyOpenCLArrayContext, - places: GeometryCollection, *, - dofdesc: DOFDescriptorLike | None = None, - tree_kind: TreeKind | None = "adaptive-level-restricted", - max_particles_in_box: int | None = None) -> IndexList: - """Generate equally sized ranges of nodes. The partition is created at the - lowest level of granularity, i.e. nodes. This results in balanced ranges - of points, but will split elements across different ranges. - - :arg dofdesc: a :class:`~pytential.symbolic.dof_desc.DOFDescriptor` for - the geometry in *places* which should be partitioned. - :arg tree_kind: if not *None*, it is passed to :class:`boxtree.TreeBuilder`. - :arg max_particles_in_box: value used to control the number of points - in each partition (and thus the number of partitions). See the documentation - in :class:`boxtree.TreeBuilder`. - """ - if dofdesc is None: - dofdesc = places.auto_source - dofdesc = sym.as_dofdesc(dofdesc) - - if max_particles_in_box is None: - max_particles_in_box = _DEFAULT_MAX_PARTICLES_IN_BOX - - from pytential.source import LayerPotentialSourceBase - - lpot_source = places.get_geometry(dofdesc.geometry) - assert isinstance(lpot_source, LayerPotentialSourceBase) - - discr = places.get_discretization(dofdesc.geometry, dofdesc.discr_stage) - assert isinstance(discr, Discretization) - - if tree_kind is not None: - from pytential.qbx.utils import tree_code_container - tcc = tree_code_container(lpot_source._setup_actx) - - tree, _ = tcc.build_tree()(actx.queue, - particles=flatten( - actx.thaw(discr.nodes()), actx, leaf_class=DOFArray - ), - max_particles_in_box=max_particles_in_box, - kind=tree_kind) - - from boxtree import box_flags_enum - tree = tree.get(actx.queue) - # FIXME: maybe this should use IS_LEAF once available? 
- assert tree.box_flags is not None - leaf_boxes, = ( - tree.box_flags & box_flags_enum.HAS_SOURCE_OR_TARGET_CHILD_BOXES == 0 - ).nonzero() - - indices = np.empty(len(leaf_boxes), dtype=object) - starts: onp.Array1D[np.integer] | None = None - - for i, ibox in enumerate(leaf_boxes): - box_start = tree.box_source_starts[ibox] - box_end = box_start + tree.box_source_counts_cumul[ibox] - indices[i] = tree.user_source_ids[box_start:box_end] - else: - if discr.ambient_dim != 2 and discr.dim == 1: - raise ValueError("only curves are supported for 'tree_kind=None'") - - nclusters = max(discr.ndofs // max_particles_in_box, 2) - indices = np.arange(0, discr.ndofs, dtype=np.int64) - starts = np.linspace(0, discr.ndofs, nclusters + 1, dtype=np.int64) - - assert starts[-1] == discr.ndofs - - from pytential.linalg.utils import make_index_list - return make_index_list(indices, starts=starts) - -# }}} - - # {{{ proxy points class ProxyPointSource(PointPotentialSource): @@ -231,6 +152,7 @@ class ProxyClusterGeometryData: """ places: GeometryCollection + """Geometry collection containing the used :attr:`dofdesc`.""" dofdesc: sym.DOFDescriptor """A descriptor for the geometry used to compute the proxy points.""" @@ -469,6 +391,7 @@ def get_centers_kernel_ex(self, actx: PyOpenCLArrayContext) -> lp.ExecutorBase: def get_radii_kernel_ex(self, actx: PyOpenCLArrayContext) -> lp.ExecutorBase: pass + @log_process(logger) def __call__(self, actx: PyOpenCLArrayContext, source_dd: DOFDescriptorLike | None, @@ -655,6 +578,7 @@ def get_radii_kernel_ex(self, actx: PyOpenCLArrayContext) -> lp.ExecutorBase: return make_compute_cluster_qbx_radii_kernel_ex(actx, self.ambient_dim) @override + @log_process(logger) def __call__(self, actx: PyOpenCLArrayContext, source_dd: DOFDescriptorLike | None, @@ -688,6 +612,7 @@ def __call__(self, # {{{ gather_cluster_neighbor_points +@log_process(logger) def gather_cluster_neighbor_points( actx: PyOpenCLArrayContext, pxy: ProxyClusterGeometryData, diff --git 
a/pytential/linalg/skeletonization.py b/pytential/linalg/skeletonization.py index 0f14fe5dc..8feade486 100644 --- a/pytential/linalg/skeletonization.py +++ b/pytential/linalg/skeletonization.py @@ -23,15 +23,18 @@ THE SOFTWARE. """ +import logging from dataclasses import dataclass from typing import TYPE_CHECKING, Any import numpy as np +import numpy.linalg as la from meshmode.discretization import Discretization -from pytools import memoize_in, obj_array +from pytools import log_process, memoize_in, memoize_method, obj_array from pytential import GeometryCollection, bind, sym +from pytential.linalg.cluster import ClusterTree, cluster from pytential.linalg.direct_solver_symbolic import ( PROXY_SKELETONIZATION_SOURCE, PROXY_SKELETONIZATION_TARGET, @@ -52,16 +55,23 @@ from pytential.linalg.proxy import ProxyClusterGeometryData, ProxyGeneratorBase from pytential.symbolic.matrix import ClusterMatrixBuilderBase +logger = logging.getLogger(__name__) + + +logger = logging.getLogger(__name__) + +logger = logging.getLogger(__name__) __doc__ = """ Skeletonization ---------------- +~~~~~~~~~~~~~~~ .. autoclass:: SkeletonizationWrangler .. autoclass:: make_skeletonization_wrangler .. autoclass:: SkeletonizationResult .. autofunction:: skeletonize_by_proxy +.. 
autofunction:: rec_skeletonize_by_proxy """ @@ -143,7 +153,9 @@ def prg(): """ <> ioffset = starts[icluster] <> npoints = starts[icluster + 1] - ioffset - result[icluster] = reduce(sum, i, waa[indices[i + ioffset]]) / npoints + result[icluster] = ( + reduce(sum, i, abs(waa[indices[i + ioffset]])) / npoints + if npoints > 0 else 1.0) """, lang_version=lp.MOST_RECENT_LANGUAGE_VERSION, ) @@ -307,9 +319,23 @@ def _evaluate_expr( context=self.context, **kwargs)(expr) + def evaluate_self( + self, + actx: PyOpenCLArrayContext, + places: GeometryCollection, + tgt_src_index: TargetAndSourceClusterList, + ibrow: int, ibcol: int, + ) -> onp.Array1D[Any]: + cls = self.neighbor_cluster_builder + return self._evaluate_expr( + actx, places, cls, tgt_src_index, self.exprs[ibrow], + idomain=ibcol, _weighted=True) + # {{{ nearfield - def evaluate_source_neighbor_interaction(self, + @log_process(logger) + def evaluate_source_neighbor_interaction( + self, actx: PyOpenCLArrayContext, places: GeometryCollection, pxy: ProxyClusterGeometryData, @@ -322,11 +348,13 @@ def evaluate_source_neighbor_interaction(self, expr = self.exprs[ibrow] mat = self._evaluate_expr( actx, places, eval_mapper_cls, nbr_src_index, expr, - idomain=ibcol, _weighted=self.weighted_sources) + idomain=ibcol, _weighted=True) return mat, nbr_src_index - def evaluate_target_neighbor_interaction(self, + @log_process(logger) + def evaluate_target_neighbor_interaction( + self, actx: PyOpenCLArrayContext, places: GeometryCollection, pxy: ProxyClusterGeometryData, @@ -339,7 +367,7 @@ def evaluate_target_neighbor_interaction(self, expr = self.exprs[ibrow] mat = self._evaluate_expr( actx, places, eval_mapper_cls, tgt_nbr_index, expr, - idomain=ibcol, _weighted=self.weighted_targets) + idomain=ibcol, _weighted=True) return mat, tgt_nbr_index @@ -347,7 +375,9 @@ def evaluate_target_neighbor_interaction(self, # {{{ proxy - def evaluate_source_proxy_interaction(self, + @log_process(logger) + def evaluate_source_proxy_interaction( 
+ self, actx: PyOpenCLArrayContext, places: GeometryCollection, pxy: ProxyClusterGeometryData, @@ -356,10 +386,15 @@ def evaluate_source_proxy_interaction(self, ) -> tuple[onp.Array1D[np.inexact], TargetAndSourceClusterList]: from pytential.collection import add_geometry_to_collection pxy_src_index = TargetAndSourceClusterList(pxy.pxyindex, pxy.srcindex) + places = add_geometry_to_collection( places, {PROXY_SKELETONIZATION_TARGET: pxy.as_targets()} ) + if not self.weighted_sources: + logger.warning("Source-Proxy weighting is turned off. This will not give " + "good results for skeletonization.", stacklevel=3) + eval_mapper_cls = self.proxy_source_cluster_builder expr = self.source_proxy_exprs[ibrow] mat = self._evaluate_expr( @@ -370,7 +405,9 @@ def evaluate_source_proxy_interaction(self, return mat, pxy_src_index - def evaluate_target_proxy_interaction(self, + @log_process(logger) + def evaluate_target_proxy_interaction( + self, actx: PyOpenCLArrayContext, places: GeometryCollection, pxy: ProxyClusterGeometryData, nbrindex: IndexList, *, @@ -378,6 +415,7 @@ def evaluate_target_proxy_interaction(self, ) -> tuple[onp.Array1D[np.inexact], TargetAndSourceClusterList]: from pytential.collection import add_geometry_to_collection tgt_pxy_index = TargetAndSourceClusterList(pxy.srcindex, pxy.pxyindex) + places = add_geometry_to_collection( places, {PROXY_SKELETONIZATION_SOURCE: pxy.as_sources()} ) @@ -394,6 +432,9 @@ def evaluate_target_proxy_interaction(self, mat = _apply_weights( actx, mat, places, tgt_pxy_index, nbrindex, self.domains[ibcol]) + else: + logger.warning("Target-Proxy weighting is turned off. 
This will not give " + "good results for skeletonization.", stacklevel=3) return mat, tgt_pxy_index @@ -410,6 +451,7 @@ def make_skeletonization_wrangler( # internal _weighted_proxy: bool | tuple[bool, bool] | None = None, + _remove_source_transforms: bool = False, _proxy_source_cluster_builder: type[ClusterMatrixBuilderBase] | None = None, _proxy_target_cluster_builder: type[ClusterMatrixBuilderBase] | None = None, _neighbor_cluster_builder: type[ClusterMatrixBuilderBase] | None = None, @@ -438,9 +480,13 @@ def make_skeletonization_wrangler( prepared_lpot_exprs = prepare_expr(places, lpot_exprs, auto_where) source_proxy_exprs = prepare_proxy_expr( - places, prepared_lpot_exprs, (auto_where[0], PROXY_SKELETONIZATION_TARGET)) + places, prepared_lpot_exprs, (auto_where[0], PROXY_SKELETONIZATION_TARGET), + remove_transforms=_remove_source_transforms) target_proxy_exprs = prepare_proxy_expr( - places, prepared_lpot_exprs, (PROXY_SKELETONIZATION_SOURCE, auto_where[1])) + places, prepared_lpot_exprs, (PROXY_SKELETONIZATION_SOURCE, auto_where[1]), + # NOTE: transforms are unconditionally removed here because the + # source would be the proxies, where we do not have normals, etc. 
+ remove_transforms=True) # }}} @@ -450,7 +496,7 @@ def make_skeletonization_wrangler( weighted_sources = weighted_targets = True elif isinstance(_weighted_proxy, bool): weighted_sources = weighted_targets = _weighted_proxy - elif isinstance(_weighted_proxy, tuple): + elif isinstance(_weighted_proxy, tuple) and len(_weighted_proxy) == 2: weighted_sources, weighted_targets = _weighted_proxy else: raise ValueError(f"unknown value for weighting: '{_weighted_proxy}'") @@ -474,7 +520,10 @@ def make_skeletonization_wrangler( proxy_target_cluster_builder = _proxy_target_cluster_builder if proxy_target_cluster_builder is None: - proxy_target_cluster_builder = QBXClusterMatrixBuilder + if _remove_source_transforms: + proxy_target_cluster_builder = P2PClusterMatrixBuilder + else: + proxy_target_cluster_builder = QBXClusterMatrixBuilder # }}} @@ -562,7 +611,8 @@ def _evaluate_proxy_skeletonization_interaction( actx: PyOpenCLArrayContext, places: GeometryCollection, proxy_generator: ProxyGeneratorBase, - cluster_index: IndexList, *, + source_index: IndexList, + target_index: IndexList, *, evaluate_proxy: Callable[..., tuple[onp.Array1D[np.inexact], TargetAndSourceClusterList]], evaluate_neighbor: Callable[..., @@ -574,23 +624,71 @@ def _evaluate_proxy_skeletonization_interaction( each cluster in *cluster_index*. 
def _worker_skeletonize_block_by_proxy(
        data: tuple[int, onp.Array2D[np.inexact], onp.Array2D[np.inexact]],
        *,
        tgt_src_index: TargetAndSourceClusterList,
        id_rank: int | None,
        id_eps: float | None,
        rng: np.random.Generator,
        ) -> tuple[int,
                   onp.Array2D[np.inexact], onp.Array2D[np.inexact],
                   onp.Array1D[np.integer], onp.Array1D[np.integer]]:
    """Skeletonize the targets and the sources of a single cluster.

    :arg data: a tuple ``(i, src_mat, tgt_mat)`` of the cluster index and the
        source and target interaction matrices of that cluster.
    :arg id_rank: rank passed to the decomposition of the target matrix.
    :arg id_eps: tolerance passed to the decomposition of the target matrix.
        The source matrix is always decomposed to the rank found for the
        targets.
    :returns: a tuple ``(i, L, R, skel_src_indices, skel_tgt_indices)``.
    """
    from pytential.linalg.utils import interp_decomp

    icluster, src_mat, tgt_mat = data
    rank_cap = min((*src_mat.shape, *tgt_mat.shape))

    if __debug__:
        # both matrices must be free of NaN / Inf entries
        for mat in (tgt_mat, src_mat):
            finite = np.isfinite(mat)
            assert np.all(finite), np.where(~finite)

    # {{{ targets: this is where the rank is determined

    rank, tgt_perm, tgt_interp = interp_decomp(
        tgt_mat.T, rank=id_rank, eps=id_eps, rng=rng)
    assert 0 < rank <= len(tgt_perm)

    # the rank cannot exceed any dimension of the two matrices
    if rank > rank_cap:
        rank = rank_cap
        tgt_interp = tgt_interp[:rank, :]

    tgt_cluster = tgt_src_index.targets.cluster_indices(icluster)
    L_i = tgt_interp.T
    skel_tgt = tgt_cluster[tgt_perm[:rank]]
    assert L_i.shape == (tgt_mat.shape[0], rank)

    # }}}

    # {{{ sources: reuse the rank from the targets

    rank, src_perm, src_interp = interp_decomp(
        src_mat, rank=rank, eps=None, rng=rng)
    assert 0 < rank <= len(src_perm)

    src_cluster = tgt_src_index.sources.cluster_indices(icluster)
    R_i = src_interp
    skel_src = src_cluster[src_perm[:rank]]
    assert R_i.shape == (rank, src_mat.shape[1])

    # }}}

    assert skel_tgt.shape == skel_src.shape

    return icluster, L_i, R_i, skel_src, skel_tgt
interp[:k, :] - - L[i] = interp.T - tgt_skl_indices[i] = tgt_src_index.targets.cluster_indices(i)[idx[:k]] - assert interp.shape == (k, tgt_mat.shape[0]) - - # skeletonize source points - k, idx, interp = interp_decomp(src_mat, rank=k, eps=None, rng=rng) - assert 0 < k <= len(idx) + from pytools import ProcessTimer + + with ProcessTimer() as pt: + src_result = evaluate_skeletonization_interaction( + tgt_src_index.sources, tgt_src_index.targets, + evaluate_proxy=partial( + wrangler.evaluate_source_proxy_interaction, + ibrow=ibrow, ibcol=ibcol), + evaluate_neighbor=partial( + wrangler.evaluate_source_neighbor_interaction, + ibrow=ibrow, ibcol=ibcol), + ) + tgt_result = evaluate_skeletonization_interaction( + tgt_src_index.targets, tgt_src_index.sources, + evaluate_proxy=partial( + wrangler.evaluate_target_proxy_interaction, + ibrow=ibrow, ibcol=ibcol), + evaluate_neighbor=partial( + wrangler.evaluate_target_neighbor_interaction, + ibrow=ibrow, ibcol=ibcol) + ) + logger.info("_skeletonize_block_by_proxy_with_mats (evaluate): completed (%s)", + pt) + + with ProcessTimer() as pt: + mats = ((i, np.vstack(src_result[i]), np.hstack(tgt_result[i])) + for i in range(nclusters)) + worker = partial(_worker_skeletonize_block_by_proxy, + tgt_src_index=tgt_src_index, + id_rank=id_rank, + id_eps=id_eps, + rng=rng) + + import multiprocessing + import os + + max_workers = int(os.environ.get("PYTENTIAL_HMATRIX_CPU_COUNT", + multiprocessing.cpu_count())) + + if max_workers == 0: + for (i, L_i, R_i, skel_src_i, skel_tgt_i) in (worker(mat) for mat in mats): + L[i] = L_i + R[i] = R_i + skel_tgt_indices[i] = skel_tgt_i + skel_src_indices[i] = skel_src_i + skel_starts[i + 1] = skel_starts[i] + skel_tgt_i.size + else: + # NOTE: we have a lot of threads (from Python / OpenCL / OpenBLAS) that + # get in here, so the `fork` context does not work. We need the `forkserver` + # so that a new clean child is created. 
+ context = multiprocessing.get_context("forkserver") + + from concurrent.futures import ProcessPoolExecutor + with ProcessPoolExecutor(max_workers=max_workers, + mp_context=context) as pool: + + for (i, L_i, R_i, skel_src_i, skel_tgt_i) in pool.map(worker, mats): + L[i] = L_i + R[i] = R_i + skel_tgt_indices[i] = skel_tgt_i + skel_src_indices[i] = skel_src_i + skel_starts[i + 1] = skel_starts[i] + skel_tgt_i.size + logger.info("_skeletonize_block_by_proxy_with_mats (skeletonize): completed (%s)", + pt) + + # evaluate diagonal + from pytential.linalg.utils import make_flat_cluster_diag + mat = wrangler.evaluate_self(actx, places, tgt_src_index, ibrow, ibcol) + D = make_flat_cluster_diag(mat, tgt_src_index) + + from pytential.linalg import make_index_list + skel_src_index = make_index_list(np.hstack(list(skel_src_indices)), skel_starts) + skel_tgt_index = make_index_list(np.hstack(list(skel_tgt_indices)), skel_starts) + skel_tgt_src_index = TargetAndSourceClusterList(skel_tgt_index, skel_src_index) - R[i] = interp - src_skl_indices[i] = tgt_src_index.sources.cluster_indices(i)[idx[:k]] - assert interp.shape == (k, src_mat.shape[1]) + return SkeletonizationResult( + L=L, R=R, D=D, + tgt_src_index=tgt_src_index, skel_tgt_src_index=skel_tgt_src_index, + _src_eval_result=src_result, _tgt_eval_result=tgt_result) - skel_starts[i + 1] = skel_starts[i] + k - assert tgt_skl_indices[i].shape == src_skl_indices[i].shape - from pytential.linalg.utils import make_index_list +def _evaluate_root( + actx: PyOpenCLArrayContext, ibrow: int, ibcol: int, + places: GeometryCollection, + wrangler: SkeletonizationWrangler, + tgt_src_index: TargetAndSourceClusterList + ) -> SkeletonizationResult: + assert tgt_src_index.nclusters == 1 - src_skl_index = make_index_list(np.hstack(list(src_skl_indices)), skel_starts) - tgt_skl_index = make_index_list(np.hstack(list(tgt_skl_indices)), skel_starts) - skel_tgt_src_index = TargetAndSourceClusterList(tgt_skl_index, src_skl_index) + from 
pytential.linalg.utils import make_flat_cluster_diag + mat = wrangler.evaluate_self(actx, places, tgt_src_index, ibrow, ibcol) + D = make_flat_cluster_diag(mat, tgt_src_index) return SkeletonizationResult( - L=L, R=R, - tgt_src_index=tgt_src_index, skel_tgt_src_index=skel_tgt_src_index, - _src_eval_result=src_result, _tgt_eval_result=tgt_result) + L=obj_array.new_1d([np.eye(*D[0].shape)]), + R=obj_array.new_1d([np.eye(*D[0].shape)]), + D=D, + tgt_src_index=tgt_src_index, skel_tgt_src_index=tgt_src_index, + _src_eval_result=None, _tgt_eval_result=None, + ) # }}} @@ -719,6 +851,7 @@ class SkeletonizationResult: .. autoattribute:: L .. autoattribute:: R + .. autoattribute:: D .. autoattribute:: tgt_src_index .. autoattribute:: skel_tgt_src_index """ @@ -729,6 +862,9 @@ class SkeletonizationResult: R: obj_array.ObjectArray1D[onp.Array2D[Any]] """An object :class:`~numpy.ndarray` of size ``(nclusters,)`` that contains the right block interpolation matrices.""" + D: obj_array.ObjectArray1D[onp.Array2D[Any]] + """An object :class:`~numpy.ndarray` of size ``(nclusters,)`` that contains + the dense diagonal blocks.""" tgt_src_index: TargetAndSourceClusterList """A :class:`~pytential.linalg.utils.TargetAndSourceClusterList` representing @@ -766,16 +902,36 @@ def nclusters(self) -> int: """Number of clusters that have been skeletonized.""" return self.tgt_src_index.nclusters + @property + def dtype(self) -> np.dtype[Any]: + # FIXME: check that everyone has the same dtype? 
probably in __post_init__ + return self.L[0].dtype + @property + @memoize_method + def invD(self) -> obj_array.ObjectArray1D[onp.Array2D[np.inexact]]: + return obj_array.new_1d([la.inv(D) for D in self.D]) + + @property + @memoize_method + def Dhat(self) -> obj_array.ObjectArray1D[onp.Array2D[np.inexact]]: + return obj_array.new_1d([ + la.inv(self.R[i] @ self.invD[i] @ self.L[i]) + for i in range(self.nclusters) + ]) + + +@log_process(logger) def skeletonize_by_proxy( actx: PyOpenCLArrayContext, places: GeometryCollection, tgt_src_index: TargetAndSourceClusterList, - exprs: sym.var | Sequence[sym.var], + exprs: ArithmeticExpression | Sequence[ArithmeticExpression], input_exprs: sym.var | Sequence[sym.var], *, domains: Sequence[Hashable] | None = None, context: dict[str, Any] | None = None, + auto_where: Any = None, approx_nproxy: int | None = None, proxy_radius_factor: float | None = None, @@ -784,7 +940,7 @@ def skeletonize_by_proxy( id_rank: int | None = None, rng: np.random.Generator | None = None, max_particles_in_box: int | None = None, - ) -> obj_array.ObjectArray2D[onp.Array2D[np.inexact]]: + ) -> obj_array.ObjectArray2D[SkeletonizationResult]: r"""Evaluate and skeletonize a symbolic expression using proxy-based methods. 
:arg tgt_src_index: a :class:`~pytential.linalg.utils.TargetAndSourceClusterList` @@ -810,21 +966,105 @@ def skeletonize_by_proxy( from pytential.linalg.proxy import QBXProxyGenerator wrangler = make_skeletonization_wrangler( places, exprs, input_exprs, - domains=domains, context=context) + domains=domains, context=context, auto_where=auto_where) proxy = QBXProxyGenerator(places, approx_nproxy=approx_nproxy, radius_factor=proxy_radius_factor) + from itertools import product + skels = np.empty((wrangler.nrows, wrangler.ncols), dtype=object) - for ibrow in range(wrangler.nrows): - for ibcol in range(wrangler.ncols): - skels[ibrow, ibcol] = _skeletonize_block_by_proxy_with_mats( - actx, ibrow, ibcol, places, proxy, wrangler, tgt_src_index, - id_eps=id_eps, - id_rank=id_rank, - max_particles_in_box=max_particles_in_box, - rng=rng) + for ibrow, ibcol in product(range(wrangler.nrows), range(wrangler.ncols)): + skels[ibrow, ibcol] = _skeletonize_block_by_proxy_with_mats( + actx, ibrow, ibcol, places, proxy, wrangler, tgt_src_index, + id_eps=id_eps, id_rank=id_rank, + max_particles_in_box=max_particles_in_box, + rng=rng) return skels # }}} + + +# {{{ recursive skeletonization by proxy + +@log_process(logger) +def rec_skeletonize_by_proxy( + actx: PyOpenCLArrayContext, + places: GeometryCollection, + + ctree: ClusterTree, + tgt_src_index: TargetAndSourceClusterList, + exprs: ArithmeticExpression | Sequence[ArithmeticExpression], + input_exprs: sym.var | Sequence[sym.var], *, + domains: Sequence[Hashable] | None = None, + context: dict[str, Any] | None = None, + auto_where: Any = None, + + approx_nproxy: int | None = None, + proxy_radius_factor: float | None = None, + + id_eps: float | None = None, + rng: np.random.Generator | None = None, + max_particles_in_box: int | None = None, + + _wrangler: SkeletonizationWrangler | None = None, + _proxy: ProxyGeneratorBase | None = None, + ) -> obj_array.ObjectArray1D[SkeletonizationResult]: + r"""Performs recursive skeletonization 
based on :func:`skeletonize_by_proxy`. + + :returns: an object :class:`~numpy.ndarray` of :class:`SkeletonizationResult`\ s, + one per level in *ctree*. + """ + + assert ctree.nclusters == tgt_src_index.nclusters + + if id_eps is None: + id_eps = 1.0e-8 + + if _proxy is None: + from pytential.linalg.proxy import QBXProxyGenerator + proxy: ProxyGeneratorBase = QBXProxyGenerator(places, + approx_nproxy=approx_nproxy, + radius_factor=proxy_radius_factor) + else: + proxy = _proxy + + if _wrangler is None: + wrangler = make_skeletonization_wrangler( + places, exprs, input_exprs, + domains=domains, context=context, auto_where=auto_where) + else: + wrangler = _wrangler + + if wrangler.nrows != 1 or wrangler.ncols != 1: + raise NotImplementedError("support for block matrices") + + from itertools import product + + skel_per_level = np.empty(ctree.nlevels, dtype=object) + for i, clevel in enumerate(ctree.levels[:-1]): + for ibrow, ibcol in product(range(wrangler.nrows), range(wrangler.ncols)): + skeleton = _skeletonize_block_by_proxy_with_mats( + actx, ibrow, ibcol, proxy.places, proxy, wrangler, tgt_src_index, + id_eps=id_eps, + # NOTE: we probably never want to set the rank here? + id_rank=None, + rng=rng, + max_particles_in_box=max_particles_in_box) + + skel_per_level[i] = skeleton + tgt_src_index = cluster(skeleton.skel_tgt_src_index, clevel) + + assert tgt_src_index.nclusters == 1 + assert not isinstance(skel_per_level[-1], SkeletonizationResult) + + # evaluate the full root cluster (no skeletonization or anything) + skeleton = _evaluate_root(actx, 0, 0, places, wrangler, tgt_src_index) + skel_per_level[-1] = skeleton + + return skel_per_level + +# }}} + +# vim: foldmethod=marker diff --git a/pytential/linalg/utils.py b/pytential/linalg/utils.py index ed86aff85..977ce42be 100644 --- a/pytential/linalg/utils.py +++ b/pytential/linalg/utils.py @@ -53,6 +53,9 @@ .. autofunction:: make_index_list .. autofunction:: make_index_cluster_cartesian_product +.. 
autofunction:: make_flat_cluster_diag + +.. autofunction:: interp_decomp """ InexactT = TypeVar("InexactT", bound=np.inexact) @@ -479,6 +482,22 @@ def mnorm(x: onp.Array2D[np.inexact], return tgt_error, src_error +def skeletonization_matrix( + mat: np.ndarray, skeleton: SkeletonizationResult, + ) -> tuple[np.ndarray, np.ndarray]: + D: np.ndarray = np.empty(skeleton.nclusters, dtype=object) + S: np.ndarray = np.empty((skeleton.nclusters, skeleton.nclusters), dtype=object) + + from itertools import product + for i, j in product(range(skeleton.nclusters), repeat=2): + if i == j: + D[i] = skeleton.tgt_src_index.cluster_take(mat, i, i) + else: + S[i, j] = skeleton.skel_tgt_src_index.cluster_take(mat, i, j) + + return D, S + + def skeletonization_error( mat: onp.Array2D[np.inexact], skeleton: SkeletonizationResult, *, ord: float | None = None, @@ -540,4 +559,51 @@ def skeletonization_error( # }}} + +# {{{ eigenvalues + +def eigs( + mat, *, + k: int = 6, + which: str = "LM", + maxiter: int | None = None, + tol: float = 0.0) -> np.ndarray: + import scipy.sparse.linalg as ssla + + result = ssla.eigs(mat, + k=k, + which=which, + maxiter=maxiter, + tol=tol, + return_eigenvectors=False) + + imag_norm = np.linalg.norm(np.imag(result), ord=np.inf) + if imag_norm > 1.0e-14: + from warnings import warn + warn(f"eigenvalues are not real enough: norm(imag) = {imag_norm:.12e}", + stacklevel=2) + + return result + + +def cond(mat, *, + mat_inv=None, + p: float | None = None, + tol: float = 1.0e-6) -> float: + if p is None: + p = 2 + + if p != 2: + raise ValueError(f"unsupported norm order: '{p}'") + + lambda_max = eigs(mat, k=1, which="LM", tol=tol) + if mat_inv is None: + lambda_min = eigs(mat, k=1, which="SM", tol=tol) + else: + lambda_min = eigs(mat_inv, k=1, which="LM", tol=tol) + + return np.abs(lambda_max) / np.abs(lambda_min) + +# }}} + # vim: foldmethod=marker diff --git a/pytential/symbolic/matrix.py b/pytential/symbolic/matrix.py index f51ba1887..2c2cc5165 100644 --- 
a/pytential/symbolic/matrix.py +++ b/pytential/symbolic/matrix.py @@ -561,7 +561,6 @@ def map_int_g(self, expr): expr.target.geometry, expr.target.discr_stage) actx = self.array_context - target_base_kernel = expr.target_kernel.get_base_kernel() result = 0 for density, kernel in zip(expr.densities, expr.source_kernels, strict=True): @@ -575,12 +574,10 @@ def map_int_g(self, expr): # {{{ generator - base_kernel = kernel.get_base_kernel() - from sumpy.p2p import P2PMatrixGenerator mat_gen = P2PMatrixGenerator(actx.context, - source_kernels=(base_kernel,), - target_kernels=(target_base_kernel,), + source_kernels=(kernel,), + target_kernels=(expr.target_kernel,), exclude_self=self.exclude_self) # }}} @@ -590,7 +587,7 @@ def map_int_g(self, expr): # {{{ kernel args # NOTE: copied from pytential.symbolic.primitives.IntG - kernel_args = base_kernel.get_args() + base_kernel.get_source_args() + kernel_args = kernel.get_args() + kernel.get_source_args() kernel_args = {arg.loopy_arg.name for arg in kernel_args} kernel_args = _get_layer_potential_args( @@ -775,7 +772,6 @@ def map_int_g(self, expr: prim.IntG): expr.target.geometry, expr.target.discr_stage) actx = self.array_context - target_base_kernel = expr.target_kernel.get_base_kernel() result = 0 for kernel, density in zip(expr.source_kernels, expr.densities, strict=True): @@ -796,12 +792,10 @@ def map_int_g(self, expr: prim.IntG): # {{{ generator - base_kernel = kernel.get_base_kernel() - from sumpy.p2p import P2PMatrixSubsetGenerator mat_gen = P2PMatrixSubsetGenerator(actx.context, - source_kernels=(base_kernel,), - target_kernels=(target_base_kernel,), + source_kernels=(kernel,), + target_kernels=(expr.target_kernel,), exclude_self=self.exclude_self) # }}} @@ -811,7 +805,7 @@ def map_int_g(self, expr: prim.IntG): # {{{ kernel args # NOTE: copied from pytential.symbolic.primitives.IntG - kernel_args = [*base_kernel.get_args(), *base_kernel.get_source_args()] + kernel_args = [*kernel.get_args(), *kernel.get_source_args()] 
kernel_args = {arg.loopy_arg.name for arg in kernel_args} kernel_args = _get_layer_potential_args( diff --git a/test/extra_matrix_data.py b/test/extra_matrix_data.py index 46ff9686f..d758b9e8c 100644 --- a/test/extra_matrix_data.py +++ b/test/extra_matrix_data.py @@ -49,17 +49,26 @@ class MatrixTestCaseMixin: proxy_target_cluster_builder: Callable[..., Any] | None = None neighbor_cluster_builder: Callable[..., Any] | None = None - def get_cluster_index(self, actx, places, dofdesc=None): + def max_particles_in_box_for_discr(self, discr): + max_particles_in_box = self.max_particles_in_box + if max_particles_in_box is None: + max_particles_in_box = discr.ndofs // self.approx_cluster_count + + return max_particles_in_box + + def get_cluster_index( + self, actx, places, dofdesc=None, max_particles_in_box=None): if dofdesc is None: dofdesc = places.auto_source discr = places.get_discretization(dofdesc.geometry) - max_particles_in_box = self.max_particles_in_box if max_particles_in_box is None: - max_particles_in_box = discr.ndofs // self.approx_cluster_count + max_particles_in_box = self.max_particles_in_box + if max_particles_in_box is None: + max_particles_in_box = discr.ndofs // self.approx_cluster_count - from pytential.linalg.proxy import partition_by_nodes - cindex = partition_by_nodes(actx, places, + from pytential.linalg.cluster import partition_by_nodes + cindex, ctree = partition_by_nodes(actx, places, dofdesc=dofdesc, tree_kind=self.tree_kind, max_particles_in_box=max_particles_in_box) @@ -81,12 +90,14 @@ def get_cluster_index(self, actx, places, dofdesc=None): from pytential.linalg import make_index_list cindex = make_index_list(subset) - return cindex + return cindex, ctree - def get_tgt_src_cluster_index(self, actx, places, dofdesc=None): + def get_tgt_src_cluster_index( + self, actx, places, dofdesc=None, max_particles_in_box=None): from pytential.linalg import TargetAndSourceClusterList - cindex = self.get_cluster_index(actx, places, dofdesc=dofdesc) - 
return TargetAndSourceClusterList(cindex, cindex) + cindex, ctree = self.get_cluster_index( + actx, places, dofdesc=dofdesc, max_particles_in_box=max_particles_in_box) + return TargetAndSourceClusterList(cindex, cindex), ctree def get_operator(self, ambient_dim, qbx_forced_limit=_NoArgSentinel): knl = self.knl_class(ambient_dim) diff --git a/test/test_linalg_cluster.py b/test/test_linalg_cluster.py new file mode 100644 index 000000000..25605993f --- /dev/null +++ b/test/test_linalg_cluster.py @@ -0,0 +1,133 @@ +from __future__ import annotations + + +__copyright__ = "Copyright (C) 2022 Alexandru Fikl" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + +import logging + +import extra_matrix_data as extra +import numpy as np +import pytest + +from arraycontext import pytest_generate_tests_for_array_contexts +from meshmode import _acf # noqa: F401 +from meshmode.array_context import PytestPyOpenCLArrayContextFactory +from meshmode.mesh.generation import NArmedStarfish + +from pytential import GeometryCollection + + +logger = logging.getLogger(__name__) + +pytest_generate_tests = pytest_generate_tests_for_array_contexts([ + PytestPyOpenCLArrayContextFactory, + ]) + +CLUSTER_TEST_CASES = [ + extra.CurveTestCase( + name="starfish", + target_order=4, + curve_fn=NArmedStarfish(5, 0.25), + resolutions=[64]), + extra.TorusTestCase( + target_order=4, + resolutions=[1]) + ] + + +# {{{ test_cluster_tree + +@pytest.mark.parametrize(("case", "tree_kind"), [ + (CLUSTER_TEST_CASES[0], None), + (CLUSTER_TEST_CASES[0], "adaptive"), + (CLUSTER_TEST_CASES[0], "adaptive-level-restricted"), + (CLUSTER_TEST_CASES[1], "adaptive"), + ]) +def test_cluster_tree(actx_factory, case, tree_kind, visualize=False): + if visualize: + logging.basicConfig(level=logging.INFO) + + from dataclasses import replace + actx = actx_factory() + case = replace(case, tree_kind=tree_kind) + logger.info("\n%s", case) + + discr = case.get_discretization(actx, case.resolutions[-1], case.target_order) + places = GeometryCollection(discr, auto_where=case.name) + + srcindex, ctree = case.get_cluster_index(actx, places) + assert srcindex.nclusters == ctree.nclusters + + from pytential.linalg.cluster import split_array + rng = np.random.default_rng(42) + x = split_array(rng.random(srcindex.indices.shape), srcindex) + + logger.info("nclusters %4d nlevels %4d", srcindex.nclusters, ctree.nlevels) + + if visualize and ctree._tree is not None: + import matplotlib.pyplot as plt + fig = plt.figure(figsize=(10, 10), dpi=300) + + from boxtree.visualization import TreePlotter + plotter = TreePlotter(ctree._tree) + plotter.draw_tree(fill=False, edgecolor="black", 
zorder=10) + plotter.draw_box_numbers() + plotter.set_bounding_box() + + fig.savefig("test_cluster_tree") + + from pytential.linalg.cluster import cluster, uncluster + for clevel in ctree.levels: + logger.info("======== Level %d", clevel.level) + logger.info("box_ids %s", clevel.box_ids) + logger.info("sizes %s", np.diff(srcindex.starts)) + logger.info("parent_map %s", clevel.parent_map) + + assert srcindex.nclusters == clevel.nclusters + + next_srcindex = cluster(srcindex, clevel) + for i, ppm in enumerate(clevel.parent_map): + partition = np.concatenate([srcindex.cluster_indices(j) for j in ppm]) + + assert partition.size == next_srcindex.cluster_size(i) + assert np.allclose(partition, next_srcindex.cluster_indices(i)) + + y = cluster(x, clevel) + z = uncluster(y, srcindex, clevel) + assert all(np.allclose(xi, zi) for xi, zi in zip(x, z, strict=True)) + + srcindex = next_srcindex + x = y + +# }}} + + +if __name__ == "__main__": + import sys + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + +# vim: fdm=marker diff --git a/test/test_linalg_hmatrix.py b/test/test_linalg_hmatrix.py new file mode 100644 index 000000000..84aabf53f --- /dev/null +++ b/test/test_linalg_hmatrix.py @@ -0,0 +1,593 @@ +from __future__ import annotations + + +__copyright__ = "Copyright (C) 2022 Alexandru Fikl" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import logging +from dataclasses import replace + +import extra_matrix_data as extra +import numpy as np +import pytest + +from arraycontext import pytest_generate_tests_for_array_contexts +from meshmode import _acf # noqa: F401 +from meshmode.array_context import PytestPyOpenCLArrayContextFactory +from meshmode.mesh.generation import NArmedStarfish + +from pytential import GeometryCollection, bind, sym + + +logger = logging.getLogger(__name__) + +pytest_generate_tests = pytest_generate_tests_for_array_contexts([ + PytestPyOpenCLArrayContextFactory, + ]) + + +HMATRIX_TEST_CASES = [ + extra.CurveTestCase( + name="starfish", + op_type="scalar", + target_order=4, + curve_fn=NArmedStarfish(5, 0.25), + resolutions=[512]), + extra.CurveTestCase( + name="starfish", + op_type="double", + target_order=4, + curve_fn=NArmedStarfish(5, 0.25), + resolutions=[512]), + extra.TorusTestCase( + target_order=4, + op_type="scalar", + resolutions=[0]) + ] + + +# {{{ test_hmatrix_forward_matvec_single_level + +def hmatrix_matvec_single_level(mat, x, skeleton): + from pytential.linalg.cluster import split_array + targets, sources = skeleton.tgt_src_index + y = split_array(x, sources) + + y_hat = np.empty(y.shape, dtype=object) + + for i in range(skeleton.nclusters): + y_hat[i] = skeleton.R[i] @ y[i] + + from pytential.linalg.utils import skeletonization_matrix + D, S = skeletonization_matrix(mat, skeleton) + syhat = np.zeros(y.shape, dtype=object) + + from itertools import product + for i, j in 
product(range(skeleton.nclusters), repeat=2): + if i == j: + continue + + syhat[i] = syhat[i] + S[i, j] @ y_hat[j] + + for i in range(skeleton.nclusters): + y[i] = D[i] @ y[i] + skeleton.L[i] @ syhat[i] + + return np.concatenate(y)[np.argsort(targets.indices)] + + +@pytest.mark.parametrize("case", HMATRIX_TEST_CASES) +@pytest.mark.parametrize("discr_stage", [sym.QBX_SOURCE_STAGE1]) +def test_hmatrix_forward_matvec_single_level( + actx_factory, case, discr_stage, visualize=False): + actx = actx_factory() + rng = np.random.default_rng(42) + + if visualize: + logging.basicConfig(level=logging.INFO) + + if case.ambient_dim == 2: + kwargs = {"proxy_approx_count": 64, "proxy_radius_factor": 1.15} + else: + kwargs = {"proxy_approx_count": 256, "proxy_radius_factor": 1.25} + + case = replace(case, skel_discr_stage=discr_stage, **kwargs) + logger.info("\n%s", case) + + # {{{ geometry + + dd = sym.DOFDescriptor(case.name, discr_stage=case.skel_discr_stage) + qbx = case.get_layer_potential(actx, case.resolutions[-1], case.target_order) + places = GeometryCollection(qbx, auto_where=dd) + + density_discr = places.get_discretization(dd.geometry, dd.discr_stage) + tgt_src_index, _ = case.get_tgt_src_cluster_index(actx, places, dd) + + logger.info("dd %s", dd) + logger.info("nclusters %3d ndofs %7d", + tgt_src_index.nclusters, density_discr.ndofs) + + # }}} + + # {{{ construct reference + + from pytential.linalg.direct_solver_symbolic import prepare_expr + from pytential.symbolic.matrix import MatrixBuilder + sym_u, sym_op = case.get_operator(places.ambient_dim) + sym_op_prepr, = prepare_expr(places, [sym_op], (dd, dd)) + mat = MatrixBuilder( + actx, + dep_expr=sym_u, + other_dep_exprs=[], + dep_discr=density_discr, + places=places, + context={}, + )(sym_op_prepr) + + from arraycontext import flatten, unflatten + x = actx.thaw(density_discr.nodes()[0]) + y = actx.to_numpy(flatten(x, actx)) + r_lpot = unflatten(x, actx.from_numpy(mat @ y), actx) + + # }}} + + # {{{ check matvec + + 
id_eps = 10.0 ** (-np.arange(2, 16)) + rec_error = np.zeros_like(id_eps) + + from pytools.convergence import EOCRecorder + eoc = EOCRecorder() + + from pytential.linalg.skeletonization import skeletonize_by_proxy + for i in range(id_eps.size): + skeleton = skeletonize_by_proxy( + actx, places, tgt_src_index, sym_op, sym_u, + domains=[dd], context={}, + approx_nproxy=case.proxy_approx_count, + proxy_radius_factor=case.proxy_radius_factor, + id_eps=id_eps[i], + rng=rng, + ) + r_hmat = hmatrix_matvec_single_level(mat, y, skeleton[0, 0]) + r_hmat = unflatten(x, actx.from_numpy(r_hmat), actx) + + from meshmode.dof_array import flat_norm + rec_error[i] = actx.to_numpy( + flat_norm(r_hmat - r_lpot) / flat_norm(r_lpot) + ) + logger.info("id_eps %.2e error: %.12e", id_eps[i], rec_error[i]) + # assert rec_error[i] < 0.1 + + eoc.add_data_point(id_eps[i], rec_error[i]) + + logger.info("\n%s", eoc.pretty_print( + abscissa_format="%.8e", + error_format="%.8e", + eoc_format="%.2f")) + + # }}} + + if not visualize: + return + + import matplotlib.pyplot as pt + fig = pt.figure(figsize=(10, 10), dpi=300) + ax = fig.gca() + + ax.loglog(id_eps, id_eps, "k--") + ax.loglog(id_eps, rec_error) + + ax.grid(True) + ax.set_xlabel(r"$\epsilon_{id}$") + ax.set_ylabel("$Error$") + ax.set_title(case.name) + + basename = "linalg_hmatrix_single_matvec" + fig.savefig(f"{basename}_{case.name}_{case.op_type}_convergence") + + if case.ambient_dim == 2: + fig.clf() + ax = fig.gca() + + from arraycontext import flatten + r_hmap = actx.to_numpy(flatten(r_hmat, actx)) + r_lpot = actx.to_numpy(flatten(r_lpot, actx)) + + ax.semilogy(r_hmap - r_lpot) + ax.set_ylim([1.0e-16, 1.0]) + fig.savefig(f"{basename}_{case.name}_{case.op_type}_error") + + pt.close(fig) + +# }}} + + +# {{{ test_hmatrix_forward_matvec + +@pytest.mark.parametrize("case", [ + HMATRIX_TEST_CASES[0], + HMATRIX_TEST_CASES[1], + pytest.param(HMATRIX_TEST_CASES[2], marks=pytest.mark.slowtest), + ]) +@pytest.mark.parametrize("discr_stage", [ + 
sym.QBX_SOURCE_STAGE1, + # sym.QBX_SOURCE_STAGE2 + ]) +def test_hmatrix_forward_matvec( + actx_factory, case, discr_stage, p2p=False, visualize=False): + actx = actx_factory() + rng = np.random.default_rng(42) + + if visualize: + logging.basicConfig(level=logging.INFO) + + if case.ambient_dim == 2: + kwargs = {"proxy_approx_count": 64, "proxy_radius_factor": 1.25} + else: + kwargs = {"proxy_approx_count": 256, "proxy_radius_factor": 1.25} + + case = replace(case, skel_discr_stage=discr_stage, **kwargs) + logger.info("\n%s", case) + + # {{{ geometry + + dd = sym.DOFDescriptor(case.name, discr_stage=case.skel_discr_stage) + qbx = case.get_layer_potential(actx, case.resolutions[-1], case.target_order) + places = GeometryCollection(qbx, auto_where=dd) + + density_discr = places.get_discretization(dd.geometry, dd.discr_stage) + max_particles_in_box = case.max_particles_in_box_for_discr(density_discr) + + tgt_src_index, _ = case.get_tgt_src_cluster_index( + actx, places, dd, max_particles_in_box=max_particles_in_box) + + logger.info("dd %s", dd) + logger.info("nclusters %3d ndofs %7d", + tgt_src_index.nclusters, density_discr.ndofs) + + # }}} + + # {{{ construct hmatrix + + from pytential.linalg.hmatrix import build_hmatrix_by_proxy + sym_u, sym_op = case.get_operator(places.ambient_dim) + + x = actx.thaw(density_discr.nodes()[0]) + + if p2p: + # NOTE: this also needs changed in `build_hmatrix_by_proxy` + # to actually evaluate the p2p interactions instead of qbx + from pytential.linalg.direct_solver_symbolic import prepare_expr + from pytential.symbolic.matrix import P2PMatrixBuilder + mat = P2PMatrixBuilder( + actx, + dep_expr=sym_u, + other_dep_exprs=[], + dep_discr=density_discr, + places=places, + context={}, + )(prepare_expr(places, sym_op, (dd, dd))) + + from arraycontext import flatten, unflatten + y = actx.to_numpy(flatten(x, actx)) + r_lpot = unflatten(x, actx.from_numpy(mat @ y), actx) + else: + r_lpot = bind(places, sym_op, auto_where=dd)(actx, u=x) + + from 
pytential.linalg.hmatrix import hmatrix_error_from_param + id_eps = 10.0 ** (-np.arange(2, 16)) + rec_error = np.zeros_like(id_eps) + model_error = np.zeros_like(id_eps) + + from pytools.convergence import EOCRecorder + eoc = EOCRecorder() + + for i in range(id_eps.size): + wrangler = build_hmatrix_by_proxy( + actx, places, sym_op, sym_u, + domains=[dd], + context=case.knl_concrete_kwargs, + id_eps=id_eps[i], + rng=rng, + _tree_kind=case.tree_kind, + _max_particles_in_box=max_particles_in_box, + _approx_nproxy=case.proxy_approx_count, + _proxy_radius_factor=case.proxy_radius_factor, + ) + hmat = wrangler.get_forward() + + # {{{ skeletonization error + + from meshmode.dof_array import flat_norm + r_hmap = hmat @ x + rec_error[i] = actx.to_numpy( + flat_norm(r_hmap - r_lpot) / flat_norm(r_lpot) + ) + + # }}} + + # {{{ model error + + skeleton = hmat.skeletons[0] + icluster = np.argmax(np.diff(skeleton.skel_tgt_src_index.targets.starts)) + + proxy_radius = actx.to_numpy( + skeleton._src_eval_result.pxy.radii[icluster] + ) + cluster_radius = actx.to_numpy( + skeleton._src_eval_result.pxy.cluster_radii[icluster] + ) + + model_error[i] = hmatrix_error_from_param( + places.ambient_dim, + id_eps=id_eps[i], + min_proxy_radius=proxy_radius, + max_cluster_radius=cluster_radius, + id_rank=skeleton.skel_tgt_src_index.targets.cluster_size(icluster), + nproxies=skeleton._src_eval_result.pxy.pxyindex.cluster_size(icluster), + ntargets=skeleton.tgt_src_index.targets.cluster_size(icluster), + nsources=skeleton.tgt_src_index.targets.cluster_size(icluster), + c=1.0e-8 + ) + + # }}} + + logger.info("id_eps %.2e error: %.12e (%.12e)", + id_eps[i], rec_error[i], model_error[i]) + eoc.add_data_point(id_eps[i], rec_error[i]) + + logger.info("\n%s", eoc.pretty_print( + abscissa_format="%.8e", + error_format="%.8e", + eoc_format="%.2f")) + + if not visualize: + assert eoc.order_estimate() > 0.6 + + # }}} + + if not visualize: + return + + import matplotlib.pyplot as pt + fig = 
pt.figure(figsize=(10, 10), dpi=300) + ax = fig.gca() + + ax.loglog(id_eps, id_eps, "k--") + ax.loglog(id_eps, rec_error) + ax.loglog(id_eps, model_error) + + ax.grid(True) + ax.set_xlabel(r"$\epsilon_{id}$") + ax.set_ylabel("$Error$") + ax.set_title(case.name) + + lpot_name = "p2p" if p2p else "qbx" + basename = f"linalg_hmatrix_{lpot_name}_matvec" + fig.savefig(f"{basename}_{case.name}_{case.op_type}_convergence") + + if case.ambient_dim == 2: + fig.clf() + ax = fig.gca() + + from arraycontext import flatten + r_hmap = actx.to_numpy(flatten(r_hmap, actx)) + r_lpot = actx.to_numpy(flatten(r_lpot, actx)) + + ax.semilogy(r_hmap - r_lpot) + ax.set_ylim([1.0e-16, 1.0]) + fig.savefig(f"{basename}_{case.name}_{case.op_type}_error") + + pt.close(fig) + +# }}} + + +# {{{ test_hmatrix_backward_matvec + +@pytest.mark.parametrize("case", [ + HMATRIX_TEST_CASES[0], + HMATRIX_TEST_CASES[1], + pytest.param(HMATRIX_TEST_CASES[2], marks=pytest.mark.slowtest), + ]) +@pytest.mark.parametrize("discr_stage", [ + sym.QBX_SOURCE_STAGE1, + # sym.QBX_SOURCE_STAGE2 + ]) +def test_hmatrix_backward_matvec(actx_factory, case, discr_stage, visualize=False): + actx = actx_factory() + rng = np.random.default_rng(42) + + if visualize: + logging.basicConfig(level=logging.INFO) + + if case.ambient_dim == 2: + kwargs = {"proxy_approx_count": 64, "proxy_radius_factor": 1.25} + else: + kwargs = {"proxy_approx_count": 64, "proxy_radius_factor": 1.25} + + case = replace(case, skel_discr_stage=discr_stage, **kwargs) + logger.info("\n%s", case) + + # {{{ geometry + + dd = sym.DOFDescriptor(case.name, discr_stage=case.skel_discr_stage) + qbx = case.get_layer_potential(actx, case.resolutions[-1], case.target_order) + places = GeometryCollection(qbx, auto_where=dd) + + density_discr = places.get_discretization(dd.geometry, dd.discr_stage) + max_particles_in_box = case.max_particles_in_box_for_discr(density_discr) + + tgt_src_index, _ = case.get_tgt_src_cluster_index( + actx, places, dd, 
max_particles_in_box=max_particles_in_box) + + logger.info("dd %s", dd) + logger.info("nclusters %3d ndofs %7d", + tgt_src_index.nclusters, density_discr.ndofs) + + # }}} + + # {{{ + + sym_u, sym_op = case.get_operator(places.ambient_dim) + + if visualize: + from pytential.linalg.direct_solver_symbolic import prepare_expr + from pytential.symbolic.matrix import MatrixBuilder + mat = MatrixBuilder( + actx, + dep_expr=sym_u, + other_dep_exprs=[], + dep_discr=density_discr, + places=places, + context={}, + )(prepare_expr(places, sym_op, (dd, dd))) + + import pytential.linalg.utils as hla + eigs_ref = hla.eigs(mat, k=5) + kappa_ref = np.linalg.cond(mat, p=2) + + # }}} + + # {{{ construct hmatrix + + from pytential.linalg.hmatrix import build_hmatrix_by_proxy + sym_u, sym_op = case.get_operator(places.ambient_dim) + + x_ref = actx.thaw(density_discr.nodes()[0]) + b_ref = bind(places, sym_op, auto_where=dd)(actx, u=x_ref) + + id_eps = 10.0 ** (-np.arange(2, 16)) + rec_error = np.zeros_like(id_eps) + + if visualize: + rec_eigs = np.zeros((id_eps.size, eigs_ref.size), dtype=np.complex128) + rec_kappa = np.zeros(id_eps.size) + + from pytools.convergence import EOCRecorder + eoc = EOCRecorder() + + for i in range(id_eps.size): + wrangler = build_hmatrix_by_proxy( + actx, places, sym_op, sym_u, + domains=[dd], + context=case.knl_concrete_kwargs, + id_eps=id_eps[i], + rng=rng, + _tree_kind=case.tree_kind, + _max_particles_in_box=max_particles_in_box, + _approx_nproxy=case.proxy_approx_count, + _proxy_radius_factor=case.proxy_radius_factor, + ) + + hmat_inv = wrangler.get_backward() + x_hmat = hmat_inv @ b_ref + + if visualize: + hmat = wrangler.get_forward() + rec_eigs[i, :] = hla.eigs(hmat, k=5, tol=1.0e-6) + rec_kappa[i] = hla.cond(hmat, p=2, tol=1.0e-6) + + logger.info("eigs: %s %s", eigs_ref, rec_eigs[i]) + logger.info("kappa %.12e %.12e", kappa_ref, rec_kappa[i]) + + from meshmode.dof_array import flat_norm + rec_error[i] = actx.to_numpy( + flat_norm(x_hmat - x_ref) / 
flat_norm(x_ref) + ) + logger.info("id_eps %.2e error: %.12e", id_eps[i], rec_error[i]) + eoc.add_data_point(id_eps[i], rec_error[i]) + + logger.info("\n%s", eoc.pretty_print( + abscissa_format="%.8e", + error_format="%.8e", + eoc_format="%.2f")) + + if not visualize: + assert eoc.order_estimate() > 0.6 + + # }}} + + if not visualize: + return + + import matplotlib.pyplot as pt + fig = pt.figure(figsize=(10, 10), dpi=300) + + # {{{ convergence + + ax = fig.gca() + ax.loglog(id_eps, id_eps, "k--") + ax.loglog(id_eps, rec_error) + + ax.grid(True) + ax.set_xlabel(r"$\epsilon_{id}$") + ax.set_ylabel("$Error$") + ax.set_title(case.name) + + fig.savefig(f"linalg_hmatrix_inverse_{case.name}_{case.op_type}_convergence") + fig.clf() + + # }}} + + # {{{ eigs + + ax = fig.gca() + ax.plot(np.real(eigs_ref), np.imag(eigs_ref), "ko") + for i in range(id_eps.size): + ax.plot(np.real(rec_eigs[i]), np.imag(rec_eigs[i]), "v") + + ax.grid(True) + ax.set_xlabel(r"$\Re \lambda$") + ax.set_ylabel(r"$\Im \lambda$") + + fig.savefig(f"linalg_hmatrix_inverse_{case.name}_{case.op_type}_eigs") + fig.clf() + + # }}} + + if case.ambient_dim == 2: + ax = fig.gca() + + from arraycontext import flatten + x_hmat = actx.to_numpy(flatten(x_hmat, actx)) + x_ref = actx.to_numpy(flatten(x_ref, actx)) + + ax.semilogy(x_hmat - x_ref) + ax.set_ylim([1.0e-16, 1.0]) + fig.savefig(f"linalg_hmatrix_inverse_{case.name}_{case.op_type}_error") + fig.clf() + + pt.close(fig) + +# }}} + + +if __name__ == "__main__": + import sys + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + +# vim: fdm=marker diff --git a/test/test_linalg_proxy.py b/test/test_linalg_proxy.py index 71d2df405..a3262f2e8 100644 --- a/test/test_linalg_proxy.py +++ b/test/test_linalg_proxy.py @@ -221,7 +221,7 @@ def test_partition_points( places = GeometryCollection(qbx, auto_where=case.name) density_discr = places.get_discretization(case.name) - mindex = case.get_cluster_index(actx, places) + mindex, 
_ = case.get_cluster_index(actx, places) expected_indices = np.arange(0, density_discr.ndofs) assert mindex.starts[-1] == density_discr.ndofs @@ -268,7 +268,7 @@ def test_proxy_generator(actx_factory: ArrayContextFactory, case, places = GeometryCollection(qbx, auto_where=case.name) density_discr = places.get_discretization(case.name) - cindex = case.get_cluster_index(actx, places) + cindex, _ = case.get_cluster_index(actx, places) generator = proxy_generator_cls(places, approx_nproxy=case.proxy_approx_count, @@ -314,7 +314,7 @@ def test_proxy_generator(actx_factory: ArrayContextFactory, case, ProxyGenerator, QBXProxyGenerator, ]) @pytest.mark.parametrize("index_sparsity_factor", [1.0, 0.6]) -@pytest.mark.parametrize("proxy_radius_factor", [1, 1.1]) +@pytest.mark.parametrize("proxy_radius_factor", [1.0, 1.1]) def test_neighbor_points(actx_factory: ArrayContextFactory, case, proxy_generator_cls, index_sparsity_factor, proxy_radius_factor, visualize=False): @@ -337,7 +337,7 @@ def test_neighbor_points(actx_factory: ArrayContextFactory, case, dofdesc = places.auto_source density_discr = places.get_discretization(dofdesc.geometry) - srcindex = case.get_cluster_index(actx, places) + srcindex, _ = case.get_cluster_index(actx, places) # generate proxy points generator = proxy_generator_cls(places, @@ -347,7 +347,7 @@ def test_neighbor_points(actx_factory: ArrayContextFactory, case, # get neighboring points from pytential.linalg.proxy import gather_cluster_neighbor_points - nbrindex = gather_cluster_neighbor_points(actx, pxy) + nbrindex = gather_cluster_neighbor_points(actx, pxy, srcindex) pxy = pxy.to_numpy(actx) nodes = actx.to_numpy( diff --git a/test/test_linalg_skeletonization.py b/test/test_linalg_skeletonization.py index 76ea5e995..a2c1fb809 100644 --- a/test/test_linalg_skeletonization.py +++ b/test/test_linalg_skeletonization.py @@ -127,39 +127,124 @@ def test_skeletonize_symbolic(actx_factory: ArrayContextFactory, case, visualize places = GeometryCollection(qbx, 
auto_where=dd) density_discr = places.get_discretization(dd.geometry, dd.discr_stage) - tgt_src_index = case.get_tgt_src_cluster_index(actx, places, dd) + tgt_src_index, ctree = case.get_tgt_src_cluster_index(actx, places, dd) logger.info("nclusters %3d ndofs %7d", tgt_src_index.nclusters, density_discr.ndofs) # }}} - # {{{ wranglers + from pytential.linalg.skeletonization import rec_skeletonize_by_proxy - from pytential.linalg.proxy import QBXProxyGenerator - proxy_generator = QBXProxyGenerator(places, - radius_factor=case.proxy_radius_factor, - approx_nproxy=case.proxy_approx_count) + sym_u, sym_op = case.get_operator(places.ambient_dim) + rec_skeletonize_by_proxy( + actx, places, ctree, tgt_src_index, sym_op, sym_u, + context=case.knl_concrete_kwargs, + auto_where=dd, + id_eps=1.0e-8, + rng=rng + ) + +# }}} + + +# {{{ test_skeletonize_diagonal + +@pytest.mark.parametrize("case", [ + SKELETONIZE_TEST_CASES[0], + SKELETONIZE_TEST_CASES[1], + SKELETONIZE_TEST_CASES[2], + ]) +def test_skeletonize_diagonal(actx_factory, case, visualize=False): + import scipy.linalg.interpolative as sli + sli.seed(42) + + actx = actx_factory() + rng = np.random.default_rng(42) + + if visualize: + logging.basicConfig(level=logging.INFO) + + # {{{ setup + + dd = sym.DOFDescriptor(case.name, discr_stage=case.skel_discr_stage) + resolution = case.resolutions[-1] + + qbx = case.get_layer_potential(actx, resolution, case.target_order) + places = GeometryCollection(qbx, auto_where=dd) + + tgt_src_index, ctree = case.get_tgt_src_cluster_index(actx, places, dd) - from pytential.linalg.skeletonization import make_skeletonization_wrangler sym_u, sym_op = case.get_operator(places.ambient_dim) - wrangler = make_skeletonization_wrangler(places, sym_op, sym_u, - domains=None, - context=case.knl_concrete_kwargs, - _weighted_proxy=case.weighted_proxy, - _proxy_source_cluster_builder=case.proxy_source_cluster_builder, - _proxy_target_cluster_builder=case.proxy_target_cluster_builder, - 
_neighbor_cluster_builder=case.neighbor_cluster_builder) # }}} - from pytential.linalg.skeletonization import _skeletonize_block_by_proxy_with_mats + # {{{ check - _skeletonize_block_by_proxy_with_mats( - actx, 0, 0, places, proxy_generator, wrangler, tgt_src_index, - id_eps=1.0e-8, + from pytential.linalg.skeletonization import make_skeletonization_wrangler + wrangler = make_skeletonization_wrangler( + places, sym_op, sym_u, + auto_where=dd, context=case.knl_concrete_kwargs) + + from pytential.linalg.skeletonization import rec_skeletonize_by_proxy + skeletons = rec_skeletonize_by_proxy( + actx, places, ctree, tgt_src_index, sym_op, sym_u, + auto_where=dd, + context=case.knl_concrete_kwargs, + approx_nproxy=case.proxy_approx_count, + proxy_radius_factor=case.proxy_radius_factor, + id_eps=case.id_eps, rng=rng, - ) + _wrangler=wrangler, + ) + + from pytential.linalg.hmatrix import _update_skeleton_diagonal + for i in range(1, skeletons.size): + skeletons[i] = _update_skeleton_diagonal( + skeletons[i], skeletons[i - 1], ctree.levels[i - 1], + ) + + from pytential.linalg.cluster import cluster + parent = None + for k, clevel in enumerate(ctree.levels): + from pytential.linalg.utils import make_flat_cluster_diag + + tgt_src_index = skeletons[k].tgt_src_index + D1 = wrangler.evaluate_self(actx, places, tgt_src_index, 0, 0) + D1 = make_flat_cluster_diag(D1, tgt_src_index) + + if k == 0: + D = D0 = D1 + else: + skel_tgt_src_index = skeletons[k - 1].skel_tgt_src_index + assert skel_tgt_src_index.shape == tgt_src_index.shape + + D0 = wrangler.evaluate_self(actx, places, skel_tgt_src_index, 0, 0) + D0 = cluster(make_flat_cluster_diag(D0, skel_tgt_src_index), parent) + + D = D1 - D0 + + parent = clevel + + assert D1.shape == (skeletons[k].nclusters,) + assert D1.shape == D0.shape, (D1.shape, D0.shape) + assert D1.shape == D.shape, (D1.shape, D.shape) + + for i in range(skeletons[k].nclusters): + assert D1[i].shape == skeletons[k].tgt_src_index.cluster_shape(i, i) + assert 
D1[i].shape == D0[i].shape, (D1[i].shape, D0[i].shape) + + error = la.norm(D[i] - skeletons[k].D[i]) / (la.norm(D[i]) + 1.0e-12) + logger.info("level %04d / %04d cluster %3d (%4d, %4d) error %.12e", + k, ctree.nlevels, + ctree.tree_cluster_parent_ids[clevel.box_ids][i], + *skeletons[k].tgt_src_index.cluster_shape(i, i), error) + + assert error < 1.0e-15 + + logger.info("") + + # }}} # }}} @@ -186,7 +271,7 @@ def run_skeletonize_by_proxy(actx, case, resolution, density_discr = places.get_discretization(dd.geometry, dd.discr_stage) if tgt_src_index is None: - tgt_src_index = case.get_tgt_src_cluster_index(actx, places, dd) + tgt_src_index, _ = case.get_tgt_src_cluster_index(actx, places, dd) logger.info("nclusters %3d ndofs %7d", tgt_src_index.nclusters, density_discr.ndofs) @@ -362,19 +447,18 @@ def intersect1d(x, y): # }}} - return err_f, (places, mat) + return err_f, (places, mat, skeleton) @pytest.mark.parametrize("case", [ - # NOTE: skip 2d tests, since they're better checked for convergence in - # `test_skeletonize_by_proxy_convergence` - # SKELETONIZE_TEST_CASES[0], SKELETONIZE_TEST_CASES[1], + SKELETONIZE_TEST_CASES[0], + SKELETONIZE_TEST_CASES[1], SKELETONIZE_TEST_CASES[2], ]) def test_skeletonize_by_proxy(actx_factory: ArrayContextFactory, case, visualize=False): - r"""Test single-level skeletonization accuracy. Checks that the error - satisfies :math:`e < c \epsilon_{id}` for a fixed ID tolerance and an - empirically determined (not too huge) :math:`c`. + r"""Test multilevel skeletonization accuracy. Checks that the error for + every level satisfies :math:`e < c \epsilon_{id}` for a fixed ID tolerance + and an empirically determined (not too huge) :math:`c`. 
""" import scipy.linalg.interpolative as sli @@ -390,13 +474,27 @@ def test_skeletonize_by_proxy(actx_factory: ArrayContextFactory, case, visualize case = replace(case, approx_cluster_count=6, id_eps=1.0e-8) logger.info("\n%s", case) - run_skeletonize_by_proxy( - actx, case, case.resolutions[0], - ctol=10 * case.id_eps, - # FIXME: why is the 3D error so large? - rtol=10**case.ambient_dim * case.id_eps, - rng=rng, - visualize=visualize) + dd = sym.DOFDescriptor(case.name, discr_stage=case.skel_discr_stage) + qbx = case.get_layer_potential(actx, case.resolutions[0], case.target_order) + places = GeometryCollection(qbx, auto_where=dd) + + tgt_src_index, ctree = case.get_tgt_src_cluster_index(actx, places, dd) + mat = None + + from pytential.linalg.cluster import cluster + for clevel in ctree.levels[:-1]: + logger.info("[%2d/%2d] nclusters %3d", + clevel.level, ctree.nlevels, clevel.nclusters) + + _, (_, mat, skeleton) = run_skeletonize_by_proxy( + actx, case, case.resolutions[0], + ctol=10 * case.id_eps, + # FIXME: why is the 3D error so large? 
+ rtol=10**case.ambient_dim * case.id_eps, + places=places, mat=mat, rng=rng, tgt_src_index=tgt_src_index, + visualize=visualize) + + tgt_src_index = cluster(skeleton.skel_tgt_src_index, clevel) # }}} @@ -466,7 +564,7 @@ def test_skeletonize_by_proxy_convergence( # NOTE: don't skeletonize anymore if we reached zero error, but we still # want to loop to do `eoc.add_data_point()` if not was_zero: - rec_error[i], (places, mat) = run_skeletonize_by_proxy( + rec_error[i], (places, mat, _) = run_skeletonize_by_proxy( actx, case, r, places=places, mat=mat, suffix=f"{suffix}_{i:04d}", rng=rng, visualize=False) diff --git a/test/test_matrix.py b/test/test_matrix.py index c12c2c54f..e06a0015f 100644 --- a/test/test_matrix.py +++ b/test/test_matrix.py @@ -367,7 +367,7 @@ def test_cluster_builder( # {{{ matrix - mindex = case.get_tgt_src_cluster_index(actx, places) + mindex, _ = case.get_tgt_src_cluster_index(actx, places) kwargs = { "dep_expr": sym_u, "other_dep_exprs": [], @@ -495,8 +495,8 @@ def test_build_matrix_fixed_stage( logger.info("ndofs: %d", target_discr.ndofs) from pytential.linalg import TargetAndSourceClusterList - itargets = case.get_cluster_index(actx, places, target_dd) - jsources = case.get_cluster_index(actx, places, source_dd) + itargets, _ = case.get_cluster_index(actx, places, target_dd) + jsources, _ = case.get_cluster_index(actx, places, source_dd) mindex = TargetAndSourceClusterList(itargets, jsources) kwargs = {